# Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series (a one-dimensional labelled array) from a Python list, supplying custom index labels
list_1 = list('abcd')
labels = list(range(1, 5))

ser_1 = pd.Series(list_1, index=labels)
ser_1

1    a
2    b
3    c
4    d
dtype: object

In [3]:
# Build a Series (a one-dimensional labelled array) from a 1D NumPy array;
# no index is given, so positions 0..n-1 are used as labels
arr_1 = np.arange(1, 5)
ser_2 = pd.Series(data=arr_1)

# Report the dtype of the Series
print(f"data type: {ser_2.dtype}\n")

ser_2

data type: int64



0    1
1    2
2    3
3    4
dtype: int64

In [4]:
# Build a Series (a one-dimensional labelled array) from a Python dictionary;
# the dictionary keys become the index labels
dict_1 = dict(f_name='Allan', l_name='Poe')
ser_3 = pd.Series(data=dict_1)

# Look up the value stored under the 'f_name' label
print(f"fname: {ser_3['f_name']}\n")

# Report the dtype of the Series
print(f"data type: {ser_3.dtype}\n")

ser_3

fname: Allan

data type: object



f_name    Allan
l_name      Poe
dtype: object

In [5]:
# Add ser_2 to itself element-wise (values with matching labels are summed)
print("ser_2 + ser_2:")
print(ser_2.add(ser_2))

ser_2 + ser_2:
0    2
1    4
2    6
3    8
dtype: int64


In [6]:
# Multiply ser_2 by itself element-wise (values with matching labels are multiplied)
print("ser_2 * ser_2:")
print(ser_2.mul(ser_2))

ser_2 * ser_2:
0     1
1     4
2     9
3    16
dtype: int64


In [7]:
# Compute the exponential of every element by handing the Series to the corresponding NumPy function
ser_2.pipe(np.exp)

0     2.718282
1     7.389056
2    20.085537
3    54.598150
dtype: float64

In [8]:
# Add two Series whose labels mostly do not line up: the result covers the union of
# both indexes and is NaN wherever a label is missing from either operand
ser_4 = pd.Series([5, 6, 7], index=[4, 3, 6])
ser_2.add(ser_4)

0     NaN
1     NaN
2     NaN
3    10.0
4     NaN
6     NaN
dtype: float64

# DataFrames

## Creating DataFrames

In [9]:
# Build a 2x3 array of random integers drawn from [10, 50).
# A seeded generator is used so the values (and every DataFrame derived from them)
# are identical on each Restart & Run All.
rng = np.random.default_rng(42)
arr_2 = rng.integers(10, 50, size=(2, 3))
arr_2

array([[29, 25, 10],
       [46, 35, 47]])

In [10]:
# Wrap arr_2 in a DataFrame, labelling its two rows A-B and its three columns C-E
df_1 = pd.DataFrame(data=arr_2, index=['A', 'B'], columns=['C', 'D', 'E'])
df_1

Unnamed: 0,C,D,E
A,29,25,10
B,46,35,47


In [11]:
# Build a DataFrame from a dict of Series: each key becomes a column and rows are
# aligned on the union of the Series labels, so 'one' is NaN for label 'd'
dict_3 = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abcd')),
}
df_2 = pd.DataFrame(data=dict_3)
df_2

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [12]:
# Build a DataFrame from a dict of lists: each key becomes a column
pd.DataFrame.from_dict({'A': [1, 2, 3], 'B': [4, 5, 6]})

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [13]:
# Build a DataFrame from a dict of lists with orient='index': each key becomes a row,
# and the column names are supplied explicitly
pd.DataFrame.from_dict(
    {'A': [1, 2, 3], 'B': [4, 5, 6]},
    orient='index',
    columns=['one', 'two', 'three'],
)

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [14]:
# Get the DataFrame dimensions as a (rows, columns) tuple
df_1.shape

(2, 3)

## Editing & Retrieving data

In [137]:
# Print the whole DataFrame for reference, then retrieve column 'C' of df_1 as a Series
print(df_1)

df_1['C']

    C   D   E
A  28  24  36
B  37  33  40


A    28
B    37
Name: C, dtype: int64

In [138]:
# Retrieve multiple columns at once (C and E) by passing a list of column names
df_1[['C', 'E']]

Unnamed: 0,C,E
A,28,36
B,37,40


In [139]:
# Retrieve the values of row 'A' (selected by label) as a Series
df_1.loc['A']

C    28
D    24
E    36
Name: A, dtype: int64

In [140]:
# Retrieve a row by integer position (position 1 is the second row, since positions start at 0)
df_1.iloc[1]

C    37
D    33
E    40
Name: B, dtype: int64

In [141]:
# Retrieve the single cell at a specific row and column (row: A, column: C)
df_1.loc['A', 'C']

28

In [142]:
# Retrieve multiple cells identified by lists of row and column labels (columns 'D' and 'E' of row 'A')
df_1.loc[['A'], ['D', 'E']]

Unnamed: 0,D,E
A,24,36


In [143]:
# Create a new column 'Total' holding the row-wise sum of columns C, D and E
df_1['Total'] = df_1[['C', 'D', 'E']].sum(axis=1)
df_1

Unnamed: 0,C,D,E,Total
A,28,24,36,88
B,37,33,40,110


In [144]:
# Create a new column 'Mult' holding the element-wise product of columns 'one' and 'two'
# (a NaN in either operand yields NaN in the product)
print(df_2)
df_2['Mult'] = df_2['one'].mul(df_2['two'])
df_2

   one  two
a  1.0  1.0
b  2.0  2.0
c  3.0  3.0
d  NaN  4.0


Unnamed: 0,one,two,Mult
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [145]:
# Add a new row to df_1 by concatenating a one-row DataFrame built from a dictionary
dict_2 = {'C': 44, 'D': 45, 'E': 46}
# Wrapping the dict in a list makes it a single record: its keys become the columns
# and the explicit index labels the row 'F'
new_df = pd.DataFrame([dict_2], index=['F'])

print(df_1, '\n')
print(new_df, '\n')

# pd.concat returns a new DataFrame; columns missing from one input are filled with NaN
pd.concat([df_1, new_df])

    C   D   E  Total
A  28  24  36     88
B  37  33  40    110 

    C   D   E
F  44  45  46 



Unnamed: 0,C,D,E,Total
A,28,24,36,88.0
B,37,33,40,110.0
F,44,45,46,


In [146]:
# Remove the 'Total' column from df_1 (modifies df_1 in place)
df_1.drop(columns='Total', inplace=True)

df_1

Unnamed: 0,C,D,E
A,28,24,36
B,37,33,40


In [147]:
# Show df_1 with row 'B' deleted.
# drop() returns a new DataFrame here instead of mutating df_1: the later cell that
# assigns the two labels ['Men', 'Women'] to df_1 needs both rows to still be present.
df_1.drop(index='B')

Unnamed: 0,C,D,E
A,28,24,36


In [16]:
# Add a 'Sex' column (one label per row), then promote that column to be the index
df_1['Sex'] = ['Men', 'Women']
df_1.set_index(keys='Sex', inplace=True)
df_1

Unnamed: 0_level_0,C,D,E
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Men,29,25,10
Women,46,35,47


In [20]:
# Add a 'div' column with assign(), which returns a new DataFrame and leaves df_2 itself untouched
df_2.assign(div=df_2['one'].div(df_2['two']))

Unnamed: 0,one,two,div
a,1.0,1.0,1.0
b,2.0,2.0,1.0
c,3.0,3.0,1.0
d,,4.0,


In [21]:
# Add a 'div' column with assign() by passing a function that computes the column values
# from the DataFrame (returns a new DataFrame and leaves df_2 itself untouched)
df_2.assign(div=lambda frame: frame['one'] / frame['two'])

Unnamed: 0,one,two,div
a,1.0,1.0,1.0
b,2.0,2.0,1.0
c,3.0,3.0,1.0
d,,4.0,


In [26]:
# Combine DataFrames: keep every value of the first DF, falling back to the second DF
# wherever the first one holds NaN
df_3 = pd.DataFrame(data={'A': [1.0, np.nan, 3.0, np.nan]})
df_4 = pd.DataFrame(data={'A': [8.0, 9.0, 2.0, 4.0]})
print(df_3, '\n')
print(df_4, '\n')
df_3.combine_first(other=df_4)

     A
0  1.0
1  NaN
2  3.0
3  NaN 

     A
0  8.0
1  9.0
2  2.0
3  4.0 



Unnamed: 0,A
0,1.0
1,9.0
2,3.0
3,4.0
