## Index Preservation

In [1]:
import numpy as np
import pandas as pd
rand = np.random.RandomState(42)
ser1 = pd.Series(rand.randint(10,size=4))
print(ser1)

0    6
1    3
2    7
3    4
dtype: int32


In [2]:
df1 = pd.DataFrame(rand.randint(10,size=(3,4)),
                  columns = ['a', 'b', 'c', 'd'])
print(df1)

   a  b  c  d
0  6  9  2  6
1  7  4  3  7
2  7  2  5  4


In [3]:
np.exp(ser1)
print(np.multiply(df1, 10))

    a   b   c   d
0  60  90  20  60
1  70  40  30  70
2  70  20  50  40


## Index Alignment

In [4]:
# index allignment in Series
A = pd.Series([1,2,3], index=[0,1,2])
B = pd.Series([10,20,30], index=[1,2,3])
print(A);print(B)
print(A.add(B))

0    1
1    2
2    3
dtype: int64
1    10
2    20
3    30
dtype: int64
0     NaN
1    12.0
2    23.0
3     NaN
dtype: float64


In [5]:
# add() method with fill_value
A.add(B, fill_value=0)

0     1.0
1    12.0
2    23.0
3    30.0
dtype: float64

In [6]:
# index Alignment in DataFrame 
C = pd.DataFrame(rand.randint(10, size=(2,2)),
                columns = ['a', 'b'])
D = pd.DataFrame(rand.randint(10, size=(3,3)),
                columns = ['a', 'b', 'c'])
print(C)
print(D)
print(C.add(D))

   a  b
0  1  7
1  5  1
   a  b  c
0  4  0  9
1  5  8  0
2  9  2  6
      a    b   c
0   5.0  7.0 NaN
1  10.0  9.0 NaN
2   NaN  NaN NaN


In [7]:
# add() method with fill_value
print(C.add(D, fill_value=0))

      a    b    c
0   5.0  7.0  9.0
1  10.0  9.0  0.0
2   9.0  2.0  6.0


## Mathematical Functions

### Pandas Addition : add()

In [8]:
df = pd.DataFrame({'speed': [80, 90, 110],
                  'weight': [250,200, 150]},
                 index = ['Audi', 'Jaguar', 'BMW'])
df
df.add(27)

Unnamed: 0,speed,weight
Audi,107,277
Jaguar,117,227
BMW,137,177


In [9]:
# addition of two dataframes 
df1 = pd.DataFrame({'speed': [80, 90, 110],
                  'weight': [250,200, 150]},
                 index = ['Audi', 'Jaguar', 'BMW'])
df1
df.add(df1)

Unnamed: 0,speed,weight
Audi,160,500
Jaguar,180,400
BMW,220,300


In [10]:
df + 3

Unnamed: 0,speed,weight
Audi,83,253
Jaguar,93,203
BMW,113,153


In [11]:
df.sub([15, 30], axis='columns')

Unnamed: 0,speed,weight
Audi,65,220
Jaguar,75,170
BMW,95,120


In [12]:
df.sub(pd.Series([7, 9, 11], index=['Audi', 'Jaguar', 'BMW']),
       axis = 'index')

Unnamed: 0,speed,weight
Audi,73,243
Jaguar,81,191
BMW,99,139


In [13]:
df - [6,9]

Unnamed: 0,speed,weight
Audi,74,241
Jaguar,84,191
BMW,104,141


### Pandas Multiply : mul()

In [14]:
df1 = pd.DataFrame({'speed': [50, 75, 100]},
                  index=['Audi', 'Jaguar', 'BMW'])
df1
# res = df.mul(df1)
# df
# res

Unnamed: 0,speed
Audi,50
Jaguar,75
BMW,100


In [15]:
res = df.mul(df1)
df
res

Unnamed: 0,speed,weight
Audi,4000,
Jaguar,6750,
BMW,11000,


In [16]:
# use fill_value parameter
df
df.mul(df1, fill_value=0)

Unnamed: 0,speed,weight
Audi,4000,0.0
Jaguar,6750,0.0
BMW,11000,0.0


In [17]:
# perform multiplication operation using "*"
df


Unnamed: 0,speed,weight
Audi,80,250
Jaguar,90,200
BMW,110,150


In [18]:
df1

Unnamed: 0,speed
Audi,50
Jaguar,75
BMW,100


In [19]:
df * df1

Unnamed: 0,speed,weight
Audi,4000,
Jaguar,6750,
BMW,11000,


### Pandas Division : div()

In [20]:
df
df.div(10)

Unnamed: 0,speed,weight
Audi,8.0,25.0
Jaguar,9.0,20.0
BMW,11.0,15.0


In [23]:
import pandas as pd
df_multindex = pd.DataFrame({'no gears': [4,5,3,3,8,6],
                            'Speed': [360,180,360,360,540,720]},
                           index=[['Sedan', 'Sedan', 'Sedan', 'Hatchback','Hatchback','Hatchback'],
                                 ['BMW', 'Audi', 'Bently', 'Mercedes','Jaguar','Mini Cooper']])
df_multindex

Unnamed: 0,Unnamed: 1,no gears,Speed
Sedan,BMW,4,360
Sedan,Audi,5,180
Sedan,Bently,3,360
Hatchback,Mercedes,3,360
Hatchback,Jaguar,8,540
Hatchback,Mini Cooper,6,720


In [22]:
df
df.div(df_multindex, level=1,fill_value=2)

Unnamed: 0,Unnamed: 1,Speed,no gears,speed,weight
Sedan,BMW,0.005556,0.5,55.0,75.0
Sedan,Audi,0.011111,0.4,40.0,125.0
Sedan,Bently,0.005556,0.666667,,
Hatchback,Mercedes,0.005556,0.666667,,
Hatchback,Jaguar,0.003704,0.25,45.0,100.0
Hatchback,Mini Cooper,0.002778,0.333333,,


### Pandas Sum : sum()

In [27]:
# sum function with multindex dataframe 
df_sum = pd.MultiIndex.from_arrays([
    ['Sedan', 'Hatchback','Sedan','Hatchback'],
    ['BMW', 'Mini Cooper', 'Audi', 'Aston Martin']],
   names = ['designs', 'companies'])
cars = pd.Series([3,6,9,18], name = 'types_of_Cars', index=df_sum)
cars.sum()

36

In [28]:
# sum function with level parameter 
cars.sum(level='designs')

  cars.sum(level='designs')


designs
Sedan        12
Hatchback    24
Name: types_of_Cars, dtype: int64

In [29]:
cars.sum(level=0)

  cars.sum(level=0)


designs
Sedan        12
Hatchback    24
Name: types_of_Cars, dtype: int64

In [30]:
cars.sum(level=1)

  cars.sum(level=1)


companies
BMW              3
Mini Cooper      6
Audi             9
Aston Martin    18
Name: types_of_Cars, dtype: int64

### Pandas Aggregate : agg()

In [31]:
df = pd.DataFrame([[15,22,37],
                 [49,np.nan,64],
                 [np.nan,89,99],
                 [53,np.nan,71]],
                 columns = ['P', 'Q', 'R'])
df

Unnamed: 0,P,Q,R
0,15.0,22.0,37
1,49.0,,64
2,,89.0,99
3,53.0,,71


In [32]:
df.agg(['sum', 'min'])

Unnamed: 0,P,Q,R
sum,117.0,111.0,271
min,15.0,22.0,37


In [33]:
# different agg() functions on each column
df.agg({'P':['sum', 'min'], 'Q':['min', 'max']})

Unnamed: 0,P,Q
sum,117.0,
min,15.0,22.0
max,,89.0


In [34]:
# aggregating over columns
df.agg("mean", axis="columns")

0    24.666667
1    56.500000
2    94.000000
3    62.000000
dtype: float64

### Pandas DROP : DROP()

In [35]:
import numpy as np 
df = pd.DataFrame(np.arange(15).reshape(3,5),
            columns = ['A', 'B', 'C', 'D', 'E'])
df

Unnamed: 0,A,B,C,D,E
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14


In [37]:
df.drop(['A','E'], axis=1)

Unnamed: 0,B,C,D
0,1,2,3
1,6,7,8
2,11,12,13


In [38]:
df.drop(columns=['A', 'E'])

Unnamed: 0,B,C,D
0,1,2,3
1,6,7,8
2,11,12,13


In [39]:
df.drop([0,2])

Unnamed: 0,A,B,C,D,E
1,5,6,7,8,9


In [41]:
# drop function over multi index dataframe
midx = pd.MultiIndex(levels = [['BMW', 'Jaguar', 'Merecedes'],
                              ['speed', 'weight', 'length']],
                              codes = [[0,0,0,1,1,1,2,2,2],
                                      [0,1,2,0,1,2,0,1,2]])
midx
df = pd.DataFrame(index=midx, columns = ['big', 'small'],
                 data = [[125, 130], [200, 100], [10,6], [300, 200],
                        [150,70], [8,6], [320, 250],
                         [75, 65], [12, 10]])
df

Unnamed: 0,Unnamed: 1,big,small
BMW,speed,125,130
BMW,weight,200,100
BMW,length,10,6
Jaguar,speed,300,200
Jaguar,weight,150,70
Jaguar,length,8,6
Merecedes,speed,320,250
Merecedes,weight,75,65
Merecedes,length,12,10


In [42]:
df.drop(index='Jaguar', columns = 'small')

Unnamed: 0,Unnamed: 1,big
BMW,speed,125
BMW,weight,200
BMW,length,10
Merecedes,speed,320
Merecedes,weight,75
Merecedes,length,12


In [43]:
df.drop(index='length', level=1)

Unnamed: 0,Unnamed: 1,big,small
BMW,speed,125,130
BMW,weight,200,100
Jaguar,speed,300,200
Jaguar,weight,150,70
Merecedes,speed,320,250
Merecedes,weight,75,65


### Pandas Dropna : dropna()

In [45]:
df = pd.DataFrame({"name": ['George', 'James', 'Maxwell', 'Micheal', 'Clarke'],
                  "subject_liked": [np.nan, 'Geography', 'History', np.nan, 'English'],
                  "born": [pd.NaT, pd.Timestamp("1985-04-02"), pd.NaT, pd.Timestamp("1998-07-09"),
                         pd.Timestamp("2005-10-15"),]})
df

Unnamed: 0,name,subject_liked,born
0,George,,NaT
1,James,Geography,1985-04-02
2,Maxwell,History,NaT
3,Micheal,,1998-07-09
4,Clarke,English,2005-10-15


In [46]:
df.dropna()

Unnamed: 0,name,subject_liked,born
1,James,Geography,1985-04-02
4,Clarke,English,2005-10-15


In [47]:
# dropping NA values NA values by using columns
df.dropna(axis='columns')

Unnamed: 0,name
0,George
1,James
2,Maxwell
3,Micheal
4,Clarke


In [48]:
# using 'how' parameter
df.dropna(how = 'all')

Unnamed: 0,name,subject_liked,born
0,George,,NaT
1,James,Geography,1985-04-02
2,Maxwell,History,NaT
3,Micheal,,1998-07-09
4,Clarke,English,2005-10-15


In [49]:
# using thrash parameter of dropna function
df.dropna(thresh=2)

Unnamed: 0,name,subject_liked,born
1,James,Geography,1985-04-02
2,Maxwell,History,NaT
3,Micheal,,1998-07-09
4,Clarke,English,2005-10-15


In [50]:
# using subset parameter in pandas dropna()
df.dropna(subset=['name', 'born'])

Unnamed: 0,name,subject_liked,born
1,James,Geography,1985-04-02
3,Micheal,,1998-07-09
4,Clarke,English,2005-10-15
