# Arithmetic & Data Alignment

In [20]:
# Run this cell first: every later cell needs numpy (np) and pandas (pd).
import numpy as np
import pandas as pd

In [21]:
# Demo frame: the integers 0..15 laid out 4x4, with city names as row
# labels and ordinal words as column labels.
cities = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Karachi', 'Lahore', 'Peshawar', 'Islamabad'],
    columns=['one', 'two', 'three', 'four'],
)

# Last expression in the cell, so the notebook renders the frame.
cities

Unnamed: 0,one,two,three,four
Karachi,0,1,2,3
Lahore,4,5,6,7
Peshawar,8,9,10,11
Islamabad,12,13,14,15


In [22]:
# .loc selects by label. One row label plus a list of column labels returns
# that row's values for those columns as a Series.
print(cities.loc['Karachi', ['one', 'two']])
# print() with no arguments emits a blank line between the two outputs.
print()
# A list of row labels plus a list of column labels returns a DataFrame
# limited to those rows and columns.
cities.loc[['Lahore', 'Islamabad'], ['one', 'four']]

one    0
two    1
Name: Karachi, dtype: int32



Unnamed: 0,one,four
Lahore,4,7
Islamabad,12,15


In [23]:
# .iloc selects by integer position (0-based) instead of by label.
# Passing a list of positions picks exactly those entries.
print(cities.iloc[1, [0, 2, 3]])   # row at position 1, columns at positions 0, 2 and 3
# A bare slice selects everything, so the next two prints show the same frame.
print()
print(cities.iloc[:])
print("Or")
print(cities)   # printing the frame directly gives the same output

one      4
three    6
four     7
Name: Lahore, dtype: int32

           one  two  three  four
Karachi      0    1      2     3
Lahore       4    5      6     7
Peshawar     8    9     10    11
Islamabad   12   13     14    15
Or
           one  two  three  four
Karachi      0    1      2     3
Lahore       4    5      6     7
Peshawar     8    9     10    11
Islamabad   12   13     14    15


In [24]:
# Positional slices work on both axes: rows first, then columns.
print(cities.iloc[:, :3])   # every row, first three columns (positions 0-2)
print("Or")
print(cities.iloc[:3, :3])  # first three rows and first three columns: a 3x3 frame

           one  two  three
Karachi      0    1      2
Lahore       4    5      6
Peshawar     8    9     10
Islamabad   12   13     14
Or
          one  two  three
Karachi     0    1      2
Lahore      4    5      6
Peshawar    8    9     10


In [25]:
# To filter rows only, give just the row slice; no column selector is needed.
print(cities.iloc[:3])   # first three rows (positions 0-2)
# All columns are kept unchanged.

          one  two  three  four
Karachi     0    1      2     3
Lahore      4    5      6     7
Peshawar    8    9     10    11


In [26]:
# Arithmetic between DataFrames is aligned on labels: values are combined
# only where both the row label and the column label match. Positions that
# exist in just one of the operands come out as NaN.

# First operand for the alignment demo: 0..15 in a 4x4 grid.
cities_one = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Karachi', 'Lahore', 'Peshawar', 'Islamabad'],
    columns=['A', 'B', 'D', 'E'],
)

cities_one

Unnamed: 0,A,B,D,E
Karachi,0,1,2,3
Lahore,4,5,6,7
Peshawar,8,9,10,11
Islamabad,12,13,14,15


In [27]:
# Second operand: 0..11 in a 3x4 grid. Its row and column labels only
# partly overlap with cities_one.
cities_two = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=['Karachi', 'Multan', 'Peshawar'],
    columns=['B', 'C', 'D', 'F'],
)

cities_two

Unnamed: 0,B,C,D,F
Karachi,0,1,2,3
Multan,4,5,6,7
Peshawar,8,9,10,11


In [28]:
# Adding two DataFrames matches cells by label, not by position: the
# 'Karachi' row is added to the other frame's 'Karachi' row, column 'B' to
# column 'B', and so on.

cities_one + cities_two
# Most cells are NaN because few (row, column) pairs exist in both frames;
# only Karachi/Peshawar crossed with B/D are shared.
# The result carries the union of both frames' row and column labels, so it
# is larger than either input.

Unnamed: 0,A,B,C,D,E,F
Islamabad,,,,,,
Karachi,,1.0,,4.0,,
Lahore,,,,,,
Multan,,,,,,
Peshawar,,17.0,,20.0,,


# Arithmetic Methods with Fill Values

In [38]:
# When two frames are added or multiplied and their labels do not fully
# match, the unmatched cells become NaN. The arithmetic methods (add, sub,
# mul, ...) accept fill_value to soften this: a cell missing in exactly ONE
# of the frames is treated as fill_value before the operation runs.
# A cell missing in BOTH frames still yields NaN.

cities_three = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Sukkhar', 'Rawalpindi', 'Peshawar', 'Multan'],
    columns=['A', 'C', 'D', 'E'],
)

# Column 'C' starts out with an integer dtype, which cannot hold NaN.
# Cast it to float explicitly before inserting the missing value; relying
# on pandas to upcast silently during assignment is deprecated (pandas 2.1+).
cities_three = cities_three.astype({'C': 'float64'})
cities_three.loc['Rawalpindi', 'C'] = np.nan
cities_three

Unnamed: 0,A,C,D,E
Sukkhar,0,1.0,2,3
Rawalpindi,4,,6,7
Peshawar,8,9.0,10,11
Multan,12,13.0,14,15


In [39]:
# Partner frame for the fill_value demo: 0..15 in a 4x4 grid.
cities_four = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Karachi', 'Rawalpindi', 'Peshawar', 'Islamabad'],
    columns=['A', 'B', 'D', 'F'],
)

cities_four

Unnamed: 0,A,B,D,F
Karachi,0,1,2,3
Rawalpindi,4,5,6,7
Peshawar,8,9,10,11
Islamabad,12,13,14,15


In [40]:
# add() aligns the two frames on row and column labels, just like `+`.
# With fill_value=0, a cell that is missing (NaN, or its label is absent) in
# exactly one frame is treated as 0, so the other frame's value comes through.
# A cell that is missing in both frames stays NaN in the result
# (e.g. Islamabad/C, which neither frame contains).
cities_add = cities_three.add(cities_four, fill_value=0)
cities_add

Unnamed: 0,A,B,C,D,E,F
Islamabad,12.0,13.0,,14.0,,15.0
Karachi,0.0,1.0,,2.0,,3.0
Multan,12.0,,13.0,14.0,15.0,
Peshawar,16.0,9.0,9.0,20.0,11.0,11.0
Rawalpindi,8.0,5.0,,12.0,7.0,7.0
Sukkhar,0.0,,1.0,2.0,3.0,


In [55]:
# A smaller, cleaner fill_value example: two float frames with the default
# integer row index. df2 has one more row and one more column ('e') than df1.

df1 = pd.DataFrame(
    data=np.arange(12.).reshape(3, 4),
    columns=list('abcd'),
)
df2 = pd.DataFrame(
    data=np.arange(20.).reshape(4, 5),
    columns=list('abcde'),
)
# Show both frames separated by one blank line.
print(df1, df2, sep='\n\n')

     a    b     c     d
0  0.0  1.0   2.0   3.0
1  4.0  5.0   6.0   7.0
2  8.0  9.0  10.0  11.0

      a     b     c     d     e
0   0.0   1.0   2.0   3.0   4.0
1   5.0   6.0   7.0   8.0   9.0
2  10.0  11.0  12.0  13.0  14.0
3  15.0  16.0  17.0  18.0  19.0


In [56]:
# Knock out one cell of df1 so it has a missing value to fill later.
df1.loc[1, 'b'] = np.nan   # row label 1, column 'b' becomes NaN
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,,6.0,7.0
2,8.0,9.0,10.0,11.0


In [57]:
# Knock out a different cell of df2, so each frame has its own missing value.
df2.loc[1, 'c'] = np.nan     # row label 1, column 'c' becomes NaN
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [58]:
# With fill_value=0, a cell that is NaN or absent in one frame counts as 0,
# so the other frame's value is kept: row 1 / column 'b' is NaN + 6.0 -> 6.0,
# and row 3 and column 'e' (absent from df1) equal df2's values.
df3 = df1.add(df2, fill_value=0)
df3

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,6.0,6.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [62]:
# DataFrame-minus-Series demo: subtracting a Series from a DataFrame
# subtracts the Series' values from every row of the frame.

# 0..11 in a 4x3 grid, rows labelled by city.
frame = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['Karachi', 'Lahore', 'Peshawar', 'Islamabad'],
    columns=list('abc'),
)

frame

Unnamed: 0,a,b,c
Karachi,0,1,2
Lahore,3,4,5
Peshawar,6,7,8
Islamabad,9,10,11


In [63]:
# Take the row at position 0 of frame (the 'Karachi' row). A single row
# comes back as a Series indexed by the frame's column labels.
series = frame.iloc[0]
series

a    0
b    1
c    2
Name: Karachi, dtype: int32

In [65]:
# Build a second Series by hand. It is independent of the Series taken from
# the frame's first row; its index uses the same labels as the frame's columns.
Series_new = pd.Series(data=[1, 3, 5], index=list('abc'))
# Show the labels and the raw values side by side.
print(Series_new.index, Series_new.values)
Series_new

Index(['a', 'b', 'c'], dtype='object') [1 3 5]


a    1
b    3
c    5
dtype: int64

In [66]:
# DataFrame - Series broadcasts row-wise: the Series index is matched
# against the frame's column labels and the operation is repeated for every row.
# Here the Series index (a, b, c) equals the frame's columns, so every column has a match.
frame - Series_new   # subtracts 1, 3, 5 from columns a, b, c in each row

Unnamed: 0,a,b,c
Karachi,-1,-2,-3
Lahore,2,1,0
Peshawar,5,4,3
Islamabad,8,7,6
