# Pandas Essential Functionalities

## How To Reindex Pandas Objects

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.Series.reindex?

In [3]:
ob = pd.Series([1, 2, 3, 6], index=['d', 'b', 'a', 'c'])
ob

d    1
b    2
a    3
c    6
dtype: int64

In [4]:
ob2 = ob.reindex(index=['a', 'b', 'c', 'd'])
ob2

a    3
b    2
c    6
d    1
dtype: int64

In [5]:
ob3 = ob.reindex(index=['a', 'b', 'c', 'd', 'e'])
ob3

a    3.0
b    2.0
c    6.0
d    1.0
e    NaN
dtype: float64

In [6]:
ob4 = pd.Series([1, 2, 3], index = [0, 1, 2])
ob4

0    1
1    2
2    3
dtype: int64

In [7]:
ob4.reindex(index=np.arange(6))

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [8]:
ob4.reindex(index=np.arange(6), method = 'ffill')

0    1
1    2
2    3
3    3
4    3
5    3
dtype: int64

In [9]:
ob5 = pd.DataFrame(np.arange(9).reshape((3, 3)), 
                  index=['a', 'c', 'd'], columns=['Andhra', 'Tamilnadu', 'Kerala'])
ob5

Unnamed: 0,Andhra,Tamilnadu,Kerala
a,0,1,2
c,3,4,5
d,6,7,8


In [10]:
pd.DataFrame.reindex?

In [11]:
ob6 = ob5.reindex(index=['a', 'b', 'c', 'd'])
ob6

Unnamed: 0,Andhra,Tamilnadu,Kerala
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [12]:
capitals = ['Andhra', 'Telangana', 'Tamilnadu', 'Kerala']
print(ob5)
ob5.reindex(columns=capitals)

   Andhra  Tamilnadu  Kerala
a       0          1       2
c       3          4       5
d       6          7       8


Unnamed: 0,Andhra,Telangana,Tamilnadu,Kerala
a,0,,1,2
c,3,,4,5
d,6,,7,8


In [13]:
# Unlike `reindex`, label-based selection with `.loc` does not insert missing labels:
# 'b' is not in ob5's index, so this lookup raises KeyError and the line after it never runs.
ob5.loc[['a', 'b', 'c', 'd']]
ob5

KeyError: "['b'] not in index"

## Dropping Entries From an Axis

In [None]:
import pandas as pd
import numpy as np

In [None]:
pd.Series.drop?

In [None]:
pd.DataFrame.drop?

In [None]:
data = pd.Series(np.arange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
data

In [None]:
data.drop('a')

In [None]:
n_data = data.drop('a')

In [None]:
data.drop(['a', 'd'])

In [None]:
dataframe = pd.DataFrame(np.arange(16).reshape((4, 4)), 
                  index=['a', 'b', 'd', 'e'], columns=['Karnataka', 'Andhra', 'Tamilnadu', 'Kerala'])
dataframe

In [None]:
dataframe.drop(['a', 'e'])

In [None]:
dataframe.drop('Kerala', axis=1)

In [None]:
dataframe.drop(['Kerala', 'Andhra'], axis=1)

In [None]:
dataframe.drop(['Kerala', 'Tamilnadu'], axis='columns')

In [None]:
dataframe

In [None]:
dataframe.drop(['Kerala', 'Andhra'], axis=1, inplace=True)

In [None]:
dataframe

In [None]:
dataframe

## Arithmetic and Data Alignment

In [None]:
import pandas as pd 
import numpy as np

In [None]:
ser1 = pd.Series([7, 5, 4, 1], index=['a', 'c', 'd', 'e'])
ser1

In [None]:
ser2 = pd.Series([7, 5, 4, 1, 3], index=['a', 'c', 'e', 'f', 'g'])
ser2

In [None]:
ser1 + ser2

In [None]:
df1 = pd.DataFrame(np.arange(9).reshape((3, 3)), 
                  columns=['a', 'c', 'd'], index=['Andhra', 'Tamilnadu', 'Kerala'])
df1

In [None]:
df2 = pd.DataFrame(np.arange(16).reshape((4, 4)), 
                  columns=['a', 'b', 'd', 'e'], index=['Karnataka', 'Andhra', 'Tamilnadu', 'Kerala'])
df2

In [None]:
print('df1:'); print(df1)
print('df2:'); print(df2)
df1 + df2

In [None]:
df3 = pd.DataFrame({'A': [1, 2]})
df3

In [None]:
df4 = pd.DataFrame({'B': [3, 4]})
df4

In [None]:
df3 + df4

In [None]:
df3 - df4

## Arithmetic methods with fill values

In [None]:
import pandas as pd
import numpy as np

In [None]:
df5 = pd.DataFrame(np.arange(12).reshape((3, 4)), columns=list('abcd'))
df5

In [None]:
df6 = pd.DataFrame(np.arange(20).reshape((4, 5)), columns=list('abcde'))
df6

In [None]:
df6.loc[1, 'c'] = np.nan
df6

In [None]:
print('df5:'); print(df5);
print('df6:'); print(df6)
df5 + df6

In [None]:
pd.DataFrame.add?

In [None]:
print('df5:'); print(df5)
print('df6:'); print(df6)
df5.add(df6, fill_value=0)

In [None]:
print('Before addition')

print(type(df5.loc[1, 'd'])); print(type(df6.loc[1, 'd'])); print()

print("After addition")
print(type(df5.add(df6, fill_value=0).loc[1, 'd']))

In [None]:
print('df5:'); print(df5)
print('df6:'); print(df6)
df5.add(df6, fill_value=10)

In [None]:
print(df5)
# scalar division
1/df5

In [None]:
print(df5)
df5 * 2

In [None]:
print(df5)
df5 - 3

In [None]:
print(df5)
df5.rdiv(1)

In [None]:
print(df5)
df5.rmul(2)

In [None]:
print(df5)
df5.rpow(2)

In [None]:
print(df5)
df5.radd(10)

## Operations Between DataFrame and Series

In [None]:
import pandas as pd
import numpy as np

In [None]:
df7 = pd.DataFrame(np.arange(12.).reshape((4, 3)),  
                  columns=list('bde'), index=['One', 'Two', 'Three', 'Four'])
df7

In [None]:
df7_ser = df7.iloc[0]
print(df7_ser); 
print(type(df7_ser))

In [None]:
print(df7); print(df7_ser)
df7 - df7_ser

In [None]:
df7

In [None]:
ser2 = pd.Series(range(3), index=['b', 'e', 'f'])
ser2

In [None]:
df7 + ser2

In [None]:
df7

In [None]:
df7_col = df7['b']
df7_col

In [None]:
df7.sub(df7_col, axis='index')

## Function Application and Mapping

In [None]:
import pandas as pd
import numpy as np

In [None]:
pd.DataFrame.apply?

In [None]:
pd.DataFrame.applymap?

In [None]:
df8 = pd.DataFrame(np.random.randn(4, 3),  
                  columns=list('bde'), index=['One', 'Two', 'Three', 'Four'])
df8

In [None]:
abs(df8)

In [None]:
f = lambda x: x.max()
print(df8)
df8.apply(f)

In [None]:
f = lambda x: x.min()
df8.apply(f)

In [None]:
f = lambda x: x.max() - x.min()
df8.apply(f)

In [None]:
f = lambda x: x.max()
print(df8)
df8.apply(f, axis='columns')

In [None]:
f = lambda x: x.max() - x.min()
df8.apply(f, axis='columns')

In [None]:
def f(x):
    """Summarise one column (or row) as a Series holding its max, min and mean."""
    labels = ['max', 'min', 'mean']
    values = [x.max(), x.min(), x.mean()]
    return pd.Series(values, index=labels)
print(df8)
df8.apply(f)

In [None]:
# Format every element of df8 as a string with three decimal places.
# NOTE(review): recent pandas releases deprecate `applymap` in favour of `DataFrame.map` — confirm against the installed version.
f = lambda x: '%.3f' %x
df8.applymap(f)

## Sorting and Ranking

In [None]:
import pandas as pd
import numpy as np

In [None]:
pd.DataFrame.sort_index?

In [None]:
pd.DataFrame.rank?

In [None]:
series = pd.Series(range(6), index=['d', 'a', 'b', 'c', 'f', 'g'])
series

In [None]:
series.sort_index(axis=0, level=None, ascending=True)

In [None]:
df9 = pd.DataFrame(np.random.randn(4, 5),  
                   columns=list('bdeac'), index=['1', '3', '2', '4'])
df9

In [None]:
df9.sort_index(axis=1, level=None, ascending=True)

In [None]:
df9.sort_index(axis=1, level=None, ascending=False)

In [None]:
df9.sort_index(axis=0, level=None, ascending=True)

In [None]:
df10 =pd.DataFrame(np.arange(12).reshape((3, 4)), 
                   index=['1', '3', '2'], 
                   columns=['d', 'a', 'b', 'c'])
df10

In [None]:
# Order the rows by their index labels. The old `by` keyword is no longer
# accepted by `sort_index`; it was a no-op here (`by=None`), so it is dropped.
df10.sort_index(axis='index', level=None, ascending=True)

In [None]:
# Order the rows by the values in column 'd'. Sorting by column values is the
# job of `sort_values`; `sort_index` no longer accepts a `by` argument.
df10.sort_values(by=['d'], axis='index', ascending=True)

In [None]:
pd.DataFrame.sort_values?

In [None]:
print(df9)
df9.sort_values(by=['b'])

In [None]:
print(df9)
df9.sort_values(by=['d'])

In [None]:
df8

In [None]:
print(df8)
df8.rank()

In [None]:
print(df8)
df8.rank(axis='columns')

In [None]:
s = df8.loc[:, 'b']
s

In [None]:
s.rank()

## Axis Indexes with Duplicate Labels

In [None]:
import pandas as pd
import numpy as np

In [None]:
di_s = pd.Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
di_s

In [None]:
pd.Index.is_unique?

In [None]:
di_s.index.is_unique

In [None]:
di_s['a']

In [None]:
di_s['b']

In [None]:
df11 = pd.DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])
df11

In [None]:
df11.index.is_unique

In [None]:
df11.loc['a']

## How to Summarise and compute Descriptive Statistics?

In [None]:
import pandas as pd
import numpy as np

In [None]:
df12 = pd.DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]], 
                         index=['a', 'b', 'c', 'd'], 
                         columns=['one', 'two'])
df12

In [None]:
df12.sum()

In [None]:
df12.sum(axis='columns')

In [None]:
df12.mean(axis='columns', skipna=False)

In [None]:
df12.idxmax()

In [None]:
df12.cumsum()

In [None]:
pd.Series.describe?

In [None]:
df12.describe()

In [None]:
ser = pd.Series(['a', 'a', 'b', 'c'] * 4)
print(ser)
print(ser.describe())

## Unique Values, Value Counts, and Membership

In [None]:
import pandas as pd

In [None]:
ser_u = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
ser_u

In [None]:
uniques = ser_u.unique()
uniques

In [None]:
ser_u.value_counts()

In [None]:
membership = ser_u.isin(['d', 'c'])
print(ser_u)
membership

In [None]:
ser_u[membership]

In [None]:
non_dist = pd.Series(['c', 'a', 'b', 'b', 'c', 'a'])
non_dist

In [None]:
dist = pd.Series(['c', 'b', 'a'])
dist

In [None]:
pd.Index(dist).get_indexer(non_dist)

In [None]:
df13 = pd.DataFrame({'Qu1': [1, 3, 4, 3], 
                     'Qu2': [2, 3, 1, 2], 
                     'Qu3': [1, 5, 2, 4]})
df13

In [None]:
# Count how often each value occurs in every column. `pd.Series.value_counts`
# is applied per column; the top-level `pd.value_counts` helper is deprecated.
histogram = df13.apply(pd.Series.value_counts)
histogram

In [None]:
# Same per-column value counts, with values absent from a column shown as 0
# instead of NaN. Uses `pd.Series.value_counts` rather than the deprecated
# top-level `pd.value_counts`.
histogram = df13.apply(pd.Series.value_counts).fillna(0)
histogram