# Pandas

pandas provides high-performance, easy-to-use data structures and data analysis tools for Python.

In [1]:
import pandas as pd

## Series

In [2]:
# A Series pairs each value with an explicit index label.
s1 = pd.Series(data=[1, 2, 3], index=list('abc'))
s1

a    1
b    2
c    3
dtype: int64

In [20]:
# Building a Series from a dict: the keys become the index labels.
dict1 = dict(a='Harish', b='Reetika')
s2 = pd.Series(data=dict1)
s2

a     Harish
b    Reetika
dtype: object

In [7]:
s2.values  # the data only (no index labels), returned as a NumPy array

array(['Harish', 'Reetika'], dtype=object)

In [10]:
s2.index[0]  # first index label

'a'

In [8]:
s2['a']  # look up a value by its index label

'Harish'

In [15]:
s2.name = 'S2'  # give the Series a name; it is shown in the repr

In [16]:
s2  # display s2 to see the new name

a     Harish
b    Reetika
Name: S2, dtype: object

In [18]:
s2.index.name = 'alpha'  # label the index itself; shown above the index values in the repr

In [19]:
s2  # display s2 to see the index name

alpha
a     Harish
b    Reetika
Name: S2, dtype: object

In [21]:
# Second Series with explicit labels 'a' and 'b'.
s3 = pd.Series(data=['Hunny', 'Ritu'], index=list('ab'))

In [23]:
# Element-wise string concatenation: each value of s2, a space, then the matching value of s3.
s4 = s2 + ' ' + s3
s4

a    Harish Hunny
b    Reetika Ritu
dtype: object

In [27]:
pd.Series([1,2,321,2,34,2,2,1,2]).unique()  # distinct values, in order of first appearance

array([  1,   2, 321,  34], dtype=int64)

## DataFrames

In [32]:
# Each dict key becomes a column; `index` supplies the row labels.
df1 = pd.DataFrame(
    data={
        'State': ['Himachal Pradesh', 'Maharashtra', 'Karnataka'],
        'Capital': ['Shimla', 'Mumbai', 'Bengaluru'],
    },
    index=list('abc'),
)
df1

Unnamed: 0,State,Capital
a,Himachal Pradesh,Shimla
b,Maharashtra,Mumbai
c,Karnataka,Bengaluru


In [33]:
df1.columns  # column labels

Index(['State', 'Capital'], dtype='object')

In [37]:
df1.values  # cell values as a 2-D NumPy array (row and column labels are dropped)

array([['Himachal Pradesh', 'Shimla'],
       ['Maharashtra', 'Mumbai'],
       ['Karnataka', 'Bengaluru']], dtype=object)

In [38]:
df1.index  # row labels

Index(['a', 'b', 'c'], dtype='object')

In [42]:
df1['State'] # single column name: the column comes back as a Series

a    Himachal Pradesh
b         Maharashtra
c           Karnataka
Name: State, dtype: object

In [43]:
df1[['Capital']]  # list of column names: the result stays a DataFrame

Unnamed: 0,Capital
a,Shimla
b,Mumbai
c,Bengaluru


In [51]:
df1.loc['a'] # single row label: the row comes back as a Series

State      Himachal Pradesh
Capital              Shimla
Name: a, dtype: object

In [52]:
df1.loc[['b']]  # list of row labels: the result stays a DataFrame

Unnamed: 0,State,Capital
b,Maharashtra,Mumbai


In [53]:
df1.iloc[2] # select a row by integer position (0-based), not by label

State      Karnataka
Capital    Bengaluru
Name: c, dtype: object

In [63]:
df1['Rank'] = [1,2,3] # assigning to a new column name adds that column to df1 in place
df1

Unnamed: 0,State,Capital,Rank
a,Himachal Pradesh,Shimla,1
b,Maharashtra,Mumbai,2
c,Karnataka,Bengaluru,3


In [64]:
del df1['Rank'] # del removes the column from df1 in place
df1

Unnamed: 0,State,Capital
a,Himachal Pradesh,Shimla
b,Maharashtra,Mumbai
c,Karnataka,Bengaluru


In [65]:
df1.T # transpose: row labels become column labels and vice versa

Unnamed: 0,a,b,c
State,Himachal Pradesh,Maharashtra,Karnataka
Capital,Shimla,Mumbai,Bengaluru


In [68]:
# No index given, so rows get the default integer labels 0..3.
df2 = pd.DataFrame(data=dict(a=[23, 34, 54, 52], b=[54, 65, 87, 34]))
df2

Unnamed: 0,a,b
0,23,54
1,34,65
2,54,87
3,52,34


In [76]:
df2.sum(axis = 1)  # axis=1 adds across the columns, giving one total per row

0     77
1     99
2    141
3     86
dtype: int64

### Handling Missing Data

In [79]:
# Each column has one None entry, which pandas stores as a missing value (NaN).
df3 = pd.DataFrame(data=dict(a=[1, 2, 3, None], b=[4, None, 5, 9]))

In [80]:
df3  # display df3; the None entries appear as missing values

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,
2,3.0,5.0
3,,9.0


In [81]:
df3.dropna() # drop every row that has at least one missing value; df3 itself is not modified

Unnamed: 0,a,b
0,1.0,4.0
2,3.0,5.0


In [84]:
df3.dropna(how = 'all') # drop a row only if every value in it is missing; no such row here, so nothing is removed

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,
2,3.0,5.0
3,,9.0


In [87]:
# Replace every missing value with 1.
# fillna returns a new frame, so df3 keeps its NaNs and this cell can be
# re-run safely; inplace=True would overwrite the example data.
df3.fillna(1)

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,1.0
2,3.0,5.0
3,1.0,9.0


In [89]:
# Rebuild the frame with its missing values, then fill each column with its own value.
df3 = pd.DataFrame(data=dict(a=[1, 2, 3, None], b=[4, None, 5, 9]))
df3.fillna(value={'a': 1, 'b': 0})  # column-wise fill: NaN in 'a' -> 1, NaN in 'b' -> 0

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,0.0
2,3.0,5.0
3,1.0,9.0


In [90]:
# Forward fill: each missing value takes the last valid value above it.
# fillna(method='ffill') is deprecated in recent pandas; ffill() is the replacement.
# Pass limit=n to fill at most n consecutive missing values per gap.
df3.ffill()

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,4.0
2,3.0,5.0
3,3.0,9.0


In [91]:
# Backward fill: each missing value takes the next valid value below it.
# fillna(method='bfill') is deprecated in recent pandas; bfill() is the replacement.
# The NaN in the last row of 'a' has nothing below it, so it stays missing.
df3.bfill()

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,5.0
2,3.0,5.0
3,,9.0


### Reshaping Data

In [92]:
# Five-row frame with the same column names ('a', 'b') as df3.
df4 = pd.DataFrame(data=dict(a=[6, 7, 9, 8, 0], b=[1, 2, 3, 6, 0]))

In [96]:
pd.concat([df3,df4])  # stack df4's rows under df3's; original index labels are kept, so 0-3 repeat

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,
2,3.0,5.0
3,,9.0
0,6.0,1.0
1,7.0,2.0
2,9.0,3.0
3,8.0,6.0
4,0.0,0.0


In [97]:
pd.concat([df3,df4], axis = 1)  # place the frames side by side; df3 has no row 4, so its columns are missing there

Unnamed: 0,a,b,a.1,b.1
0,1.0,4.0,6,1
1,2.0,,7,2
2,3.0,5.0,9,3
3,,9.0,8,6
4,,,0,0


In [101]:
df3.sort_values('b', ascending = False)  # sort rows by column 'b', largest first; the missing value ends up last

Unnamed: 0,a,b
3,,9.0
2,3.0,5.0
0,1.0,4.0
1,2.0,


In [103]:
df3.rename(columns = {'a' : 'alpha'})  # returns a frame with 'a' renamed to 'alpha'; df3 itself keeps its column names

Unnamed: 0,alpha,b
0,1.0,4.0
1,2.0,
2,3.0,5.0
3,,9.0


In [109]:
df3.sort_index()  # sort rows by index label; df3 is already in index order, so nothing moves

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,
2,3.0,5.0
3,,9.0


In [111]:
df3.drop(columns=['a'])  # result omits column 'a'

Unnamed: 0,b
0,4.0
1,
2,5.0
3,9.0


### Subset Observations

In [113]:
df3[df3['a'] > 2]  # boolean mask: keep only the rows where 'a' is greater than 2 (the row with a missing 'a' is excluded)

Unnamed: 0,a,b
2,3.0,5.0


In [114]:
df3.sample(frac = 0.5)  # random 50% of the rows, so the selection can differ between runs; pass random_state=<int> to make it repeatable

Unnamed: 0,a,b
0,1.0,4.0
1,2.0,


#### End