In [1]:
import numpy as np
import pandas as pd

# Series

In [2]:
# Sample inputs reused by the Series examples below:
# six integers, six matching one-letter labels, and a small letter -> value map.
l1 = list(range(1, 7))
labels = list("abcdef")
d1 = {key: 10 * rank for rank, key in enumerate("ABCDE", start=1)}

In [3]:
# Build a Series from the plain list. No index is given, so the values are
# labelled with the default integers 0..5 (see the output below).
s1 = pd.Series(l1)
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
# Fetch the element whose index label is 4 in the default integer index.
s1[4]

5

In [5]:
# A Series can hold strings as well; its dtype is then reported as `object`.
s2 = pd.Series(labels)
s2

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [6]:
# Lookup by the default integer label 4, this time returning a string.
s2[4]

'e'

In [7]:
# Pair the values in l1 with the custom string labels in `labels`.
s3 = pd.Series(data=l1, index=labels)
s3

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [8]:
# Label-based lookup using the custom string index.
s3['a']

1

In [9]:
# Positional lookup. s3 is labelled with strings, so an integer passed to plain
# [] only works through a positional fallback that pandas has deprecated;
# .iloc always treats the integer as a position.
s3.iloc[0]

1

In [10]:
# Build a Series from a dict: the keys become the index, the values the data.
pd.Series(d1)

A    10
B    20
C    30
D    40
E    50
dtype: int64

# DataFrame

In [11]:
# Draw a 5x6 matrix of integers in [1, 100) from a seeded generator so that
# Restart & Run All yields the same values on every run.
# NOTE: the outputs recorded in this notebook came from an earlier unseeded
# draw; re-run all cells to refresh them.
arr = np.random.default_rng(seed=42).integers(low=1, high=100, size=(5, 6))
arr

array([[ 6, 52, 94, 96, 24, 48],
       [34, 53,  6, 46, 78, 50],
       [20, 68, 25, 43, 25,  7],
       [77, 28, 53, 44,  6, 18],
       [25, 45, 86, 15, 20, 21]])

In [12]:
# Confirm that arr is a NumPy ndarray before wrapping it in a DataFrame.
type(arr)

numpy.ndarray

In [13]:
# With no labels supplied, rows and columns both get default integer labels.
pd.DataFrame(arr)

Unnamed: 0,0,1,2,3,4,5
0,6,52,94,96,24,48
1,34,53,6,46,78,50
2,20,68,25,43,25,7
3,77,28,53,44,6,18
4,25,45,86,15,20,21


In [14]:
# Wrap the array in a labelled frame: rows A-E, columns U-Z.
df = pd.DataFrame(data=arr, index=list("ABCDE"), columns=list("UVWXYZ"))
df

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


In [15]:
# Confirm the type of the object that was just built.
type(df)

pandas.core.frame.DataFrame

## Grabbing Columns

In [16]:
# Selecting a single column by name returns a Series.
df["X"]

A    96
B    46
C    43
D    44
E    15
Name: X, dtype: int64

In [17]:
# Passing a list of column names returns a DataFrame with the columns in the
# requested order.
df[["X", "Z", "V"]]

Unnamed: 0,X,Z,V
A,96,48,52
B,46,50,53
C,43,7,68
D,44,18,28
E,15,21,45


## Grabbing Rows

In [18]:
# .loc selects a row by its index label; a single row comes back as a Series.
df.loc["C"]

U    20
V    68
W    25
X    43
Y    25
Z     7
Name: C, dtype: int64

In [19]:
# A list of labels selects several rows and returns a DataFrame.
df.loc[["A", "B", "E"]]

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
E,25,45,86,15,20,21


In [20]:
# .iloc selects by integer position: position 2 is the row labelled "C".
df.iloc[2]

U    20
V    68
W    25
X    43
Y    25
Z     7
Name: C, dtype: int64

## Adding a New Column

In [21]:
# Show the frame as it stands before a column is added.
df

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


In [22]:
# Assigning to a column name that does not exist yet appends a new column.
df['New'] = list(range(10, 60, 10))

In [23]:
# The frame now has the extra 'New' column on the right.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,6,52,94,96,24,48,10
B,34,53,6,46,78,50,20
C,20,68,25,43,25,7,30
D,77,28,53,44,6,18,40
E,25,45,86,15,20,21,50


In [24]:
# Assigning to an existing column name overwrites that column's values.
df['New'] = list(range(100, 600, 100))

In [25]:
# 'New' now holds the replacement values.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,6,52,94,96,24,48,100
B,34,53,6,46,78,50,200
C,20,68,25,43,25,7,300
D,77,28,53,44,6,18,400
E,25,45,86,15,20,21,500


## Deleting a Column

In [26]:
# Show the frame before the column is dropped.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,6,52,94,96,24,48,100
B,34,53,6,46,78,50,200
C,20,68,25,43,25,7,300
D,77,28,53,44,6,18,400
E,25,45,86,15,20,21,500


In [27]:
# drop() returns a new frame without 'New' (axis=1 targets columns);
# df itself is left unchanged.
df.drop('New', axis=1)

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


In [28]:
# inplace=True removes the column from df itself, so the cell has no output.
# NOTE(review): this cell is not idempotent - once 'New' is gone, running it
# a second time should fail because the label no longer exists.
df.drop('New', axis=1, inplace=True)

In [29]:
# Confirm that 'New' is gone from df.
df

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


## Conditional Selection

In [30]:
# Show the frame that the filters below operate on.
df

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


In [31]:
# The column the conditions below are built from.
df['X']

A    96
B    46
C    43
D    44
E    15
Name: X, dtype: int64

In [32]:
# Element-wise comparison yields a boolean Series: True where X is even.
df['X'] % 2 == 0

A     True
B     True
C    False
D     True
E    False
Name: X, dtype: bool

In [33]:
# Indexing the frame with a boolean Series keeps only the rows marked True.
df[df['X'] % 2 == 0]

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
D,77,28,53,44,6,18


In [34]:
# Filter the rows and pick the column in a single .loc call (row mask, column
# label) instead of chaining two [] lookups through an intermediate frame.
df.loc[df['X'] % 2 == 0, 'Y']

A    24
B    78
D     6
Name: Y, dtype: int64

In [35]:
# Combine conditions with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than == and >.
(df['X'] % 2 == 0) & (df['X'] > 50)

A     True
B    False
C    False
D    False
E    False
Name: X, dtype: bool

In [36]:
# Keep only the rows where X is both even and greater than 50.
df[(df['X'] % 2 == 0) & (df['X'] > 50)]

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48


## Resetting and Setting an Index

In [37]:
# Show the frame before its index is changed.
df

Unnamed: 0,U,V,W,X,Y,Z
A,6,52,94,96,24,48
B,34,53,6,46,78,50
C,20,68,25,43,25,7
D,77,28,53,44,6,18
E,25,45,86,15,20,21


In [38]:
# reset_index() returns a new frame: the old labels move into an 'index'
# column and the rows get a fresh integer index. df itself is unchanged.
df.reset_index()

Unnamed: 0,index,U,V,W,X,Y,Z
0,A,6,52,94,96,24,48
1,B,34,53,6,46,78,50
2,C,20,68,25,43,25,7
3,D,77,28,53,44,6,18
4,E,25,45,86,15,20,21


In [39]:
# Apply the same reset to df itself; with inplace=True the cell has no output.
# NOTE(review): this cell is not idempotent - re-running it would move the
# integer index into yet another column.
df.reset_index(inplace=True)

In [40]:
# df now carries the old row labels in its 'index' column.
df

Unnamed: 0,index,U,V,W,X,Y,Z
0,A,6,52,94,96,24,48
1,B,34,53,6,46,78,50
2,C,20,68,25,43,25,7
3,D,77,28,53,44,6,18
4,E,25,45,86,15,20,21


In [41]:
# Splitting the string on whitespace yields one label per row; store the
# labels as a new 'States' column.
df['States'] = "PB RJ DL CHD J&K".split()

In [42]:
"PB RJ DL CHD J&K".split()

['PB', 'RJ', 'DL', 'CHD', 'J&K']

In [43]:
# The frame with the new 'States' column.
df

Unnamed: 0,index,U,V,W,X,Y,Z,States
0,A,6,52,94,96,24,48,PB
1,B,34,53,6,46,78,50,RJ
2,C,20,68,25,43,25,7,DL
3,D,77,28,53,44,6,18,CHD
4,E,25,45,86,15,20,21,J&K


In [44]:
# set_index() promotes 'States' to the row index. The result is only
# displayed here, not assigned, so df keeps its integer index.
df.set_index('States')

States,index,U,V,W,X,Y,Z
PB,A,6,52,94,96,24,48
RJ,B,34,53,6,46,78,50
DL,C,20,68,25,43,25,7
CHD,D,77,28,53,44,6,18
J&K,E,25,45,86,15,20,21


## Missing Values

In [45]:
# Toy table with a different amount of missing data in each column:
# A has one NaN, B has three, C is complete and D is entirely NaN.
d = dict(
    A=[1, 2, 3, np.nan],
    B=[5] + [np.nan] * 3,
    C=list(range(10, 50, 10)),
    D=[np.nan] * 4,
)

df = pd.DataFrame(data=d)
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,,20,
2,3.0,,30,
3,,,40,


In [46]:
# Boolean mask of the same shape as df: True wherever a value is missing.
df.isnull()

Unnamed: 0,A,B,C,D
0,False,False,False,True
1,False,True,False,True
2,False,True,False,True
3,True,True,False,True


In [47]:
# Summing the mask column-wise counts the missing values in each column.
df.isnull().sum()

A    1
B    3
C    0
D    4
dtype: int64

In [48]:
# axis=1 drops columns rather than rows: every column containing at least one
# NaN is removed, which leaves only C.
df.dropna(axis=1)

Unnamed: 0,C
0,10
1,20
2,30
3,40


In [49]:
# thresh=2 keeps a column only if it has at least 2 non-missing values,
# so B (1 value) and D (0 values) are dropped.
df.dropna(axis=1, thresh=2)

Unnamed: 0,A,C
0,1.0,10
1,2.0,20
2,3.0,30
3,,40


In [50]:
# The dropna() calls above returned new frames; df is still the original.
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,,20,
2,3.0,,30,
3,,,40,


In [51]:
# Replace every missing value with the string "FILL".
df.fillna("FILL")

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,FILL
1,2.0,FILL,20,FILL
2,3.0,FILL,30,FILL
3,FILL,FILL,40,FILL


In [52]:
# Fill each column's gaps with that column's mean. D stays empty because it
# has no values to average.
df.fillna(df.mean())

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,5.0,20,
2,3.0,5.0,30,
3,2.0,5.0,40,


In [53]:
# Replace every missing value with 0.
df.fillna(0)

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,0.0
1,2.0,0.0,20,0.0
2,3.0,0.0,30,0.0
3,0.0,0.0,40,0.0


## Grouping

In [54]:
# Sales records for the grouping examples, written one (company, employee,
# sales) row at a time and transposed by zip() into the column-oriented dict
# that pd.DataFrame expects.
d = {
    column: list(values)
    for column, values in zip(
        ("Company", "Employee", "Sales"),
        zip(
            ("FB", "Sam", 1000),
            ("GOOGLE", "Rachel", 500),
            ("MICROSOFT", "Maddy", 550),
            ("FB", "Joe", 2000),
            ("GOOGLE", "Srishti", 890),
            ("FB", "Shivay", 500),
            ("MICROSOFT", "Pushpa", 350),
            ("FB", "Kirti", 350),
        ),
    )
}

df = pd.DataFrame(data=d)
df

Unnamed: 0,Company,Employee,Sales
0,FB,Sam,1000
1,GOOGLE,Rachel,500
2,MICROSOFT,Maddy,550
3,FB,Joe,2000
4,GOOGLE,Srishti,890
5,FB,Shivay,500
6,MICROSOFT,Pushpa,350
7,FB,Kirti,350
