# Pandas Operation:

Let's create a dummy DataFrame.

In [1]:
import pandas as pd

In [2]:
# Build the demo frame from a plain dictionary: each key becomes a column
# name and each list supplies that column's values, row by row.
dataFrame = pd.DataFrame(
    data={
        'Column-1': [1, 2, 3, 4],
        'Column-2': [444, 555, 666, 444],
        'Column-3': ['abc', 'def', 'ghi', 'xyz'],
    }
)

# Inside pd.DataFrame(Python_Dictionary)

In [3]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [4]:
dataFrame.head()

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


## Finding Unique Value In DataFrame

We can find the unique values in a DataFrame column using three methods. Suppose we want to find all the unique values in Column-2; the methods we can use are:

#### Method 1:

In [19]:
dataFrame['Column-2'].unique()

array([444, 555, 666], dtype=int64)

The unique() method returns a one-dimensional NumPy array (here, a vector with 3 elements) containing the unique values in Column-2. We can count the number of unique values by passing this array to the len() function.

In [20]:
len(dataFrame['Column-2'].unique())

3

#### Method 2:

In [21]:
dataFrame['Column-2'].nunique()

3

The nunique() method returns the number of unique values in Column-2.

#### Method 3:

In [22]:
dataFrame['Column-2'].value_counts()

444    2
555    1
666    1
Name: Column-2, dtype: int64

This method gives us the unique values in Column-2 and how many times each of them appears.

# Conditional Selection

Visualize our dataFrame

In [23]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


Return the dataFrame where Column-1 > 2

In [24]:
dataFrame[dataFrame['Column-1'] > 2]

Unnamed: 0,Column-1,Column-2,Column-3
2,3,666,ghi
3,4,444,xyz


Return the dataFrame where Column-1 > 2 And Column-3 == "xyz"

In [25]:
dataFrame[(dataFrame['Column-1'] > 2) & (dataFrame['Column-3'] == 'xyz')]

Unnamed: 0,Column-1,Column-2,Column-3
3,4,444,xyz


# Applying Function On DataFrame

In [26]:
def times2(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

Visualize our dataFrame

In [27]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [30]:
dataFrame['Column-1'].apply(times2)

0    2
1    4
2    6
3    8
Name: Column-1, dtype: int64

Let's say we want to compute the length of each string in Column-3.

In [32]:
dataFrame['Column-3'].apply(len)

0    3
1    3
2    3
3    3
Name: Column-3, dtype: int64

# Apply Lambda Expression On DataFrame

Multiply every value in Column-2 by 2 using a lambda expression. First, visualize our dataFrame.

In [33]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [36]:
dataFrame['Column-2'].apply(lambda x: x*2)

0     888
1    1110
2    1332
3     888
Name: Column-2, dtype: int64

# Removing Column From DataFrame

Visualize dataFrame

In [37]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


Let's say we want to remove Column-3.

In [38]:
dataFrame.drop('Column-3',axis=1)

Unnamed: 0,Column-1,Column-2
0,1,444
1,2,555
2,3,666
3,4,444


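This does not permanently drop Column-3: drop() returns a new DataFrame and leaves the original unchanged. To drop the column permanently, pass inplace=True (or assign the result back to dataFrame). With the default axis=0, drop() removes rows by index label instead; for example, the following drops the row with index 2: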

In [40]:
dataFrame.drop(2)

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
3,4,444,xyz


# Find Out Row & Columns Name In DataFrame

In [42]:
dataFrame.columns

Index(['Column-1', 'Column-2', 'Column-3'], dtype='object')

In [43]:
dataFrame.index

RangeIndex(start=0, stop=4, step=1)

# Sorting & Ordering DataFrame

In [45]:
dataFrame

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [47]:
dataFrame.sort_values('Column-2')

Unnamed: 0,Column-1,Column-2,Column-3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi


# Find Null Value In DataFrame

In [49]:
dataFrame.isnull()

Unnamed: 0,Column-1,Column-2,Column-3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [56]:
import numpy as np

In [57]:
# Build a small frame that has one missing value (NaN) in each numeric
# column, so the null-handling methods have something to act on.
df = pd.DataFrame(
    data={
        'col1': [1, 2, 3, np.nan],
        'col2': [np.nan, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
# Display the first rows of the frame.
df.head()

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz


In [58]:
df.fillna('FILL')

Unnamed: 0,col1,col2,col3
0,1,FILL,abc
1,2,555,def
2,3,666,ghi
3,FILL,444,xyz


# Pivot Table

For this part we will use a new DataFrame, so first create a Python dictionary.

In [59]:
# Source data for the pivot-table example: A and B are used as grouping
# keys, C supplies the pivoted column labels, and D holds the values.
dictionary = dict(
    A=['foo'] * 3 + ['bar'] * 3,
    B=['one', 'one', 'two', 'two', 'one', 'one'],
    C=['x', 'y'] * 3,
    D=[1, 3, 2, 5, 4, 1],
)
df = pd.DataFrame(data=dictionary)

In [60]:
df

Unnamed: 0,A,B,C,D
0,foo,one,x,1
1,foo,one,y,3
2,foo,two,x,2
3,bar,two,y,5
4,bar,one,x,4
5,bar,one,y,1


In [61]:
df.pivot_table(values='D',index=['A','B'],columns=['C'])

Unnamed: 0_level_0,C,x,y
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,1.0
bar,two,,5.0
foo,one,1.0,3.0
foo,two,2.0,
