# Operations

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Small demo frame: two integer columns and one string column.
# column_2 contains a repeated value (222).
df = pd.DataFrame(
    {
        "column_1": [1, 2, 3, 4],
        "column_2": [111, 222, 333, 222],
        "column_3": ["aaa", "bbb", "ccc", "ddd"],
    }
)

In [4]:
# Preview the frame that was just built
df.head()

Unnamed: 0,column_1,column_2,column_3
0,1,111,aaa
1,2,222,bbb
2,3,333,ccc
3,4,222,ddd


## Unique Values

In [5]:
# Distinct values found in column_2 (displayed as an array in the output below)
df["column_2"].unique()

array([111, 222, 333])

In [6]:
# Number of distinct values in column_2

df["column_2"].nunique()

3

In [7]:
# How many times each distinct value occurs in column_2
df["column_2"].value_counts()

222    2
111    1
333    1
Name: column_2, dtype: int64

## Selecting Data

In [8]:
# Boolean-mask selection: keep only the rows where column_1 is greater than 2
df.loc[df["column_1"] > 2]

Unnamed: 0,column_1,column_2,column_3
2,3,333,ccc
3,4,222,ddd


In [9]:
# Combine conditions: wrap each comparison in parentheses and join them with &
# (element-wise AND), so only rows satisfying both are kept.
df.loc[(df["column_1"] > 2) & (df["column_2"] == 333)]

Unnamed: 0,column_1,column_2,column_3
2,3,333,ccc


## Apply method

In [10]:
def times3(x):
    """Return ``x`` multiplied by 3."""
    tripled = x * 3
    return tripled

In [11]:
# Call times3 on every element of column_1
df['column_1'].apply(times3)

0     3
1     6
2     9
3    12
Name: column_1, dtype: int64

In [12]:
# apply also accepts a built-in function: len gives the length of each string in column_3

df["column_3"].apply(len)

0    3
1    3
2    3
3    3
Name: column_3, dtype: int64

In [13]:
# An inline lambda works as well: square every value of column_2
df["column_2"].apply(lambda x : x**2)

0     12321
1     49284
2    110889
3     49284
Name: column_2, dtype: int64

In [14]:
# Display the frame again: the apply results above were never assigned back, so df is unchanged
df

Unnamed: 0,column_1,column_2,column_3
0,1,111,aaa
1,2,222,bbb
2,3,333,ccc
3,4,222,ddd


In [15]:
# Column labels of the frame
df.columns 

Index(['column_1', 'column_2', 'column_3'], dtype='object')

In [16]:
# Row labels of the frame
df.index

RangeIndex(start=0, stop=4, step=1)

In [17]:
# Sort the rows by column_2; the result is only displayed, not assigned back to df

df.sort_values("column_2")

Unnamed: 0,column_1,column_2,column_3
0,1,111,aaa
1,2,222,bbb
3,4,222,ddd
2,3,333,ccc


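- Note: the rows are reordered by `column_2`, but each row keeps its original index label, so the index now reads 0, 1, 3, 2 instead of 0, 1, 2, 3.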

In [18]:
# Same sort as the previous cell, with the column passed through the explicit by= keyword
df.sort_values(by="column_2")

Unnamed: 0,column_1,column_2,column_3
0,1,111,aaa
1,2,222,bbb
3,4,222,ddd
2,3,333,ccc


In [19]:
# Find missing values: the output is a boolean frame (all False here, so nothing is missing)

df.isnull()

Unnamed: 0,column_1,column_2,column_3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


In [20]:
# Records for the pivot-table example: one entry per (Country, Branch, Name) with its Number.
data = {
    "Country": ["USA", "USA", "USA", "GBR", "GBR", "GBR"],
    "Branch": ["one", "one", "two", "two", "one", "one"],
    "Name": ["x", "y", "x", "y", "x", "y"],
    "Number": [20, 22, 23, 21, 32, 31],
}

In [21]:
# Build a frame from the `data` dict.
# NOTE: this rebinds `df` -- the column_1/column_2/column_3 frame used in the
# cells above is replaced from here on.
df = pd.DataFrame(data)

df

Unnamed: 0,Country,Branch,Name,Number
0,USA,one,x,20
1,USA,one,y,22
2,USA,two,x,23
3,GBR,two,y,21
4,GBR,one,x,32
5,GBR,one,y,31


In [22]:
# Reshape to one row per (Country, Branch) pair and one column per Name.
# aggfunc="mean" is the pandas default, spelled out here; (Country, Branch, Name)
# combinations that do not occur in the data appear as missing values in the output.
df.pivot_table(
    values="Number",
    index=["Country", "Branch"],
    columns=["Name"],
    aggfunc="mean",
)

Unnamed: 0_level_0,Name,x,y
Country,Branch,Unnamed: 2_level_1,Unnamed: 3_level_1
GBR,one,32.0,31.0
GBR,two,,21.0
USA,one,20.0,22.0
USA,two,23.0,
