### Good References ###
http://synesthesiam.com/posts/an-introduction-to-pandas.html

In [1]:
import pandas as pd
import numpy as np

## Creating dataframes

In [2]:
df = pd.DataFrame({ "roll": [1,2,3],
                   "name": ["Gopal", "Seeta", "Ahmad"],
                   "marks": [80, 90, 70],
                  "out_of": [200, 200, 200]})
df

Unnamed: 0,roll,name,marks,out_of
0,1,Gopal,80,200
1,2,Seeta,90,200
2,3,Ahmad,70,200


In [3]:
df = pd.DataFrame([[4, 9],] * 3, columns=['A', 'B'])
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


## Transforming Columns

In [4]:
df.apply(np.sum, axis=0)

A    12
B    27
dtype: int64

In [5]:
df["C"] = df.apply(np.sum, axis=1)
df

Unnamed: 0,A,B,C
0,4,9,13
1,4,9,13
2,4,9,13


In [6]:
df["D"] = df.apply(lambda row: row["A"]*2, axis=1)
df

Unnamed: 0,A,B,C,D
0,4,9,13,8
1,4,9,13,8
2,4,9,13,8


## Sorting

In [7]:
df = pd.DataFrame({"day_of_week": ["Tuesday", "Monday", "Friday"],
                   "commits": [20, 30, 40]
                  })
df

Unnamed: 0,day_of_week,commits
0,Tuesday,20
1,Monday,30
2,Friday,40


In [8]:
keyorder = ["Sunday", "Monday", "Tuesday", "Thursday", "Friday", "Saturday"]
df["day"] = df.apply(lambda row: keyorder.index(row["day_of_week"]), axis=1)
df

Unnamed: 0,day_of_week,commits,day
0,Tuesday,20,2
1,Monday,30,1
2,Friday,40,4


In [9]:
df.sort_values(by= "day")

Unnamed: 0,day_of_week,commits,day
1,Monday,30,1
0,Tuesday,20,2
2,Friday,40,4


In [10]:
df.drop(["day"], axis=1)

Unnamed: 0,day_of_week,commits
0,Tuesday,20
1,Monday,30
2,Friday,40


## Remove Outliers

In [11]:
df = pd.DataFrame([[4, 9],] * 3, columns=['A', 'B'])
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


In [12]:
df[np.abs(df.B-df.B.mean()) <= (3*df.B.std())]    # remove values beyond 3-sigma

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


## sandbox