# Pandas Core

In [14]:
import pandas as pd

## 0 Creating DataFrames

In [44]:
# DataFrame with a single-level index, built in two equivalent ways.

# 1) Column-oriented: a dict mapping each column name to its values.
df_from_columns = pd.DataFrame(
    {
        "a": [4, 5, 6, 1],
        "b": [7, 8, 9, 2],
        "c": [10, 11, 12, 3],
    },
    index=[1, 2, 3, 4],
)

# 2) Row-oriented: a list of rows plus explicit column names.
df = pd.DataFrame(
    [
        [4, 7, 10],
        [5, 8, 11],
        [6, 9, 12],
        [1, 2, 3],
    ],
    index=[1, 2, 3, 4],
    columns=["a", "b", "c"],
)

# Both constructions must describe the same frame; the first used to be
# assigned to `df` as well and was silently overwritten by the second.
pd.testing.assert_frame_equal(df_from_columns, df)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12
4,1,2,3


In [16]:
# DataFrame whose rows are labelled by a two-level MultiIndex
# (outer level "n", inner level "v").
df2 = pd.DataFrame(
    {"a": [4, 5, 6], "b": [7, 8, 9], "c": [10, 11, 12]},
    index=pd.MultiIndex.from_arrays(
        [["d", "d", "e"], [1, 2, 2]],
        names=["n", "v"],
    ),
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12


## 1 Subset Observations (Rows)

In [51]:
# Select rows by integer position: the first two rows (end is exclusive)
df.iloc[:2]

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11


In [52]:
# Select a single row by its index label (returns a Series).
# `.loc` is the label-based indexer; the old `.ix` indexer was deprecated
# and later removed from pandas, so it no longer works on current versions.
df.loc[3]

a     6
b     9
c    12
Name: 3, dtype: int64

In [54]:
# Keep only the rows where column "a" is greater than 4 (boolean mask)
df.loc[df["a"].gt(4)]

Unnamed: 0,a,b,c
2,5,8,11
3,6,9,12


In [57]:
# Same row filter, but also restrict the result to columns "a" and "c"
df.loc[df["a"].gt(4), ["a", "c"]]

Unnamed: 0,a,c
2,5,11
3,6,12


In [39]:
# Randomly select n rows.
# A fixed random_state makes the draw reproducible, so re-running the
# notebook from a fresh kernel shows the same rows every time.
df.sample(n=2, random_state=0)

Unnamed: 0,a,b,c
4,13,14,15
1,4,7,10


In [40]:
# Randomly select a fraction of the rows (here: half of them).
# A fixed random_state makes the draw reproducible, so re-running the
# notebook from a fresh kernel shows the same rows every time.
df.sample(frac=0.5, random_state=0)

Unnamed: 0,a,b,c
1,4,7,10
3,6,9,12


In [37]:
# First n rows of the frame
df.head(n=2)

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11


In [38]:
# Last n rows of the frame
df.tail(n=2)

Unnamed: 0,a,b,c
3,6,9,12
4,13,14,15


In [45]:
# The n rows with the largest values in column "c", sorted descending
df.nlargest(n=2, columns="c")

Unnamed: 0,a,b,c
3,6,9,12
2,5,8,11


In [47]:
# The n rows with the smallest values in column "c", sorted ascending
df.nsmallest(n=2, columns="c")

Unnamed: 0,a,b,c
4,1,2,3
1,4,7,10


In [48]:
# Remove fully duplicated rows, keeping the first occurrence of each
# (this frame has no duplicates, so all four rows are returned)
df.drop_duplicates(keep="first")

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12
4,1,2,3


## 2 Subset Variables (Columns)

In [49]:
# Select several columns by name (returns a DataFrame)
df.loc[:, ["a", "c"]]

Unnamed: 0,a,c
1,4,10
2,5,11
3,6,12
4,1,3


In [58]:
# Two equivalent ways to pull out a single column as a Series.
# Only the last expression of a cell is echoed, so one result is shown.
df["a"]  # bracket access
df.a     # attribute access

1    4
2    5
3    6
4    1
Name: a, dtype: int64

## 3 Reshaping Data (change the layout of a data set)

In [62]:
# Gather columns into rows: every (column, value) pair becomes one row
# with the column name in "variable" and the cell content in "value".
df_melt = df.melt()
df_melt

Unnamed: 0,variable,value
0,a,4
1,a,5
2,a,6
3,a,1
4,b,7
5,b,8
6,b,9
7,b,2
8,c,10
9,c,11


In [71]:
# Spread rows into columns: one output column per distinct "variable".
# The melted frame keeps no identifier of the original row, so each value
# stays on its own row and the other columns are filled with NaN.
df_melt.pivot(values="value", columns="variable")

variable,a,b,c
0,4.0,,
1,5.0,,
2,6.0,,
3,1.0,,
4,,7.0,
5,,8.0,
6,,9.0,
7,,2.0,
8,,,10.0
9,,,11.0


In [82]:
# Turn the index levels back into ordinary columns, then group by "n".
# The result is a GroupBy object, as the repr below shows; nothing is
# aggregated in this cell.
df2.reset_index().groupby("n")

<pandas.core.groupby.DataFrameGroupBy object at 0x00000000093C5FD0>