# DataFrame

<a> <img src='df.png' align='left'/> </a>

In [13]:
import pandas as pd

In [14]:
# Small integer frame used throughout the first sections:
# three labelled rows (R1..R3) by four labelled columns (C1..C4).
df = pd.DataFrame(
    [
        [2, 4, 6, -1],
        [3, -4, 2, -1],
        [-10, 3, 5, -7],
    ],
    index=['R1', 'R2', 'R3'],
    columns=['C1', 'C2', 'C3', 'C4'],
)

In [15]:
df

Unnamed: 0,C1,C2,C3,C4
R1,2,4,6,-1
R2,3,-4,2,-1
R3,-10,3,5,-7


In [17]:
df.head(2)

Unnamed: 0,C1,C2,C3,C4
R1,2,4,6,-1
R2,3,-4,2,-1


In [20]:
# Derive a new column from an existing one: every C1 value shifted up by 5.
df['newC'] = df['C1'].add(5)

In [21]:
df

Unnamed: 0,C1,C2,C3,C4,newC
R1,2,4,6,-1,7
R2,3,-4,2,-1,8
R3,-10,3,5,-7,-5


In [27]:
# Remove the derived column again. drop() returns a new frame, so rebind the
# name instead of mutating in place with inplace=True.
df = df.drop('newC', axis=1)

In [29]:
# Remove the row labelled 'R1' (axis=0 targets the row index). Rebinding the
# result avoids the in-place mutation that inplace=True performs.
df = df.drop('R1', axis=0)

In [30]:
df

Unnamed: 0,C1,C2,C3,C4
R2,3,-4,2,-1
R3,-10,3,5,-7


# Select

In [31]:
df

Unnamed: 0,C1,C2,C3,C4
R2,3,-4,2,-1
R3,-10,3,5,-7


In [33]:
df[['C2','C4']]

Unnamed: 0,C2,C4
R2,-4,-1
R3,3,-7


In [None]:
# .loc[row, column] selects by label; .iloc[row, column] selects by integer position

In [34]:
df

Unnamed: 0,C1,C2,C3,C4
R2,3,-4,2,-1
R3,-10,3,5,-7


In [42]:
# Label-based selection: the row labelled 'R2', restricted to columns 'C2' and 'C3'.
# A single row label with a list of columns yields a Series named after the row.
df.loc['R2',['C2','C3']]

C2   -4
C3    2
Name: R2, dtype: int64

In [45]:
# Position-based selection: the row at position 1 (the second row) and the
# columns at positions 1 and 2 -- the end of an iloc slice is exclusive.
df.iloc[1,1:3]

C2    3
C3    5
Name: R3, dtype: int64

# Index, MultiIndex

In [46]:
# Recreate the full 3x4 example frame so the index examples below start from
# all three rows (R1..R3) and all four columns (C1..C4).
df = pd.DataFrame(
    [
        [2, 4, 6, -1],
        [3, -4, 2, -1],
        [-10, 3, 5, -7],
    ],
    index=['R1', 'R2', 'R3'],
    columns=['C1', 'C2', 'C3', 'C4'],
)

In [47]:
df

Unnamed: 0,C1,C2,C3,C4
R1,2,4,6,-1
R2,3,-4,2,-1
R3,-10,3,5,-7


In [48]:
df.index

Index(['R1', 'R2', 'R3'], dtype='object')

In [49]:
# Moves the row labels into a regular column named 'index' and replaces them
# with a default 0..n-1 index. The result is a new frame; df is not modified.
df.reset_index()

Unnamed: 0,index,C1,C2,C3,C4
0,R1,2,4,6,-1
1,R2,3,-4,2,-1
2,R3,-10,3,5,-7


In [50]:
# Promote column 'C3' to be the row index (it is removed from the columns).
# set_index() returns a new frame, so rebind instead of using inplace=True.
df = df.set_index('C3')

In [51]:
df

Unnamed: 0_level_0,C1,C2,C4
C3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,2,4,-1
2,3,-4,-1
5,-10,3,-7


In [52]:
df['C2']

C3
6    4
2   -4
5    3
Name: C2, dtype: int64

In [53]:
df

Unnamed: 0_level_0,C1,C2,C4
C3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,2,4,-1
2,3,-4,-1
5,-10,3,-7


In [55]:
# .loc is label-based: 5 is looked up among the index labels (the former C3
# values), not treated as a row position.
df.loc[5,:]

C1   -10
C2     3
C4    -7
Name: 5, dtype: int64

In [56]:
df.index

Int64Index([6, 2, 5], dtype='int64', name='C3')

# MultiIndex



In [58]:
# numpy is no longer needed by this cell; the import is retained in case other
# cells rely on `np`.
import numpy as np

# Build the frame from a plain list of rows. Wrapping mixed str/int rows in
# np.array forces a single common dtype, which turns every score into a string
# ('93', '78', ...) and breaks numeric operations on MaxScore. Passing the
# list directly lets pandas infer an integer dtype for that column.
df = pd.DataFrame(data=[['Math', 'C1', 's1', 93],
                        ['Science', 'C1', 's2', 78],
                        ['English', 'C1', 's3', 86],
                        ['Math', 'C2', 's4', 58],
                        ['English', 'C2', 's5', 71],
                        ['Science', 'C2', 's6', 69]],
                  columns=['Subject', 'Class', 'Student', 'MaxScore'])

In [59]:
df

Unnamed: 0,Subject,Class,Student,MaxScore
0,Math,C1,s1,93
1,Science,C1,s2,78
2,English,C1,s3,86
3,Math,C2,s4,58
4,English,C2,s5,71
5,Science,C2,s6,69


In [60]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [63]:
# Use two columns as the row index, producing a two-level MultiIndex
# (Class, then Subject). Rebind the returned frame instead of inplace=True.
df = df.set_index(['Class', 'Subject'])

In [64]:
df.index

MultiIndex(levels=[['C1', 'C2'], ['English', 'Math', 'Science']],
           labels=[[0, 0, 0, 1, 1, 1], [1, 2, 0, 1, 0, 2]],
           names=['Class', 'Subject'])

In [65]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Student,MaxScore
Class,Subject,Unnamed: 2_level_1,Unnamed: 3_level_1
C1,Math,s1,93
C1,Science,s2,78
C1,English,s3,86
C2,Math,s4,58
C2,English,s5,71
C2,Science,s6,69


In [68]:
# Select one row by its full (Class, Subject) key in a single .loc call
# instead of chaining two lookups. The explicit ':' for the columns makes it
# unambiguous that the tuple is a row key. The resulting Series is named
# ('C2', 'English').
df.loc[('C2', 'English'), :]

Student     s5
MaxScore    71
Name: English, dtype: object