In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read ex1.csv, using its first column (position 0) as the row index.
ex1 = pd.read_csv('ex1.csv', index_col=0)
# Preview the first five rows.
ex1.head(n=5)

Unnamed: 0_level_0,B,C,D,E,F
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1,2,3,4,5
6,7,8,9,10,11
12,13,14,15,16,17
18,19,20,21,22,23
24,25,26,27,28,29


In [3]:
# set_index() replaces the current index rather than keeping it, so an index
# we still need has to be copied into an ordinary column first.
#
# Goal: index the DataFrame by F instead of A, while keeping A around for
# later use.

# assign() appends a column 'A' holding the current index values;
# set_index() then makes column F the new index.
ex1 = ex1.assign(A=ex1.index).set_index('F')
ex1.head()

Unnamed: 0_level_0,B,C,D,E,A
F,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5,1,2,3,4,0
11,7,8,9,10,6
17,13,14,15,16,12
23,19,20,21,22,18
29,25,26,27,28,24


In [4]:
# reset_index() drops the current index from its role as index: with
# drop=False (the default, spelled out here) the old index is promoted to a
# regular column and a default integer index takes its place.
ex1 = ex1.reset_index(drop=False)
ex1.head()

Unnamed: 0,F,B,C,D,E,A
0,5,1,2,3,4,0
1,11,7,8,9,10,6
2,17,13,14,15,16,12
3,23,19,20,21,22,18
4,29,25,26,27,28,24


In [53]:
# Read ex2.csv with the default integer row index.
ex2 = pd.read_csv('ex2.csv')
# Preview the first five rows.
ex2.head(n=5)

Unnamed: 0,A,B,C,D,E,F
0,0,1,2,3,there,5
1,12,7,8,2,delilah,11
2,12,13,14,16,what,17
3,18,19,20,26,its,25
4,30,25,26,26,like,29


In [54]:
# List the distinct values that occur in column A.
pd.unique(ex2['A'])

array([ 0, 12, 18, 30, 36, 42, 48, 54], dtype=int64)

In [55]:
# Names of the columns we want to keep; every other column is dropped.
col_to_keep = ['A', 'C', 'E', 'F']
# Select all rows, restricted to the chosen columns.
ex2 = ex2.loc[:, col_to_keep]
ex2.head()

Unnamed: 0,A,C,E,F
0,0,2,there,5
1,12,8,delilah,11
2,12,14,what,17
3,18,20,its,25
4,30,26,like,29


In [56]:
# Index the already-loaded frame by the combination of columns C and E,
# producing a two-level (C, E) row index.
ex2 = ex2.set_index(keys=['C', 'E'])
ex2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,F
C,E,Unnamed: 2_level_1,Unnamed: 3_level_1
2,there,0,5
8,delilah,12,11
14,what,12,17
20,its,18,25
26,like,30,29


In [60]:
# Look up the single row whose index is C == 20 and E == 'its'; the key is
# written as an explicit (C, E) tuple.
ex2.loc[(20, 'its')]

A    18
F    25
Name: (20, its), dtype: int64

In [62]:
# To compare several rows at once, pass loc a list of (C, E) index tuples,
# one tuple per row to fetch.

rows_to_compare = [(14, 'what'), (26, 'like')]
ex2.loc[rows_to_compare]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,F
C,E,Unnamed: 2_level_1,Unnamed: 3_level_1
14,what,12,17
26,like,30,29
