___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___

<h1><p style="text-align: center;">Pandas Lesson, Session - 3</p></h1>
    <h2><p style="text-align: center;">Part - 2</p></h2>

# Data Frames

 - ### ``DataFrames`` are the workhorse of pandas and are directly inspired by the R programming language. We can think of a DataFrame as a bunch of Series objects put together to share the same index. Let's use pandas to explore this topic!

In [1]:
import pandas as pd
import numpy as np

 - ### Creating a DataFrame using the ``list``s of data and columns

In [2]:
datam = [1, 2, 39, 67, 90]

In [3]:
datam

[1, 2, 39, 67, 90]

In [4]:
pd.DataFrame(datam, columns = ["column_name"])

Unnamed: 0,column_name
0,1
1,2
2,39
3,67
4,90


 - ### Creating a DataFrame using a ``NumPy array``

In [5]:
# Build the integers 1..9 and fold them into three rows (-1 lets NumPy infer the column count).
m = np.arange(1, 10).reshape(3, -1)

m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [6]:
pd.DataFrame(m, columns = ["var1", "var2", "var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [7]:
pd.DataFrame(data = m, columns = ["var1", "var2", "var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [8]:
df = pd.DataFrame(data = m, columns = ["var1", "var2", "var3"])

df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [9]:
df.head(2)

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6


In [10]:
df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [11]:
df.tail(1)

Unnamed: 0,var1,var2,var3
2,7,8,9


In [12]:
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [13]:
# Iterating over the columns Index yields the column labels one at a time.
for column_label in df.columns:
    print(column_label)

var1
var2
var3


In [14]:
df.columns = ["new1", "new2", "new3"]

In [15]:
df

Unnamed: 0,new1,new2,new3
0,1,2,3
1,4,5,6
2,7,8,9


In [16]:
type(df)

pandas.core.frame.DataFrame

In [17]:
df.shape

(3, 3)

In [18]:
df.ndim

2

In [19]:
df.size

9

In [20]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [21]:
type(df.values)

numpy.ndarray

 - ### Creating a DataFrame using a ``dict``

In [22]:
# Three separate draws of 5 integers from [0, 10). No seed is set before this
# point in the visible notebook, so these values differ from run to run.
s1 = np.random.randint(0, 10, size=5)
s2 = np.random.randint(0, 10, size=5)
s3 = np.random.randint(0, 10, size=5)

In [23]:
s1

array([3, 3, 2, 7, 2])

In [24]:
s2

array([9, 6, 9, 6, 6])

In [25]:
s3

array([8, 9, 2, 1, 6])

In [26]:
myDict = {"var1" : s1, "var2" : s2, "var3" : s3}

In [27]:
df1 = pd.DataFrame(myDict)

In [28]:
df1

Unnamed: 0,var1,var2,var3
0,3,9,8
1,3,6,9
2,2,9,2
3,7,6,1
4,2,6,6


In [29]:
pwd

'C:\\Users\\YD\\pythonic\\DAwPythonSessions'

In [30]:
# ornekcsv.csv

In [31]:
# The file separates its fields with semicolons instead of commas.
pd.read_csv("ornekcsv.csv", sep=";")

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [32]:
# Keep the parsed CSV under the name df3.
# NOTE: df3 is rebound to a different, randomly generated DataFrame in a later cell.
df3 = pd.read_csv("ornekcsv.csv", delimiter = ";")

In [33]:
df3.head()

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0


- Simple indexing and slicing of ``DataFrames``

In [34]:
df1

Unnamed: 0,var1,var2,var3
0,3,9,8
1,3,6,9
2,2,9,2
3,7,6,1
4,2,6,6


In [35]:
df1[1:3]

Unnamed: 0,var1,var2,var3
1,3,6,9
2,2,9,2


In [36]:
df1.index

RangeIndex(start=0, stop=5, step=1)

In [37]:
# Materialise the index labels as a plain Python list.
list(df1.index)

[0, 1, 2, 3, 4]

In [38]:
df1.index = ["a", "b", "c", "d", "e"]

In [39]:
df1

Unnamed: 0,var1,var2,var3
a,3,9,8
b,3,6,9
c,2,9,2
d,7,6,1
e,2,6,6


In [40]:
df1["b" : "d"]

Unnamed: 0,var1,var2,var3
b,3,6,9
c,2,9,2
d,7,6,1


In [41]:
"var2" in df1

True

In [42]:
"joseph" in df1

False

### Now, let's re-examine the ***indexing, selection*** and ***slicing*** methods and several ***attributes*** using a different DataFrame

In [43]:
from numpy.random import randn
# Seed NumPy's global random generator so the random numbers drawn in the
# following cells are repeatable from run to run.
np.random.seed(101)

In [44]:
# 5x4 frame of randn() draws with row labels A-E and column labels W-Z.
df3 = pd.DataFrame(data=randn(5, 4), index=list("ABCDE"), columns=list("WXYZ"))

In [45]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [46]:
pd.DataFrame(randn(5,4), "A B C D E".split(), "W X Y Z".split())

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [47]:
pd.DataFrame(index = 'A B C D E'.split(), data = randn(5,4), columns = 'W X Y Z'.split())

Unnamed: 0,W,X,Y,Z
A,-0.993263,0.1968,-1.136645,0.000366
B,1.025984,-0.156598,-0.031579,0.649826
C,2.154846,-0.610259,-0.755325,-0.346419
D,0.147027,-0.479448,0.558769,1.02481
E,-0.925874,1.862864,-1.133817,0.610478


## Selection and Indexing

Let's learn the various methods to grab data from a DataFrame

In [48]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [49]:
df3["W"]

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [50]:
type(df3["W"])

pandas.core.series.Series

In [51]:
df3["W"].values

array([ 2.70684984,  0.65111795, -2.01816824,  0.18869531,  0.19079432])

In [52]:
df3["W"].ndim

1

In [53]:
df3[["W"]]

Unnamed: 0,W
A,2.70685
B,0.651118
C,-2.018168
D,0.188695
E,0.190794


In [54]:
type(df3[["W"]])

pandas.core.frame.DataFrame

In [55]:
# Indexing with a list of labels selects several columns at once.
wanted_columns = ["W", "Z"]

WZ_df = df3[wanted_columns]

#### DataFrame Columns are just Series

In [56]:
WZ_df

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [57]:
df3[["W", "Z"]]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [58]:
# Attribute-style access: df3.W returns the same column as df3["W"].
df3.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [59]:
# Bracket notation is required when a column label is not a valid Python
# identifier, e.g. df["ad soyad"] (Turkish for "name surname"; the label contains a space).

In [60]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [61]:
df3["A" : "C"]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001


In [62]:
df3["C" : "C"]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___

<h1><p style="text-align: center;">Pandas Lesson, Session - 4</p></h1>
    <h2><p style="text-align: center;">Part - 1</p></h2>

**Creating a new column:**

In [63]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [64]:
df3["new"] = df3["W"] + df3["Y"]

In [65]:
df3

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [66]:
df3["new2"] = df3["X"] * df3["Z"]

In [67]:
df3

Unnamed: 0,W,X,Y,Z,new,new2
A,2.70685,0.628133,0.907969,0.503826,3.614819,0.316469
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959,-0.193496
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355,-0.435932
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542,-0.724766
E,0.190794,1.978757,2.605967,0.683509,2.796762,1.352498


### [Removing Columns & Rows](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-drop.ipynb)

 - ### Removing Columns

In [68]:
df3

Unnamed: 0,W,X,Y,Z,new,new2
A,2.70685,0.628133,0.907969,0.503826,3.614819,0.316469
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959,-0.193496
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355,-0.435932
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542,-0.724766
E,0.190794,1.978757,2.605967,0.683509,2.796762,1.352498


In [69]:
df3.drop(["new", "new2"], axis = 1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [70]:
df3

Unnamed: 0,W,X,Y,Z,new,new2
A,2.70685,0.628133,0.907969,0.503826,3.614819,0.316469
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959,-0.193496
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355,-0.435932
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542,-0.724766
E,0.190794,1.978757,2.605967,0.683509,2.796762,1.352498


In [71]:
# Persist the removal by rebinding df3 to the frame returned by drop(),
# instead of mutating the existing object with inplace=True.
# `columns=` names the axis explicitly (equivalent to axis=1).
df3 = df3.drop(columns=["new", "new2"])

In [72]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


 - ### Removing rows

In [73]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [74]:
df3.drop("E", axis = 0)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


### Selecting Rows

- ### First, let's take a quick look at [`.loc[]`](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-loc.ipynb) | [`.iloc[]`](http://localhost:8888/notebooks/pythonic/DAwPythonSessions/w3resource-pandas-dataframe-iloc.ipynb)

#### `.loc[]` → allows us to select data using **labels** (names) of rows (index) & columns

#### `.iloc[]` → allows us to select data using the **integer positions** (index numbers) of rows & columns. It follows classic Python indexing logic: zero-based, with the end of a slice excluded

In [75]:
# 10x3 grid of random integers in [1, 30), wrapped in a frame with named columns.
m = np.random.randint(1, 30, size=(10, 3))
df4 = pd.DataFrame(data=m, columns=[f"var{k}" for k in (1, 2, 3)])
df4

Unnamed: 0,var1,var2,var3
0,15,4,15
1,29,6,23
2,23,22,27
3,22,18,24
4,12,5,8
5,21,28,12
6,6,18,23
7,26,19,14
8,7,4,25
9,15,18,24


In [76]:
df4.loc[1]

var1    29
var2     6
var3    23
Name: 1, dtype: int32

In [77]:
df4.loc[1:4]

Unnamed: 0,var1,var2,var3
1,29,6,23
2,23,22,27
3,22,18,24
4,12,5,8


In [78]:
df4.iloc[1:4]

Unnamed: 0,var1,var2,var3
1,29,6,23
2,23,22,27
3,22,18,24


In [79]:
# Relabel the ten rows with the letters a through j.
df4.index = list("abcdefghij")

In [80]:
df4

Unnamed: 0,var1,var2,var3
a,15,4,15
b,29,6,23
c,23,22,27
d,22,18,24
e,12,5,8
f,21,28,12
g,6,18,23
h,26,19,14
i,7,4,25
j,15,18,24


In [81]:
# Left disabled on purpose: df4's index now holds the string labels "a".."j",
# so the integer labels 1..4 used with .loc earlier no longer exist.
# df4.loc[1:4]

In [82]:
df4.iloc[1:4]

Unnamed: 0,var1,var2,var3
b,29,6,23
c,23,22,27
d,22,18,24


In [83]:
df4.loc["b" : "d"]

Unnamed: 0,var1,var2,var3
b,29,6,23
c,23,22,27
d,22,18,24


In [84]:
df4

Unnamed: 0,var1,var2,var3
a,15,4,15
b,29,6,23
c,23,22,27
d,22,18,24
e,12,5,8
f,21,28,12
g,6,18,23
h,26,19,14
i,7,4,25
j,15,18,24


In [85]:
df4.iloc[1,1]

6

In [86]:
df4.loc["b":"e", "var2"]

b     6
c    22
d    18
e     5
Name: var2, dtype: int32

In [87]:
df4.loc["b":"e"][["var2"]]

Unnamed: 0,var2
b,6
c,22
d,18
e,5


In [88]:
df4.loc["b":"e", ["var2"]]

Unnamed: 0,var2
b,6
c,22
d,18
e,5


In [89]:
df4

Unnamed: 0,var1,var2,var3
a,15,4,15
b,29,6,23
c,23,22,27
d,22,18,24
e,12,5,8
f,21,28,12
g,6,18,23
h,26,19,14
i,7,4,25
j,15,18,24


In [90]:
df4.iloc[1:5,1]

b     6
c    22
d    18
e     5
Name: var2, dtype: int32

In [91]:
# Select rows 1-4 and column 1 by position in a single .iloc call, instead of
# chaining a positional row slice with a label-based column lookup.
# The list [1] keeps the result a one-column DataFrame rather than a Series.
df4.iloc[1:5, [1]]

Unnamed: 0,var2
b,6
c,22
d,18
e,5


#### Let's continue to examine `.loc[]` and `.iloc[]` using ``df3`` again

In [92]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [93]:
df3.loc["A"]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

Or select based on position instead of label:

In [94]:
df3.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [95]:
df3.loc[["B"]]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965


In [96]:
df3.iloc[[2]]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [97]:
df3.iloc[:, 2]

A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [98]:
df3.iloc[:, [2]]

Unnamed: 0,Y
A,0.907969
B,-0.848077
C,0.528813
D,-0.933237
E,2.605967


In [99]:
df3[["Y"]]

Unnamed: 0,Y
A,0.907969
B,-0.848077
C,0.528813
D,-0.933237
E,2.605967


In [100]:
df3["Y"]

A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [101]:
df3.Y

A    0.907969
B   -0.848077
C    0.528813
D   -0.933237
E    2.605967
Name: Y, dtype: float64

### Selecting subset of rows and columns

 - ### `.loc[[row labels|names], [column labels|names]]`

 - ### `.iloc[[row index numbers], [column index numbers]]`

In [102]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [103]:
df3.loc["B","Y"]

-0.8480769834036315

In [104]:
df3.loc[["B"], ["Y"]]

Unnamed: 0,Y
B,-0.848077


In [105]:
df3.loc[["A", "B"], ["W", "Y"]]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


In [106]:
df3.iloc[[0, 1], [0, 2]]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


### Conditional Selection

An important feature of pandas is conditional selection using bracket notation, very similar to numpy:

In [107]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [108]:
df3 > 0

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [109]:
df3[df3 > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [110]:
df3["W"] > 0

A     True
B     True
C    False
D     True
E     True
Name: W, dtype: bool

In [111]:
df3[df3["W"] > 0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [112]:
df3[[True, True, False, True, True]]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [113]:
df3[df3["W"] > 0]["Y"]

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [114]:
df3[df3["W"] > 0][["Y"]]

Unnamed: 0,Y
A,0.907969
B,-0.848077
D,-0.933237
E,2.605967


In [115]:
df3[df3["W"] > 0][["Y", "X"]]

Unnamed: 0,Y,X
A,0.907969,0.628133
B,-0.848077,-0.319318
D,-0.933237,-0.758872
E,2.605967,1.978757


#### To combine two conditions, use **|** for `or` and **&** for `and`, wrapping each condition in parentheses:

In [116]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [117]:
df3[(df3["W"] > 0) & (df3["Y"] > 1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


#### Conditional selection using ``.loc[]`` and ``.iloc[]``

In [118]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [119]:
df3.loc[(df3.X > 0), ["X", "Z"]]

Unnamed: 0,X,Z
A,0.628133,0.503826
C,0.740122,-0.589001
E,1.978757,0.683509


In [120]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [121]:
df3.loc[((df3.W > 2) | (df3.W < -2)), ["W", "Y"]]

Unnamed: 0,W,Y
A,2.70685,0.907969
C,-2.018168,0.528813


___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___

<h1><p style="text-align: center;">Pandas Lesson, Session - 5</p></h1>
    <h2><p style="text-align: center;">The Last Part of <code>DataFrames</code></p></h2>

## More Index Details

Let's discuss some more features of indexing, including resetting the index or setting it to something else. We'll also talk about index hierarchy!

In [122]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [123]:
df3.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [125]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [126]:
df3.reset_index(drop = True)

Unnamed: 0,W,X,Y,Z
0,2.70685,0.628133,0.907969,0.503826
1,0.651118,-0.319318,-0.848077,0.605965
2,-2.018168,0.740122,0.528813,-0.589001
3,0.188695,-0.758872,-0.933237,0.955057
4,0.190794,1.978757,2.605967,0.683509


In [129]:
# Five state abbreviations, one for each row of df3.
newindx = ["CA", "NY", "WY", "OR", "CO"]
newindx

['CA', 'NY', 'WY', 'OR', 'CO']

In [130]:
df3["States"] = newindx

In [131]:
df3

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [132]:
df3.set_index("States")

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


In [133]:
df3

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [134]:
# Make "States" the row index and keep the result by rebinding df3
# (set_index returns a new frame, so nothing is mutated in place).
df3 = df3.set_index("States")

In [135]:
df3

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


## Multi-Index and Index Hierarchy

Let us go over how to work with a Multi-Index. First, we'll create a quick example of what a Multi-Indexed DataFrame would look like:

In [136]:
# Index Levels
outside = ['M1', 'M1', 'M1', 'M2', 'M2', 'M2']
inside = [1, 2, 3, 1, 2, 3]
multi_index = list(zip(outside, inside))
multi_index

[('M1', 1), ('M1', 2), ('M1', 3), ('M2', 1), ('M2', 2), ('M2', 3)]

Now let's show how to index this! For index hierarchy we use ``df.loc[]``; if this were on the columns axis, you would just use normal bracket notation ``df[]``. Calling one level of the index returns the sub-dataframe:

In [137]:
hier_index = pd.MultiIndex.from_tuples(multi_index)

In [138]:
hier_index

MultiIndex([('M1', 1),
            ('M1', 2),
            ('M1', 3),
            ('M2', 1),
            ('M2', 2),
            ('M2', 3)],
           )

In [139]:
df5 = pd.DataFrame(np.random.randn(6,2), index = hier_index, columns = ["A", "B"])

In [140]:
df5

Unnamed: 0,Unnamed: 1,A,B
M1,1,0.687993,-0.119198
M1,2,-1.030354,1.285524
M1,3,-0.83362,-1.13958
M2,1,2.616705,0.138355
M2,2,0.565277,0.035679
M2,3,0.113275,-1.408936


In [141]:
df5.loc["M1"]

Unnamed: 0,A,B
1,0.687993,-0.119198
2,-1.030354,1.285524
3,-0.83362,-1.13958


In [143]:
df5.loc["M1"].loc[1]

A    0.687993
B   -0.119198
Name: 1, dtype: float64

In [144]:
df5.loc["M1"].loc[[1]]

Unnamed: 0,A,B
1,0.687993,-0.119198


In [145]:
df5.index

MultiIndex([('M1', 1),
            ('M1', 2),
            ('M1', 3),
            ('M2', 1),
            ('M2', 2),
            ('M2', 3)],
           )

In [146]:
df5.index.names

FrozenList([None, None])

In [147]:
df5.index.names = ["Group", "Num"]

In [148]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,0.687993,-0.119198
M1,2,-1.030354,1.285524
M1,3,-0.83362,-1.13958
M2,1,2.616705,0.138355
M2,2,0.565277,0.035679
M2,3,0.113275,-1.408936


### Let's take a quick look at [``.xs()``](http://localhost:8888/notebooks/w3resource-pandas-dataframe-xs.ipynb)

In [149]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,0.687993,-0.119198
M1,2,-1.030354,1.285524
M1,3,-0.83362,-1.13958
M2,1,2.616705,0.138355
M2,2,0.565277,0.035679
M2,3,0.113275,-1.408936


In [150]:
df5.xs("M1")

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.687993,-0.119198
2,-1.030354,1.285524
3,-0.83362,-1.13958


In [151]:
df5.xs(("M1", 1))

A    0.687993
B   -0.119198
Name: (M1, 1), dtype: float64

In [152]:
df5.xs(("M1", 1), level = [0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,0.687993,-0.119198


In [153]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,0.687993,-0.119198
M1,2,-1.030354,1.285524
M1,3,-0.83362,-1.13958
M2,1,2.616705,0.138355
M2,2,0.565277,0.035679
M2,3,0.113275,-1.408936


In [155]:
df5.xs(1, level = "Num")

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
M1,0.687993,-0.119198
M2,2.616705,0.138355


In [156]:
df5.xs(1, level = 1)

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
M1,0.687993,-0.119198
M2,2.616705,0.138355


### [Removing Columns & Rows](http://localhost:8888/notebooks/w3resource-pandas-dataframe-drop.ipynb) from Multi-Index DataFrame

# The End of the Session