# Multi-Level (MultiIndex) DataFrames

In [16]:
# Build the two parallel label lists that will become the levels of a MultiIndex.

# Outer level: the group label, three rows per group.
groups = ['G1'] * 3 + ['G2'] * 3

# Inner level: the row number within each group (1..3, once per group).
number_of_groups = [1, 2, 3] * 2

In [17]:
# Pair each group label with its number: one (group, number) tuple per row.

index = [(group, number) for group, number in zip(groups, number_of_groups)]

index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [18]:
import pandas as pd

# Convert the (group, number) tuples into a two-level hierarchical index.
index_values = pd.MultiIndex.from_tuples(tuples=index)

index_values

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [19]:
# Create a 6x3 DataFrame of standard-normal values, indexed by the two-level
# MultiIndex built above, with columns A, B and C.

import numpy as np
from numpy.random import randn

# Fix the seed so that Restart Kernel -> Run All reproduces the same numbers.
# Without it every run produces a different table. Saved outputs from an
# unseeded run will be replaced by the seeded values on the next execution.
np.random.seed(42)

df = pd.DataFrame(data=randn(6, 3), index=index_values, columns=['A', 'B', 'C'])

df

Unnamed: 0,Unnamed: 1,A,B,C
G1,1,-0.262776,0.093377,1.656729
G1,2,0.323697,0.183879,0.49011
G1,3,-0.701421,-0.164712,0.349989
G2,1,-1.714396,0.797822,0.042254
G2,2,0.147024,1.121407,0.664219
G2,3,-0.085082,1.237569,0.896328


### Reading columns and rows from a MultiIndex DataFrame

In [20]:
# Select column 'A' as a Series. Bracket access also works for column names
# that are not valid Python attribute names.
df['A']

G1  1   -0.262776
    2    0.323697
    3   -0.701421
G2  1   -1.714396
    2    0.147024
    3   -0.085082
Name: A, dtype: float64

In [21]:
# Select columns 'A' and 'C' for every row; a list of labels returns a DataFrame.

df.loc[:, ['A', 'C']]

Unnamed: 0,Unnamed: 1,A,C
G1,1,-0.262776,1.656729
G1,2,0.323697,0.49011
G1,3,-0.701421,0.349989
G2,1,-1.714396,0.042254
G2,2,0.147024,0.664219
G2,3,-0.085082,0.896328


In [22]:
# Select every row of group 'G1'. A single label passed to .loc on this
# two-level index matches the outer level; the result is indexed by the
# remaining inner level (1, 2, 3), as the output below shows.

df.loc['G1']

Unnamed: 0,A,B,C
1,-0.262776,0.093377,1.656729
2,0.323697,0.183879,0.49011
3,-0.701421,-0.164712,0.349989


In [23]:
# Select every row of group 'G2'. A single label passed to .loc on this
# two-level index matches the outer level; the result is indexed by the
# remaining inner level (1, 2, 3), as the output below shows.

df.loc['G2']

Unnamed: 0,A,B,C
1,-1.714396,0.797822,0.042254
2,0.147024,1.121407,0.664219
3,-0.085082,1.237569,0.896328


In [29]:
# Select the single row at group 'G2', number 2.
# A tuple key addresses both index levels in one .loc call, which avoids
# building the intermediate df.loc['G2'] frame just to index it again.
# The values are the same as the chained form; the returned Series is named
# ('G2', 2) instead of 2.

df.loc[('G2', 2)]

A    0.147024
B    1.121407
C    0.664219
Name: 2, dtype: float64

In [35]:
# Read the single value at group 'G2', number 2, column 'B'.
# One .loc call with (row key, column label) replaces three chained lookups
# and returns the same scalar.

df.loc[('G2', 2), 'B']

np.float64(1.1214071928284455)

### Index Names

In [36]:
# Inspect the names of the index levels
df.index.names

FrozenList([None, None])

#### Note: both names are `None` because we have not assigned any yet; index level names default to `None`.

## Assigning names to index

In [39]:
# Name the two index levels.
# rename_axis returns a new frame with a renamed index instead of mutating the
# existing index object in place, so this cell can be re-run safely and the
# index originally passed to the DataFrame constructor is left as it was.
df = df.rename_axis(index=['Groups', 'Group Numbers'])

In [40]:
# Show the frame again; the index levels now carry the names assigned above
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
Groups,Group Numbers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
G1,1,-0.262776,0.093377,1.656729
G1,2,0.323697,0.183879,0.49011
G1,3,-0.701421,-0.164712,0.349989
G2,1,-1.714396,0.797822,0.042254
G2,2,0.147024,1.121407,0.664219
G2,3,-0.085082,1.237569,0.896328


In [41]:
# Confirm that the level names were applied
df.index.names

FrozenList(['Groups', 'Group Numbers'])

### What is df.xs() in pandas?

xs stands for “cross-section”.

* It is a method used to select data at a particular level of a MultiIndex (hierarchical index) or by label.

* Mainly useful for multi-level indexed DataFrames but works with normal indices too.

#### Syntax: `df.xs(key, axis=0, level=None, drop_level=True)`

* key — label of the row (or column) to select

* axis — 0 for rows (default), 1 for columns

* level — if the index is a MultiIndex, the level (given by name or position) that `key` refers to

* drop_level — whether to drop the level from the result (default True)

In [42]:
# Redisplay the frame for reference before taking cross-sections
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
Groups,Group Numbers,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
G1,1,-0.262776,0.093377,1.656729
G1,2,0.323697,0.183879,0.49011
G1,3,-0.701421,-0.164712,0.349989
G2,1,-1.714396,0.797822,0.042254
G2,2,0.147024,1.121407,0.664219
G2,3,-0.085082,1.237569,0.896328


In [44]:
# Goal: get the rows with Group Number 1 from both groups (G1 and G2).

# xs() takes a cross-section on the inner level directly, so no per-group
# lookup is needed.

df.xs(key=1, level='Group Numbers')

Unnamed: 0_level_0,A,B,C
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,-0.262776,0.093377,1.656729
G2,-1.714396,0.797822,0.042254


In [46]:
# Goal: get the rows with Group Number 3 from both groups (G1 and G2).

# xs() takes a cross-section on the inner level directly, so no per-group
# lookup is needed.

df.xs(key=3, level='Group Numbers')

Unnamed: 0_level_0,A,B,C
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,-0.701421,-0.164712,0.349989
G2,-0.085082,1.237569,0.896328


In [47]:
# Goal: column 'B' values for Group Number 3 across both groups (G1 and G2).

# Pick the column first, then take the cross-section of that Series on the
# 'Group Numbers' level.

df['B'].xs(3, level='Group Numbers')

Groups
G1   -0.164712
G2    1.237569
Name: B, dtype: float64

In [51]:
# Read a single value: Group Number 3, group 'G2', column 'B'.
# Take the cross-section, then address the row and column in one .loc call.

df.xs(3, level='Group Numbers').loc['G2', 'B']

np.float64(1.2375692880920348)