# 1) Pandas multiindex

- umoznuje pracovat s viac-rozmernymi datami, je to tzv. hierarchicke indexovanie
- obsahuje viacero urovni indexov, ktore su navzajom prepojene (parent-child relationship)

- hierarchicke indexovanie: stlpec 'Continent' je rodicom (parent) stlpca 'Country'
  ![image.png](attachment:image.png)


## 1.1) Create Multiindex


In [None]:
import pandas as pd

# Column-oriented source data: the three lists are aligned by position,
# so entry i of each list describes the same country.
data = {
    "Continent": [
        "North America",
        "Europe",
        "Asia",
        "North America",
        "Asia",
        "Europe",
        "North America",
        "Asia",
        "Europe",
        "Asia",
    ],
    "Country": [
        "United States",
        "Germany",
        "China",
        "Canada",
        "Japan",
        "France",
        "Mexico",
        "India",
        "United Kingdom",
        "Nepal",
    ],
    "Population": [
        331002651,
        83783942,
        1439323776,
        37742154,
        126476461,
        65273511,
        128932753,
        1380004385,
        67886011,
        29136808,
    ],
}

# Build the DataFrame from the column dictionary.
df = pd.DataFrame(data)

# Group the rows by continent. kind="stable" keeps the countries of each
# continent in their original relative order; the default quicksort does
# not guarantee that. Reassigning the result avoids inplace mutation.
df = df.sort_values("Continent", kind="stable")
print("Sorted Dataframe:\n", df)

# Move both columns into a two-level (hierarchical) row index:
# "Continent" is the outer level and "Country" the inner level.
df = df.set_index(["Continent", "Country"])
print("\nMultiindex Dataframe:\n", df)

Sorted Dataframe:
        Continent         Country  Population
2           Asia           China  1439323776
4           Asia           Japan   126476461
7           Asia           India  1380004385
9           Asia           Nepal    29136808
1         Europe         Germany    83783942
5         Europe          France    65273511
8         Europe  United Kingdom    67886011
0  North America   United States   331002651
3  North America          Canada    37742154
6  North America          Mexico   128932753

Multiindex Dataframe:
                               Population
Continent     Country                   
Asia          China           1439323776
              Japan            126476461
              India           1380004385
              Nepal             29136808
Europe        Germany           83783942
              France            65273511
              United Kingdom    67886011
North America United States    331002651
              Canada            37742154
              Mexico           128932753

## 1.2) Access rows with multiindex


In [None]:
import pandas as pd

# Column-oriented source data: the three lists are aligned by position,
# so entry i of each list describes the same country.
data = {
    "Continent": [
        "North America",
        "Europe",
        "Asia",
        "North America",
        "Asia",
        "Europe",
        "North America",
        "Asia",
        "Europe",
        "Asia",
    ],
    "Country": [
        "United States",
        "Germany",
        "China",
        "Canada",
        "Japan",
        "France",
        "Mexico",
        "India",
        "United Kingdom",
        "Nepal",
    ],
    "Population": [
        331002651,
        83783942,
        1439323776,
        37742154,
        126476461,
        65273511,
        128932753,
        1380004385,
        67886011,
        29136808,
    ],
}

# Build the DataFrame from the column dictionary.
df = pd.DataFrame(data)

# Group the rows by continent. kind="stable" keeps the countries of each
# continent in their original relative order; the default quicksort does
# not guarantee that. Reassigning the result avoids inplace mutation.
df = df.sort_values("Continent", kind="stable")
print("Sorted DataFrame:\n", df)

# Move both columns into a two-level (hierarchical) row index:
# "Continent" is the outer level and "Country" the inner level.
df = df.set_index(["Continent", "Country"])
print("\nMultiindex DataFrame:\n", df)

# Partial key: selecting only the outer level returns every row under
# "Asia" as a DataFrame indexed by the remaining "Country" level.
asia = df.loc["Asia"]

# Full key: a single row is addressed by a tuple with one label per index
# level. The row tuple is written explicitly and the column axis is given
# as ":" so the second label cannot be mistaken for a column name.
canada = df.loc[("North America", "Canada"), :]

print("\nAsia\n", asia)
print("\nCanada\n", canada)


Sorted DataFrame:
        Continent         Country  Population
2           Asia           China  1439323776
4           Asia           Japan   126476461
7           Asia           India  1380004385
9           Asia           Nepal    29136808
1         Europe         Germany    83783942
5         Europe          France    65273511
8         Europe  United Kingdom    67886011
0  North America   United States   331002651
3  North America          Canada    37742154
6  North America          Mexico   128932753

Multiindex DataFrame:
                               Population
Continent     Country                   
Asia          China           1439323776
              Japan            126476461
              India           1380004385
              Nepal             29136808
Europe        Germany           83783942
              France            65273511
              United Kingdom    67886011
North America United States    331002651
              Canada            37742154
              Mexico           128932753

## 1.3) Multiindex from arrays

- **from_arrays()** method


In [None]:
import pandas as pd

# One (continent, country, population) record per row, listed by
# continent and then by country.
records = [
    ("Asia", "China", 1439323776),
    ("Asia", "India", 1380004385),
    ("Asia", "Japan", 126476461),
    ("Asia", "Nepal", 29136808),
    ("Europe", "France", 65273511),
    ("Europe", "Germany", 83783942),
    ("Europe", "United Kingdom", 67886011),
    ("North America", "Canada", 37742154),
    ("North America", "Mexico", 128932753),
    ("North America", "United States", 331002651),
]

# Split the records into three parallel lists, one per field.
continent = [row[0] for row in records]
country = [row[1] for row in records]
population = [row[2] for row in records]

# Index levels, outermost first.
index_arrays = [continent, country]

# Build the two-level index; `names` labels the levels in the same order.
multi_index = pd.MultiIndex.from_arrays(
    index_arrays,
    names=["Continent", "Country"],
)

# Attach the population values to the hierarchical index.
df = pd.DataFrame({"Population": population}, index=multi_index)

print(df)


                              Population
Continent     Country                   
Asia          China           1439323776
              India           1380004385
              Japan            126476461
              Nepal             29136808
Europe        France            65273511
              Germany           83783942
              United Kingdom    67886011
North America Canada            37742154
              Mexico           128932753
              United States    331002651
