# ** Pandas MultiIndex 

In [2]:
# Create a pandas Series from a list with mixed data types and print it
import pandas as pd

data = [10, 29.98, 'aBC', 88, 51]
series_from_list = pd.Series(data)
print(series_from_list)


0       10
1    29.98
2      aBC
3       88
4       51
dtype: object


In [3]:
# Create a pandas DataFrame from a dictionary and print it
import pandas as pd

data = {
    'Name': ['Riya', 'Ishaan', 'Sonali', 'Zahir'],
    'Age': [21, 15, 50, 55],
    'Score': [85, 90, 78, 92]
}

df = pd.DataFrame(data)
print(df)


     Name  Age  Score
0    Riya   21     85
1  Ishaan   15     90
2  Sonali   50     78
3   Zahir   55     92


In [4]:
# Multiply two pandas Series element-wise based on matching indices
import pandas as pd

s1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
s2 = pd.Series([4, 5, 6], index=["b", "c", "d"])
print(s1 * s2)


a     NaN
b     8.0
c    15.0
d     NaN
dtype: float64


In [5]:
# Add two pandas Series element-wise and print the result
import pandas as pd

series_a = pd.Series([12, 14, 16])
series_b = pd.Series([3, 6, 9])
sum_series = series_a + series_b 
print(sum_series)

0    15
1    20
2    25
dtype: int64


In [6]:
# Create a pandas Series with a MultiIndex and print it
import pandas as pd

arrays = [
    ['X', 'X', 'Y', 'Y'],
    ['History', 'Physics', 'History', 'Physics']
]
index = pd.MultiIndex.from_arrays(arrays, names=('Letter', 'Subject'))

multi_s = pd.Series([75, 89, 82, 95], index=index)
print(multi_s)

Letter  Subject
X       History    75
        Physics    89
Y       History    82
        Physics    95
dtype: int64


In [7]:
# Create a pandas Series with a MultiIndex from tuples and print it
import pandas as pd  

tuples = [('X', 'History'), ('X', 'Physics'), ('Y', 'History'), ('Y', 'Physics')]
index = pd.MultiIndex.from_tuples(tuples, names=('Letter', 'Subject'))

multi_s = pd.Series([78, 91, 83, 88], index=index)
print(multi_s)


Letter  Subject
X       History    78
        Physics    91
Y       History    83
        Physics    88
dtype: int64


In [8]:
# Create a pandas Series with a MultiIndex using the Cartesian product and print it
import pandas as pd

index = pd.MultiIndex.from_product(
    [['X', 'Y'], ['History', 'Physics']],
    names=('Letter', 'Subject')
)

multi_s = pd.Series([76, 84, 91, 87], index=index)
print(multi_s)


Letter  Subject
X       History    76
        Physics    84
Y       History    91
        Physics    87
dtype: int64


In [9]:
df = pd.DataFrame({
    'Alphabet': ['A', 'A', 'B', 'B'],
    'Subject': ['Math', 'Science', 'Math', 'Science']
})
index = pd.MultiIndex.from_frame(df, names=('Alphabet', 'Subject'))

multi_s = pd.Series([90, 85, 88, 92], index=index)
print(multi_s)

Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64


## Demonstration of Pandas MultiIndex: Creation, Access, Slicing, Reordering, Grouping, and MultiIndex Columns


In [12]:
import pandas as pd
import numpy as np

#  MULTI-INDEX FROM ARRAYS 
# Create a MultiIndex Series from arrays
arrays = [
    ["A", "A", "B", "B"],
    ["Math", "Science", "Math", "Science"]
]
index = pd.MultiIndex.from_arrays(arrays, names=("Alphabet", "Subject"))
multi_s = pd.Series([90, 85, 88, 92], index=index)
print("MultiIndex Series from arrays:\n", multi_s, "\n")

# MULTI-INDEX FROM TUPLES 
# Create a MultiIndex Series from tuples
tuples = [
    ("A", "Math"), ("A", "Science"),
    ("B", "Math"), ("B", "Science")
]
index2 = pd.MultiIndex.from_tuples(tuples, names=("Alphabet", "Subject"))
multi_s2 = pd.Series([70, 75, 80, 82], index=index2)
print("MultiIndex Series from tuples:\n", multi_s2, "\n")

#  MULTI-INDEX FROM PRODUCT
# Create a MultiIndex Series using the Cartesian product of iterables
iterables = [["A", "B"], ["Math", "Science"]]
index3 = pd.MultiIndex.from_product(iterables, names=("Alphabet", "Subject"))
multi_s3 = pd.Series(np.random.randint(60, 100, size=4), index=index3)
print("MultiIndex Series from product:\n", multi_s3, "\n")


MultiIndex Series from arrays:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

MultiIndex Series from tuples:
 Alphabet  Subject
A         Math       70
          Science    75
B         Math       80
          Science    82
dtype: int64 

MultiIndex Series from product:
 Alphabet  Subject
A         Math       77
          Science    85
B         Math       72
          Science    76
dtype: int64 



In [13]:
# ACCESSING DATA
# Access all subjects for a letter and specific element
print("Access all subjects for 'A':\n", multi_s.loc["A"], "\n")
print("Access specific element (B, Science):\n", multi_s.loc[("B", "Science")], "\n")



Access all subjects for 'A':
 Subject
Math       90
Science    85
dtype: int64 

Access specific element (B, Science):
 92 



In [14]:
#SLICING
# Demonstrate slicing by rows and partial columns
print("Slicing from A to B:\n", multi_s.loc["A":"B"], "\n")
print("Partial slice for all Math:\n", multi_s.loc[:, "Math"], "\n")


Slicing from A to B:
 Alphabet  Subject
A         Math       90
          Science    85
B         Math       88
          Science    92
dtype: int64 

Partial slice for all Math:
 Alphabet
A    90
B    88
dtype: int64 



In [15]:
#REORDERING LEVELS
# Swap and reorder levels of MultiIndex Series
print("Swapping levels:\n", multi_s.swaplevel(), "\n")
print("Reordering levels:\n", multi_s3.reorder_levels(["Subject", "Alphabet"]), "\n")


Swapping levels:
 Subject  Alphabet
Math     A           90
Science  A           85
Math     B           88
Science  B           92
dtype: int64 

Reordering levels:
 Subject  Alphabet
Math     A           77
Science  A           85
Math     B           72
Science  B           76
dtype: int64 



In [16]:
# MULTI-INDEX AUTOMATIC CONSTRUCTION 
# Create MultiIndex Series and DataFrame automatically
multi_s_auto = pd.Series(
    np.random.randn(4),
    index=pd.MultiIndex.from_arrays([["A", "A", "B", "B"], ["X", "Y", "X", "Y"]])
)
print("MultiIndex Series constructed automatically:\n", multi_s_auto, "\n")

df_auto = pd.DataFrame(
    np.random.randn(4, 2),
    index=pd.MultiIndex.from_arrays([["Group1", "Group1", "Group2", "Group2"],
                                     ["One", "Two", "One", "Two"]]),
    columns=["Score1", "Score2"]
)
print("DataFrame with MultiIndex automatically:\n", df_auto, "\n")


MultiIndex Series constructed automatically:
 A  X    0.24273
   Y   -0.52901
B  X    0.33211
   Y   -0.48367
dtype: float64 

DataFrame with MultiIndex automatically:
               Score1    Score2
Group1 One  0.441388 -0.019941
       Two  0.032215 -0.416864
Group2 One  0.794166 -0.434857
       Two  0.093989 -0.238952 



In [17]:
 # GROUPBY AND RE-INDEX 
# Group by MultiIndex level and reindex to align with original
df = pd.DataFrame({
    "Math": [85, 90, 95, 80],
    "Science": [82, 88, 92, 84]
}, index=pd.MultiIndex.from_arrays([["A", "A", "B", "B"], ["one", "two", "one", "two"]]))
print("Original DataFrame:\n", df, "\n")

mean_by_group = df.groupby(level=0).mean()
print("Mean by group:\n", mean_by_group, "\n")

aligned = mean_by_group.reindex(df.index, level=0)
print("Reindexed to align with original index:\n", aligned, "\n")


Original DataFrame:
        Math  Science
A one    85       82
  two    90       88
B one    95       92
  two    80       84 

Mean by group:
    Math  Science
A  87.5     85.0
B  87.5     88.0 

Reindexed to align with original index:
        Math  Science
A one  87.5     85.0
  two  87.5     85.0
B one  87.5     88.0
  two  87.5     88.0 



In [18]:
# CROSS-SECTION
# Select rows from a specific level
print("Cross-section for level 'two':\n", df.xs("two", level=1), "\n")

#SORTING MULTI-INDEX
# Shuffle MultiIndex Series and sort by index
unsorted = multi_s_auto.sample(frac=1)
print("Unsorted MultiIndex Series:\n", unsorted, "\n")
print("Sorted by index:\n", unsorted.sort_index(), "\n")


Cross-section for level 'two':
    Math  Science
A    90       88
B    80       84 

Unsorted MultiIndex Series:
 B  X    0.33211
   Y   -0.48367
A  Y   -0.52901
   X    0.24273
dtype: float64 

Sorted by index:
 A  X    0.24273
   Y   -0.52901
B  X    0.33211
   Y   -0.48367
dtype: float64 



In [19]:
# MultiIndex columns + remove_unused_levels 
# Create a DataFrame with MultiIndex columns and demonstrate removing unused levels
arrays = [["Score", "Score"], ["Math", "Science"]]
multi_columns = pd.MultiIndex.from_arrays(arrays, names=["Type", "Subject"])

df_multi = pd.DataFrame(
    [[85, 82], [90, 88], [95, 92], [80, 84]],
    columns=multi_columns
)
print("DataFrame with MultiIndex columns:\n", df_multi, "\n")

# Drop one column (Math only) and remove unused levels
sub_df = df_multi[("Score", "Math")].to_frame()
print("Columns before removing unused levels:\n", sub_df.columns, "\n")
print("Columns after remove_unused_levels:\n", sub_df.columns.remove_unused_levels(), "\n")


DataFrame with MultiIndex columns:
 Type    Score        
Subject  Math Science
0          85      82
1          90      88
2          95      92
3          80      84 

Columns before removing unused levels:
 MultiIndex([('Score', 'Math')],
           ) 

Columns after remove_unused_levels:
 MultiIndex([('Score', 'Math')],
           ) 

