In [None]:
"""
    Theory
        These are powerful features that make Pandas suitable for complex data analysis:
            1. MultiIndex (Hierarchical Indexing) → Multiple levels of indexing in 
            rows/columns.
            2. Crosstab → Frequency tables (like Excel pivot).
            3. Pivot Tables → Advanced summarization (group + aggregation).
            4. Window Functions (Rolling, Expanding) → Used in time-series 
            (moving averages, trends).
"""

In [1]:
import pandas as pd

In [11]:
# A) MultiIndex (Hierarchical Indexing)
arrays = [
    ["Math","Math","Science","Scince"],
    ["Test1","Test2","Test1","Test2"]
]

index = pd.MultiIndex.from_arrays(arrays,names=("Subject", "Exam"))
data = [80,90,68,78]
df = pd.DataFrame([data], columns=index)
print(df)
print("\n")
#Accessing data:
print(df["Math"]) # All Math exams
print("\n")

print(df["Science"]["Test1"]) # Science Test1 only

Subject  Math       Science Scince
Exam    Test1 Test2   Test1  Test2
0          80    90      68     78


Exam  Test1  Test2
0        80     90


0    68
Name: Test1, dtype: int64


In [24]:
# B) Crosstab (Frequency table)
"""
    A crosstab (cross-tabulation) is a table showing the frequency (count) 
    of occurrences between two or more categorical variables.
"""

import pandas as pd
data = {
"Gender": ["Male", "Female", "Male", "Female", "Male", "Female"],
"Passed": ["Yes", "No", "Yes", "Yes", "No", "No"]
}
df = pd.DataFrame(data)

#create crosstab
result = pd.crosstab(df["Gender"], df["Passed"])
print(result)
print("\n")

#Add totals (margin) (we can add total in each row and column)
res = pd.crosstab(df["Gender"], df["Passed"], margins=True)
print(res)
print("\n")

#Crosstabs with percentage (normalize)
res = pd.crosstab(df["Gender"], df["Passed"], normalize=True)
print(res)



Passed  No  Yes
Gender         
Female   2    1
Male     1    2


Passed  No  Yes  All
Gender              
Female   2    1    3
Male     1    2    3
All      3    3    6


Passed        No       Yes
Gender                    
Female  0.333333  0.166667
Male    0.166667  0.333333


In [28]:
#Multiple Columns
data = {
"Department": ["CS", "CS", "IT", "IT", "ECE", "ECE"],
"Gender": ["M", "F", "M", "F", "M", "F"],
"Passed": ["Y", "N", "Y", "Y", "N", "Y"]
}

df2 = pd.DataFrame(data)

# Crosstab by Department & Gender vs Passed
result = pd.crosstab(index=[df["Department"], df["Gender"]],
columns=df["Passed"])
print(result)

Passed             N  Y
Department Gender      
CS         F       1  0
           M       0  1
ECE        F       0  1
           M       1  0
IT         F       0  1
           M       0  1


In [None]:
# C) Pivote Table
"""
    A Pivot Table is used to summarize, group, and analyze data just like in Excel.
    It allows you to compute things like:
         Average marks per department
         Total sales per region
         Count of employees by gender
"""

In [30]:
import pandas as pd
data = {
"Department": ["CS", "CS", "IT", "IT", "ECE", "ECE"],
"Gender": ["M", "F", "M", "F", "M", "F"],
"Marks": [85, 90, 80, 70, 95, 88]
}
df = pd.DataFrame(data)

In [40]:
#create pivot table
pivot = pd.pivot_table(df, values="Marks", index="Department", columns="Gender")
print(pivot)
print("\n")

#Using aggregate function
piv = pd.pivot_table(df, values="Marks", index="Department",
                           columns="Gender", aggfunc="sum")
print(piv)
print("\n")

#using multiple Aggregation
pivot = pd.pivot_table(df, values="Marks", index="Department",
aggfunc=["mean", "max", "min"])
print(pivot)
print("\n")

#Multiple values
data = {
"Department": ["CS", "CS", "IT", "IT", "ECE", "ECE"],
"Gender": ["M", "F", "M", "F", "M", "F"],
"Marks": [85, 90, 80, 70, 95, 88],
"Attendance": [90, 95, 85, 80, 100, 92]
}
df = pd.DataFrame(data)
pivot = pd.pivot_table(df,values=["Marks", "Attendance"],index="Department",
                       columns="Gender",aggfunc="mean",margins = True) #margin to print all
print(pivot)


Gender         F     M
Department            
CS          90.0  85.0
ECE         88.0  95.0
IT          70.0  80.0


Gender       F   M
Department        
CS          90  85
ECE         88  95
IT          70  80


            mean   max   min
           Marks Marks Marks
Department                  
CS          87.5    90    85
ECE         91.5    95    88
IT          75.0    80    70


           Attendance                             Marks                      
Gender              F           M        All          F          M        All
Department                                                                   
CS               95.0   90.000000  92.500000  90.000000  85.000000  87.500000
ECE              92.0  100.000000  96.000000  88.000000  95.000000  91.500000
IT               80.0   85.000000  82.500000  70.000000  80.000000  75.000000
All              89.0   91.666667  90.333333  82.666667  86.666667  84.666667


In [49]:
# D) Window Function
"""
    Window functions perform calculations over a defined “window” of data — like last 3
    values, or all values up to now.
    They are mainly used to calculate moving averages, cumulative sums, etc.
    In Pandas, two common window functions are:
        1. Rolling — fixed-size moving window
        2. Expanding — expanding (growing) window
"""

import pandas as pd
# Sample data
data = {"Day": [1, 2, 3, 4, 5, 6, 7],
"Sales": [100, 120, 130, 150, 170, 160, 180]}
df = pd.DataFrame(data)

# Rolling mean with window size 3
df["Rolling_Mean"] = df["Sales"].rolling(window=3).mean()
print(df)

   # Explanation:
    #    First 2 rows are NaN because we need 3 data points to calculate the average.
     #   From the 3rd row onward, it averages the current + previous 2 values.


   Day  Sales  Rolling_Mean
0    1    100           NaN
1    2    120           NaN
2    3    130    116.666667
3    4    150    133.333333
4    5    170    150.000000
5    6    160    160.000000
6    7    180    170.000000


In [51]:
# Expanding mean (cumulative average)
df["Expanding_Mean"] = df["Sales"].expanding().mean()
print(df)

    #Explanation:
        #   At Day 1 → Mean of [100] = 100
        #   At Day 2 → Mean of [100,120] = 110
        #    At Day 3 → Mean of [100,120,130] = 116.7
        #    It keeps including all previous values cumulatively

   Day  Sales  Rolling_Mean  Expanding_Mean
0    1    100           NaN      100.000000
1    2    120           NaN      110.000000
2    3    130    116.666667      116.666667
3    4    150    133.333333      125.000000
4    5    170    150.000000      134.000000
5    6    160    160.000000      138.333333
6    7    180    170.000000      144.285714


In [None]:
"""
    Difference Between Rolling and Expanding
Feature                 Rolling                     Expanding
Window Size             Fixed                       Growing
Example                 Last 3 days’ avg            From Day 1 to current day avg
Use Case                Short-term trend            Overall cumulative trend
First few values        NaN (till full window)      No NaN (starts immediately)

"""