In [4]:
# import libraries
import numpy as np 
import pandas as pd

In [5]:
# Toy sales dataset: six sales records stored column-wise (column name -> list of values).
data = dict(
    City=["Delhi", "Delhi", "Mumbai", "Mumbai", "Delhi", "Mumbai"],
    Gender=["M", "F", "M", "F", "M", "F"],
    Sales=[200, 150, 300, 250, 180, 270],
    Month=["Jan", "Jan", "Jan", "Feb", "Feb", "Feb"],
)

# Every key of `data` becomes one column of the frame.
df = pd.DataFrame(data=data)

In [7]:
# Preview the first six rows; the frame has exactly six records, so this shows all of it.
df.head(n=6)

Unnamed: 0,City,Gender,Sales,Month
0,Delhi,M,200,Jan
1,Delhi,F,150,Jan
2,Mumbai,M,300,Jan
3,Mumbai,F,250,Feb
4,Delhi,M,180,Feb
5,Mumbai,F,270,Feb


PIVOT TABLES

In [9]:
# Average Sales per City ("mean" is pivot_table's default aggregation, spelled out here).
table = df.pivot_table(index="City", values="Sales", aggfunc="mean")
print(table)

             Sales
City              
Delhi   176.666667
Mumbai  273.333333


In [11]:
# Average Sales for each City (rows) x Gender (columns) combination.
table = df.pivot_table(index="City", columns="Gender", values="Sales")
print(table)

Gender      F      M
City                
Delhi   150.0  190.0
Mumbai  260.0  300.0


In [12]:
# Several aggregations at once: each function yields its own Sales column in the result.
table = df.pivot_table(
    index="City",
    values="Sales",
    aggfunc=["mean", "max", "min"],
)
print(table)

              mean   max   min
             Sales Sales Sales
City                          
Delhi   176.666667   200   150
Mumbai  273.333333   300   250


In [13]:
# Two row keys: total Sales for each (City, Month) pair, producing a two-level row index.
table = df.pivot_table(
    index=["City", "Month"],
    values="Sales",
    aggfunc="sum",
)
print(table)

              Sales
City   Month       
Delhi  Feb      180
       Jan      350
Mumbai Feb      520
       Jan      300


In [14]:
# handling missing values: any City/Month combination with no rows is filled with 0.
# fill_value must be a number here: a string such as "0" would be written into the
# table as text, mixing strings into the otherwise numeric Sales averages.
table = pd.pivot_table(df, values="Sales", index="City", columns="Month", fill_value=0)
print(table)

Month     Feb    Jan
City                
Delhi   180.0  175.0
Mumbai  260.0  300.0


In [15]:
# margins=True adds an "All" row and an "All" column carrying the row/column/grand totals.
table = df.pivot_table(
    index="City",
    columns="Month",
    values="Sales",
    aggfunc="sum",
    margins=True,
)
print(table)

Month   Feb  Jan   All
City                  
Delhi   180  350   530
Mumbai  520  300   820
All     700  650  1350


In [16]:
# Filter first, then pivot: keep only the Delhi rows and total their Sales per Month.
table = df.loc[df["City"].eq("Delhi")].pivot_table(
    index="Month",
    values="Sales",
    aggfunc="sum",
)
print(table)

       Sales
Month       
Feb      180
Jan      350


DATA RESHAPING

In [17]:
# Wide-format scores: one row per student, one column per subject.
# NOTE: this rebinds `df`; the sales frame built earlier is no longer reachable under that name.
df = pd.DataFrame([
    {"Name": "Alice", "Math": 88, "Science": 85},
    {"Name": "Bob", "Math": 90, "Science": 92},
])

df.head()

Unnamed: 0,Name,Math,Science
0,Alice,88,85
1,Bob,90,92


In [23]:
# melt: reshape wide -> long. "Name" stays as the identifier column, while the Math and
# Science columns are folded into a Subject (former column name) / Score (value) pair.
# NOTE(review): the result overwrites `df`, so this cell is not re-runnable as-is; a second
# run would look for Math/Science columns that no longer exist. Re-run the cell that builds
# the wide frame first. The cells below rely on `df` being the long frame.
df = df.melt(
    id_vars=["Name"],
    value_vars=["Math", "Science"],
    var_name="Subject",
    value_name="Score",
)
df.head()

Unnamed: 0,Name,Subject,Score
0,Alice,Math,88
1,Bob,Math,90
2,Alice,Science,85
3,Bob,Science,92


In [25]:
# pivot: long -> wide again, one row per Name and one column per Subject.
# This is a pure reshape with no aggregation, so the scores keep their integer values.
pd.pivot(df, index="Name", columns="Subject", values="Score")

Subject,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,88,85
Bob,90,92


In [26]:
# pivot table: same Name x Subject layout as pivot, but each cell is aggregated
# (mean here), which is why the scores come back as floats.
df.pivot_table(index="Name", columns="Subject", values="Score", aggfunc="mean")

Subject,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,88.0,85.0
Bob,90.0,92.0


In [27]:
# stack: fold the column labels into an inner row-index level, giving one long Series
# keyed by (row label, column name). The result is only displayed; `df` is not reassigned.
df.stack(level=-1)

0  Name         Alice
   Subject       Math
   Score           88
1  Name           Bob
   Subject       Math
   Score           90
2  Name         Alice
   Subject    Science
   Score           85
3  Name           Bob
   Subject    Science
   Score           92
dtype: object

In [28]:
# unstack: with a single-level row index the result is a Series keyed by
# (column name, row label), i.e. the frame laid out column by column.
# The result is only displayed; `df` is not reassigned.
df.unstack(level=-1)

Name     0      Alice
         1        Bob
         2      Alice
         3        Bob
Subject  0       Math
         1       Math
         2    Science
         3    Science
Score    0         88
         1         90
         2         85
         3         92
dtype: object

In [29]:
# set_index: use the Name column as the row index. This returns a new frame that is
# only displayed; `df` itself keeps its original 0..3 index.
df.set_index(keys="Name")

Unnamed: 0_level_0,Subject,Score
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,Math,88
Bob,Math,90
Alice,Science,85
Bob,Science,92


In [30]:
# reset_index: replace the row index with a fresh 0..n-1 range and keep the old index
# as a regular column named "index" (drop=False is the default, spelled out here).
df.reset_index(drop=False)

Unnamed: 0,index,Name,Subject,Score
0,0,Alice,Math,88
1,1,Bob,Math,90
2,2,Alice,Science,85
3,3,Bob,Science,92
