In [None]:
## Crosstab


In [None]:
# %% [markdown]
# <h1 style="color:blue" align="center">Crosstab Tutorial</h1>

# %%
import pandas as pd

# Survey data that every crosstab in this section is built from.
df = pd.read_excel("survey.xls")
df

# %%
# Frequency table: one row per Nationality, one column per Handedness.
pd.crosstab(index=df["Nationality"], columns=df["Handedness"])

# %%
# Same kind of table, with Sex on the rows instead.
pd.crosstab(index=df["Sex"], columns=df["Handedness"])

# %% [markdown]
# <h2 style="color:purple">Margins</h2>

# %%
# margins=True appends an "All" row and column holding the totals.
pd.crosstab(index=df["Sex"], columns=df["Handedness"], margins=True)

# %% [markdown]
# <h2 style="color:purple">Multi Index Column and Rows</h2>

# %%
# Passing a list of Series for the columns produces two-level column headers.
pd.crosstab(
    index=df["Sex"],
    columns=[df["Handedness"], df["Nationality"]],
    margins=True,
)

# %%
# Passing a list of Series for the rows produces a two-level row index.
pd.crosstab(
    index=[df["Nationality"], df["Sex"]],
    columns=[df["Handedness"]],
    margins=True,
)

# %% [markdown]
# <h2 style="color:purple">Normalize</h2>

# %%
# normalize='index' turns the counts into fractions of each row's total.
pd.crosstab(index=df["Sex"], columns=df["Handedness"], normalize='index')

# %% [markdown]
# <h2 style="color:purple">Aggfunc and Values</h2>

# %%
import numpy as np

# Instead of counting rows, aggregate Age with np.average within each
# Sex/Handedness cell.
pd.crosstab(
    index=df["Sex"],
    columns=df["Handedness"],
    values=df["Age"],
    aggfunc=np.average,
)




In [None]:
## Melt

In [None]:
# %% [markdown]
# <h3 style="color:purple" align="center">Reshape pandas dataframe using melt</h3>

# %%
import io
from pathlib import Path

import pandas as pd

# Contents of weather.csv. These rows used to sit in this cell as bare text,
# which Python evaluated as tuple expressions and failed with NameError
# (`day`, `Monday`, ... are not defined). Keeping them in a string preserves
# the reference copy and lets the notebook run when the file is missing.
WEATHER_CSV_TEXT = """\
day,chicago,chennai,berlin
Monday,32,75,41
Tuesday,30,77,43
Wednesday,28,75,45
Thursday,22,82,38
Friday,30,83,30
Saturday,20,81,45
Sunday,25,77,47
"""

# Prefer the file on disk; fall back to the inline copy only when it is absent.
weather_path = Path("weather.csv")
df = pd.read_csv(weather_path if weather_path.exists() else io.StringIO(WEATHER_CSV_TEXT))
df


# %%
# Wide -> long: keep `day` as the identifier and fold every other column
# (one per city) into a (city, temperature) pair of columns.
melted = pd.melt(df, id_vars=["day"], var_name='city', value_name='temperature')
melted




In [None]:
## Stack

In [None]:
# %% [markdown]
# <h1 style="color:blue">Reshape dataframe using stack/unstack</h1>

# %%
import pandas as pd

# Sheet rows 0 and 1 are both header rows, so the columns come back two-level.
df = pd.read_excel("stocks.xlsx", header=[0, 1])
df

# %%
# stack() defaults to level=-1: the innermost column level moves into the row index.
df.stack(level=-1)

# %%
# Minimal stack/unstack round trip on a toy frame.
# The toy frame has its own name (`toy_df`) so it does not overwrite `df`,
# which the cells further down keep stacking. Each step sits in its own cell
# because only the last expression of a cell is displayed.
toy_df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['first', 'second'])
toy_df

# %%
# stack() folds the column labels into an inner row-index level.
stacked_df = toy_df.stack()
stacked_df

# %%
# unstack() pivots that inner level back out into columns.
unstacked_df = stacked_df.unstack()
unstacked_df



# %%
# Stack column level 0 explicitly (stack() on its own uses the last level).
df.stack(level=0)

# %%
# Keep the default-level stack result so the next cell can reverse it.
df_stacked = df.stack(level=-1)
df_stacked

# %%
# unstack() defaults to level=-1: the innermost row-index level goes back to columns.
df_stacked.unstack(level=-1)

# %% [markdown]
# <h1 style="color:blue">3 levels of column headers</h1>

# %%
# Three header rows this time.
df2 = pd.read_excel("stocks_3_levels.xlsx", header=[0, 1, 2])
df2

# %%
# Default: stack the innermost column level.
df2.stack(level=-1)

# %%
# Stack the outermost column level.
df2.stack(level=0)

# %%
# Stack the middle column level.
df2.stack(level=1)

# %%


# %% [markdown]
# # Other

# %%
import pandas as pd

def demonstrate_stack():
    """Print a small 3x3 frame, then the result of stacking it."""
    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
    print("Original DataFrame:", frame, sep="\n")

    # stack() turns each (row, column) cell into one entry of a Series.
    stacked = frame.stack()
    print("\nStacked DataFrame:", stacked, sep="\n")

def demonstrate_stack_multiindex():
    """Print a frame with a two-level row index, then the result of stacking it."""
    # Cartesian product of the two levels: (A, One), (A, Two), (B, One), (B, Two).
    row_index = pd.MultiIndex.from_product(
        [['A', 'B'], ['One', 'Two']],
        names=['Letter', 'Number'],
    )
    frame = pd.DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=row_index,
        columns=['X', 'Y'],
    )
    print("\nMultiIndex DataFrame:", frame, sep="\n")

    # Stacking adds the column labels (X, Y) as a third, innermost index level.
    stacked = frame.stack()
    print("\nStacked MultiIndex DataFrame:", stacked, sep="\n")

def demonstrate_stack_with_groupby():
    """Stack a frame with (measure, unit) columns, then average within each unit.

    Prints the original frame, the stacked frame, and the per-unit means.
    """
    measure_unit_cols = pd.MultiIndex.from_tuples([('weight', 'kg'), ('height', 'm')])
    animals = pd.DataFrame(
        [[1.0, 2.0], [3.0, 4.0]],
        index=['cat', 'dog'],
        columns=measure_unit_cols,
    )
    print("\nMulti-Level Column DataFrame:", animals, sep="\n")

    # The unit level of the columns becomes the inner level of the row index.
    long_form = animals.stack()
    print("\nStacked DataFrame:", long_form, sep="\n")

    # Row-index level 1 is that unit level (kg, m); average within each unit.
    per_unit_mean = long_form.groupby(level=1).mean()
    print("\nGrouped by Level 1 (Average per unit):", per_unit_mean, sep="\n")

def main():
    """Run the three stack demonstrations one after another."""
    for demo in (
        demonstrate_stack,
        demonstrate_stack_multiindex,
        demonstrate_stack_with_groupby,
    ):
        demo()
main()

import seaborn as sns
import matplotlib.pyplot as plt

# Wide table: one row per year, one sales column per product.
df = pd.DataFrame(
    [[2022, 100, 80], [2023, 120, 90], [2024, 140, 100]],
    columns=['Year', 'Sales_A', 'Sales_B'],
)
df

# Long table: stacking yields one row per (Year, sales column) pair, which is
# the shape seaborn's `hue` argument expects.
stacked_df = (
    df.set_index('Year')
    .stack()
    .reset_index()
    .set_axis(['Year', 'Product', 'Sales'], axis=1)
)
stacked_df

# One line per value of the Product column.
sns.lineplot(data=stacked_df, x='Year', y='Sales', hue='Product')
plt.show()



