# Combining & Reshaping Data

Combining multiple DataFrames and reshaping between wide and long formats using Pandas.


In [2]:
import pandas as pd
import numpy as np

# Two small example tables that share the key column 'id'.
# df_sales: one row per city with its sales figure.
df_sales = pd.DataFrame(
    data=dict(
        id=[1, 2, 3],
        city=['Delhi', 'Mumbai', 'Chennai'],
        sales=[100, 150, 90],
    )
)

# df_profit: profit per 'id', used below as the right-hand side of merges/joins.
df_profit = pd.DataFrame(
    data=dict(
        id=[1, 2, 3],
        profit=[20, 35, 10],
    )
)

# Show both frames as a tuple (last expression of the cell is displayed).
df_sales, df_profit

(   id     city  sales
 0   1    Delhi    100
 1   2   Mumbai    150
 2   3  Chennai     90,
    id  profit
 0   1      20
 1   2      35
 2   3      10)

## Combining DataFrames

In [3]:
# Row-wise concatenation (axis=0 is the default): the second copy is stacked
# under the first and the original index labels 0-2 are kept, so they repeat.
pd.concat([df_sales, df_sales], axis=0)

Unnamed: 0,id,city,sales
0,1,Delhi,100
1,2,Mumbai,150
2,3,Chennai,90
0,1,Delhi,100
1,2,Mumbai,150
2,3,Chennai,90


In [4]:
# Column-wise concatenation: the frames are placed side by side,
# with rows aligned on the index.
pd.concat([df_sales, df_sales], axis='columns')

Unnamed: 0,id,city,sales,id.1,city.1,sales.1
0,1,Delhi,100,1,Delhi,100
1,2,Mumbai,150,2,Mumbai,150
2,3,Chennai,90,3,Chennai,90


In [5]:
# Inner join on the shared key column 'id' (how='inner' is the default for pd.merge).
pd.merge(left=df_sales, right=df_profit, how='inner', on='id')

Unnamed: 0,id,city,sales,profit
0,1,Delhi,100,20
1,2,Mumbai,150,35
2,3,Chennai,90,10


In [6]:
# Left join: keep every row of df_sales and attach the matching profit.
# Every id in df_sales also appears in df_profit, so the result here is the
# same as the inner join.
pd.merge(left=df_sales, right=df_profit, how='left', on='id')

Unnamed: 0,id,city,sales,profit
0,1,Delhi,100,20
1,2,Mumbai,150,35
2,3,Chennai,90,10


In [7]:
# DataFrame.join matches the 'id' column of df_sales against the *index* of the
# other frame, which is why df_profit is re-indexed by 'id' first.
# how='left' is the default for join.
df_sales.join(other=df_profit.set_index('id'), on='id', how='left')

Unnamed: 0,id,city,sales,profit
0,1,Delhi,100,20
1,2,Mumbai,150,35
2,3,Chennai,90,10


Note: `DataFrame.append()` was deprecated in pandas 1.4 and removed in pandas 2.0. Use `pd.concat()` instead.


## Reshaping data

In [8]:
# Wide layout: one row per city, one column per year.
# Note the year labels are strings ('2022', '2023'), not integers.
df_wide = pd.DataFrame(
    data={
        'city': ['Delhi', 'Mumbai'],
        '2022': [100, 150],
        '2023': [120, 160],
    },
    columns=['city', '2022', '2023'],
)

df_wide

Unnamed: 0,city,2022,2023
0,Delhi,100,120
1,Mumbai,150,160


In [9]:
# Wide -> long: keep 'city' as the identifier and fold every other column
# into a ('year', 'sales') pair, one row per city/year combination.
df_long = pd.melt(
    df_wide,
    id_vars=['city'],
    var_name='year',
    value_name='sales',
)
df_long

Unnamed: 0,city,year,sales
0,Delhi,2022,100
1,Mumbai,2022,150
2,Delhi,2023,120
3,Mumbai,2023,160


In [10]:
# Long -> wide: one row per city, one column per year, cells filled from 'sales'.
df_long.pivot(
    index='city',
    columns='year',
    values='sales',
)

year,2022,2023
city,Unnamed: 1_level_1,Unnamed: 2_level_1
Delhi,100,120
Mumbai,150,160


In [11]:
# Same reshape via pivot_table, which aggregates the values that land in each
# city/year cell (here with the mean), so the result is floating point.
df_long.pivot_table(
    index='city',
    columns='year',
    values='sales',
    aggfunc='mean',
)

year,2022,2023
city,Unnamed: 1_level_1,Unnamed: 2_level_1
Delhi,100.0,120.0
Mumbai,150.0,160.0


In [12]:
# Build a Series of sales indexed by a two-level (city, year) MultiIndex.
df_stack = df_long.set_index(keys=['city', 'year']).loc[:, 'sales']
df_stack

city    year
Delhi   2022    100
Mumbai  2022    150
Delhi   2023    120
Mumbai  2023    160
Name: sales, dtype: int64

In [13]:
# Move the innermost index level ('year', the default for unstack) into columns,
# giving one row per city and one column per year.
df_stack.unstack(level='year')

year,2022,2023
city,Unnamed: 1_level_1,Unnamed: 2_level_1
Delhi,100,120
Mumbai,150,160


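Wide format → one row per entity (here, each city), with each level of a variable (here, each year) in its own column  
Long format → one row per observation (here, each city–year pair), with the variable's label in one column and its value in another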
