In [20]:
import pandas as pd

# Demonstrates pandas combining and summarising operations on the Delhi AQI
# CSV: concat, merge, the four join types, a groupby aggregation and a pivot
# table. Everything stays at module level so later cells can reuse the frames.

# Both frames come from the same CSV, so parse it once and take an independent
# copy for the second frame instead of reading the file twice.
# NOTE(review): point df2 at a second file to combine two distinct datasets.
df1 = pd.read_csv("/content/delhi_aqi.csv")

# Strip any leading or trailing spaces in column names (done before copying so
# both frames end up with the cleaned names).
df1.columns = df1.columns.str.strip()
df2 = df1.copy()

# Display column names to verify common columns
print("DF1 Columns:", df1.columns)
print("DF2 Columns:", df2.columns)

# Column present in both frames; used as the key for every merge/join below.
common_column = "date"

# 1. Concatenating DataFrames (stacking them on top of each other).
#    ignore_index=True renumbers the rows instead of repeating the old labels.
df_concat = pd.concat([df1, df2], ignore_index=True)

# 2. Merging DataFrames based on a common column. Every non-key column exists
#    in both frames, so pandas disambiguates them with its default _x / _y
#    suffixes.
df_merge = pd.merge(df1, df2, on=common_column, how="inner")

# 3. Performing different types of joins on the same key.
#    inner: keys found in both frames; left/right: all keys of df1/df2;
#    outer: union of keys from both frames.
inner_join = df1.merge(df2, on=common_column, how="inner")
left_join = df1.merge(df2, on=common_column, how="left")
right_join = df1.merge(df2, on=common_column, how="right")
outer_join = df1.merge(df2, on=common_column, how="outer")

# 4. Aggregation and Grouping: mean, sum and row count of pm2_5 for each
#    distinct so2 value.
# NOTE(review): so2 looks like a continuous reading, so this yields one group
# per distinct value — confirm it is the intended key (binning so2 or grouping
# by calendar day may be more informative).
grouped = df1.groupby("so2").agg({"pm2_5": ["mean", "sum", "count"]})

# 5. Creating a Pivot Table to summarize data: mean pm2_5 indexed by so2.
pivot_table = df1.pivot_table(index="so2", values="pm2_5", aggfunc="mean")

# Display results. sep="\n" puts each label on its own line so the frame's
# header row lines up with its data rows instead of being pushed right by the
# label text.
print("Concatenated DataFrame:", df_concat.head(), sep="\n")
print("Merged DataFrame:", df_merge.head(), sep="\n")
print("Grouped Data:", grouped.head(), sep="\n")
print("Pivot Table:", pivot_table.head(), sep="\n")


DF1 Columns: Index(['date', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3'], dtype='object')
DF2 Columns: Index(['date', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3'], dtype='object')
Concatenated DataFrame:                   date       co     no     no2     o3    so2   pm2_5    pm10  \
0  2020-11-25 01:00:00  2616.88   2.18   70.60  13.59  38.62  364.61  411.73   
1  2020-11-25 02:00:00  3631.59  23.25   89.11   0.33  54.36  420.96  486.21   
2  2020-11-25 03:00:00  4539.49  52.75  100.08   1.11  68.67  463.68  541.95   
3  2020-11-25 04:00:00  4539.49  50.96  111.04   6.44  78.20  454.81  534.00   
4  2020-11-25 05:00:00  4379.27  42.92  117.90  17.17  87.74  448.14  529.19   

     nh3  
0  28.63  
1  41.04  
2  49.14  
3  48.13  
4  46.61  
Merged DataFrame:                   date     co_x   no_x   no2_x   o3_x  so2_x  pm2_5_x  pm10_x  \
0  2020-11-25 01:00:00  2616.88   2.18   70.60  13.59  38.62   364.61  411.73   
1  2020-11-25 02:00:00  3631.59  23.25   89.11 