In [1]:
import pandas as pd


In [7]:
covid_df = pd.read_csv('covid-data.csv')

econ_df = pd.read_csv('world_bank_economic_data.csv')


In [8]:
print(econ_df.columns.tolist())


['Country Name', 'Country Code', 'Series Name', 'Series Code', '2019 [YR2019]', '2022 [YR2022]']


In [10]:

econ_df.rename(columns={
    'Country Name': 'Country',
    'Series Name': 'Indicator'
}, inplace=True)

econ_df_melted = econ_df.melt(
    id_vars=['Country', 'Indicator'],
    var_name='Year',
    value_name='Value'
)

econ_df_melted['Year'] = econ_df_melted['Year'].str.extract(r'(\d{4})')
econ_df_melted.dropna(subset=['Year'], inplace=True)
econ_df_melted['Year'] = econ_df_melted['Year'].astype(int)

econ_df_melted['Value'] = pd.to_numeric(econ_df_melted['Value'], errors='coerce')

econ_pivot = econ_df_melted.pivot_table(
    index=['Country', 'Year'],
    columns='Indicator',
    values='Value'
).reset_index()

print(econ_pivot.head())


Indicator      Country  Year  GDP (current US$)  \
0          Afghanistan  2019       1.879944e+10   
1          Afghanistan  2022       1.449724e+10   
2              Albania  2019       1.558511e+10   
3              Albania  2022       1.901724e+10   
4              Algeria  2019       1.934597e+11   

Indicator  Inflation, consumer prices (annual %)  \
0                                       2.302373   
1                                      13.712102   
2                                       1.411091   
3                                       6.725203   
4                                       1.951768   

Indicator  Merchandise trade (% of GDP)  \
0                             40.644818   
1                             43.801429   
2                             55.264305   
3                             66.823568   
4                             41.323343   

Indicator  Unemployment, total (% of total labor force) (national estimate)  
0                                          

In [12]:
covid_df_simple = covid_df[['location', 'date', 'total_cases', 'total_deaths']].copy()

covid_df_simple.rename(columns={'location': 'Country'}, inplace=True)

covid_df_simple['Year'] = pd.to_datetime(covid_df_simple['date']).dt.year

covid_latest = covid_df_simple.groupby(['Country', 'Year'])[['total_cases', 'total_deaths']].max().reset_index()


In [13]:
print(covid_latest.head())


       Country  Year  total_cases  total_deaths
0  Afghanistan  2020      51848.0        2158.0
1  Afghanistan  2021     157902.0        7352.0
2  Afghanistan  2022     207322.0        7845.0
3  Afghanistan  2023     230375.0        7973.0
4  Afghanistan  2024     235214.0        7998.0


In [14]:
merged_df = pd.merge(econ_pivot, covid_latest, on=['Country', 'Year'], how='inner')

print(merged_df.head())


          Country  Year  GDP (current US$)  \
0     Afghanistan  2022       1.449724e+10   
1         Albania  2022       1.901724e+10   
2         Algeria  2022       2.256385e+11   
3  American Samoa  2022       8.710000e+08   
4         Andorra  2022       3.380613e+09   

   Inflation, consumer prices (annual %)  Merchandise trade (% of GDP)  \
0                              13.712102                     43.801429   
1                               6.725203                     66.823568   
2                               9.265516                     46.351141   
3                                    NaN                    116.991963   
4                                    NaN                     66.555985   

   Unemployment, total (% of total labor force) (national estimate)  \
0                                                NaN                  
1                                             10.785                  
2                                                NaN             

In [15]:
merged_df.to_csv('merged_data.csv', index=False)


In [16]:
merged_df_filtered = merged_df[merged_df['Country'] == 'India']

merged_df_filtered = merged_df[(merged_df['Year'] >= 2020) & (merged_df['Year'] <= 2022)]

print(merged_df_filtered.head())


          Country  Year  GDP (current US$)  \
0     Afghanistan  2022       1.449724e+10   
1         Albania  2022       1.901724e+10   
2         Algeria  2022       2.256385e+11   
3  American Samoa  2022       8.710000e+08   
4         Andorra  2022       3.380613e+09   

   Inflation, consumer prices (annual %)  Merchandise trade (% of GDP)  \
0                              13.712102                     43.801429   
1                               6.725203                     66.823568   
2                               9.265516                     46.351141   
3                                    NaN                    116.991963   
4                                    NaN                     66.555985   

   Unemployment, total (% of total labor force) (national estimate)  \
0                                                NaN                  
1                                             10.785                  
2                                                NaN             

In [17]:
merged_df.to_csv('merged_economy_covid_data.csv', index=False)
