In [1]:
import pandas as pd

# Step 1: Read the three cleaned CSV files into DataFrames.
# Each path is named first so the source of every table is visible at a glance.
gdp_path = "D:/project3/GDP-per-capita-project/Cleaned_data/gdp_clean.csv"
gini_path = "D:/project3/GDP-per-capita-project/Cleaned_data/gini_clean.csv"
health_path = "D:/project3/GDP-per-capita-project/Cleaned_data/health_clean.csv"

gdp = pd.read_csv(gdp_path)
gini = pd.read_csv(gini_path)
health = pd.read_csv(health_path)

# Sanity check: report (rows, columns) for each table in load order.
for label, frame in (("GDP shape:", gdp),
                     ("GINI shape:", gini),
                     ("Health shape:", health)):
    print(label, frame.shape)

# Last expression of the cell: rich preview of the first GDP rows.
gdp.head()


GDP shape: (256, 23)
GINI shape: (53, 23)
Health shape: (233, 23)


Unnamed: 0,Country Name,Country Code,2002,2003,2004,2005,2006,2007,2008,2009,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,21307.248251,21949.485996,23700.63199,24171.837133,24845.658484,26736.308944,28171.909401,25134.77123,...,25813.576727,26129.839062,27458.225331,27441.529662,28440.051964,30082.127645,31096.205074,22855.93232,27200.061079,30559.533535
1,Africa Eastern and Southern,AFE,625.998815,809.968139,981.744631,1115.713956,1220.002468,1362.310651,1421.118612,1386.590206,...,1673.140476,1656.107642,1479.564123,1329.777824,1520.171298,1538.924188,1493.780445,1344.080962,1522.590088,1628.024526
2,Afghanistan,AFG,178.954088,198.871116,221.763654,254.184249,274.218554,376.223152,381.733238,452.053705,...,637.087099,625.054942,565.56973,522.082216,525.469771,491.337221,496.602504,510.787063,356.496214,357.261153
3,Africa Western and Central,AFW,616.796033,694.500939,834.861291,989.699739,1222.550498,1393.748487,1652.18985,1439.047803,...,2113.316986,2204.9957,1845.767804,1616.843198,1560.162999,1703.896392,1783.654365,1664.249176,1747.840549,1777.235012
4,Angola,AGO,882.147847,992.698979,1266.210864,1916.468422,2617.05196,3141.04598,4103.0669,3136.64547,...,5057.747878,5005.999741,3213.902611,1807.952941,2437.259712,2538.591391,2189.855714,1449.922867,1925.874661,2929.694455


In [3]:
# Step 2a: Reshape GDP from wide (one column per year) to long
# (one row per country and year).
# Every column not listed as an identifier is unpivoted: its label goes into
# 'Year' and its cell value into 'GDP_per_capita'.
gdp_id_cols = ['Country Name', 'Country Code']
gdp_long = pd.melt(
    gdp,
    id_vars=gdp_id_cols,
    var_name='Year',
    value_name='GDP_per_capita',
)

gdp_long.head()


Unnamed: 0,Country Name,Country Code,Year,GDP_per_capita
0,Aruba,ABW,2002,21307.248251
1,Africa Eastern and Southern,AFE,2002,625.998815
2,Afghanistan,AFG,2002,178.954088
3,Africa Western and Central,AFW,2002,616.796033
4,Angola,AGO,2002,882.147847


In [5]:
# Step 2b: Reshape GINI from wide (one column per year) to long
# (one row per country and year).
# Every column not listed as an identifier is unpivoted: its label goes into
# 'Year' and its cell value into 'GINI_index'. Missing values stay as NaN rows.
gini_id_cols = ['Country Name', 'Country Code']
gini_long = pd.melt(
    gini,
    id_vars=gini_id_cols,
    var_name='Year',
    value_name='GINI_index',
)

gini_long.head()


Unnamed: 0,Country Name,Country Code,Year,GINI_index
0,Argentina,ARG,2002,53.8
1,Armenia,ARM,2002,34.8
2,Austria,AUT,2002,
3,Belgium,BEL,2002,
4,Bulgaria,BGR,2002,


In [7]:
# Step 2c: Reshape health spending from wide (one column per year) to long
# (one row per country and year).
# Every column not listed as an identifier is unpivoted: its label goes into
# 'Year' and its cell value into 'Health_spending_per_capita'.
health_id_cols = ['Country Name', 'Country Code']
health_long = pd.melt(
    health,
    id_vars=health_id_cols,
    var_name='Year',
    value_name='Health_spending_per_capita',
)

health_long.head()


Unnamed: 0,Country Name,Country Code,Year,Health_spending_per_capita
0,Africa Eastern and Southern,AFE,2002,35.19015
1,Afghanistan,AFG,2002,17.007586
2,Africa Western and Central,AFW,2002,21.827682
3,Angola,AGO,2002,29.0548
4,Albania,ALB,2002,78.994781


In [9]:
# Step 3: Combine the three long tables into one row per (country, year).
#
# Both joins are inner joins, so a row survives only if its
# (Country Name, Country Code, Year) key appears in all three tables.
# Rows whose key matches but whose indicator value is missing are kept as NaN.
#
# validate='one_to_one' makes pandas raise a MergeError if a key is repeated
# on either side of a join, instead of silently multiplying rows.
merge_keys = ['Country Name', 'Country Code', 'Year']

# Merge GDP with GINI
gdp_gini_merged = pd.merge(gdp_long, gini_long,
                           on=merge_keys,
                           how='inner',
                           validate='one_to_one')

# Then merge with Health
final_merged = pd.merge(gdp_gini_merged, health_long,
                        on=merge_keys,
                        how='inner',
                        validate='one_to_one')

# Preview the merged dataset
print("Final merged shape:", final_merged.shape)
final_merged.head()


Final merged shape: (1113, 6)


Unnamed: 0,Country Name,Country Code,Year,GDP_per_capita,GINI_index,Health_spending_per_capita
0,Argentina,ARG,2002,2569.699635,53.8,222.647507
1,Armenia,ARM,2002,742.651118,34.8,58.0
2,Austria,AUT,2002,26334.862215,,2498.0
3,Belgium,BEL,2002,25006.191397,,2084.719971
4,Bulgaria,BGR,2002,2092.982886,,148.0


In [11]:
# Step 4: Write the merged table to disk as CSV.
# index=False keeps the DataFrame's row index out of the file.
output_path = "D:/project3/GDP-per-capita-project/Cleaned_data/merged_data_2002_2022.csv"
final_merged.to_csv(path_or_buf=output_path, index=False)

# Confirm where the file was written.
print("Merged dataset saved to:", output_path)


Merged dataset saved to: D:/project3/GDP-per-capita-project/Cleaned_data/merged_data_2002_2022.csv
