In [1]:
import os
import pandas as pd

In [2]:
# Lift pandas' row and column display limits so printed DataFrames are not truncated
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

In [3]:
# One CSV per survey year, 2015 through 2020, named "<year>.csv"
file_paths = [f"{survey_year}.csv" for survey_year in range(2015, 2021)]

# Empty DataFrame that the combining step starts from
combined_df = pd.DataFrame()

In [4]:
# Map every year-specific header spelling onto one canonical column name.
# Built once, outside the loop, because it does not depend on the file being read.
column_aliases = {
    "Country or region": "Country",
    "Country name": "Country",
    "Happiness.Score": "Happiness Score",
    "Ladder score": "Happiness Score",
    "Score": "Happiness Score",
    "Economy..GDP.per.Capita.": "GDP per Capita",
    "Economy (GDP per Capita)": "GDP per Capita",
    "Logged GDP per capita": "GDP per Capita",
    "GDP per capita": "GDP per Capita",
    "Family": "Social Support",
    "Social support": "Social Support",
    "Explained by: GDP per capita": "GDP per Capita",
    "Health..Life.Expectancy.": "Life Expectancy/Health",
    "Health (Life Expectancy)": "Life Expectancy/Health",
    "Healthy life expectancy": "Life Expectancy/Health",
    "Freedom to make life choices": "Freedom",
    "Trust..Government.Corruption.": "Trust (government corruption)",
    "Trust (Government Corruption)": "Trust (government corruption)",
    "Perceptions of corruption": "Trust (government corruption)",
    "Explained by: Generosity": "Generosity"
}

# Read each yearly CSV, tag it with its year, harmonise the headers, and collect the
# per-year frames in a list so they can be concatenated in a single call below.
yearly_frames = []
for file_path in file_paths:
    # The file stem is the year, e.g. "2015.csv" -> "2015" (kept as a string)
    year = os.path.splitext(os.path.basename(file_path))[0]

    # Parse numbers with "," as thousands separator and "." as decimal point
    df = pd.read_csv(file_path, thousands=',', decimal='.')

    # Add the "Year" column as the first column
    df.insert(0, "Year", year)

    # Rename to the canonical column names (headers not in the map are left as-is)
    df = df.rename(columns=column_aliases)

    # Several aliases collapse onto the same canonical name, so one file can end up
    # with duplicate column names; only the first occurrence of each name is kept.
    # NOTE(review): e.g. "Logged GDP per capita" and "Explained by: GDP per capita"
    # both become "GDP per Capita". Which one survives depends on the column order in
    # the CSV - confirm the surviving column is on the same scale as the other years.
    df = df.loc[:, ~df.columns.duplicated()]

    yearly_frames.append(df)

# Build combined_df from scratch with one concat. Unlike appending to an existing
# combined_df inside the loop, re-running this cell cannot duplicate rows, and the
# data is copied once instead of once per file. ignore_index=True gives the result a
# fresh 0..n-1 index.
combined_df = pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()

# Keep only the harmonised columns, in a fixed order; any other column is dropped.
# Raises KeyError if one of these columns is missing from combined_df.
desired_order = ["Year", "Country", "Happiness Score", "GDP per Capita", 
                 "Social Support", "Life Expectancy/Health", "Freedom", "Trust (government corruption)", 
                 "Generosity"]

combined_df = combined_df[desired_order]

# Print the cleaned DataFrame using the display options currently in effect
print(combined_df)

# Put the pandas display limits back to their defaults
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')

# Export the cleaned DataFrame to a CSV file.
# to_csv does not create missing directories, so make sure the target folder exists
# first; exist_ok=True keeps this safe to re-run.
output_file = "output/combined_data.csv"
os.makedirs(os.path.dirname(output_file), exist_ok=True)
combined_df.to_csv(output_file, index=False)

     Year                    Country  Happiness Score  GDP per Capita  \
0    2015                Switzerland           7.5870        1.396510   
1    2015                    Iceland           7.5610        1.302320   
2    2015                    Denmark           7.5270        1.325480   
3    2015                     Norway           7.5220        1.459000   
4    2015                     Canada           7.4270        1.326290   
5    2015                    Finland           7.4060        1.290250   
6    2015                Netherlands           7.3780        1.329440   
7    2015                     Sweden           7.3640        1.331710   
8    2015                New Zealand           7.2860        1.250180   
9    2015                  Australia           7.2840        1.333580   
10   2015                     Israel           7.2780        1.228570   
11   2015                 Costa Rica           7.2260        0.955780   
12   2015                    Austria           7.20

In [5]:
# Number of distinct country names in the combined data (missing values are not counted)
combined_df["Country"].nunique(dropna=True)


172

In [6]:
# Count how many rows each country name has in the combined data
countries = combined_df['Country'].value_counts()

# Lift the row limit only while printing the full list. option_context restores the
# previous 'display.max_rows' value on exit, even if printing raises, so the setting
# cannot leak into later cells.
with pd.option_context('display.max_rows', None):
    print(countries)

Switzerland                  6
India                        6
Mongolia                     6
Greece                       6
Lebanon                      6
Hungary                      6
Honduras                     6
Tajikistan                   6
Tunisia                      6
Palestinian Territories      6
Bangladesh                   6
Iran                         6
Ukraine                      6
Iraq                         6
South Africa                 6
Ghana                        6
Zimbabwe                     6
Dominican Republic           6
Bosnia and Herzegovina       6
Albania                      6
Montenegro                   6
Turkey                       6
Kyrgyzstan                   6
Nigeria                      6
Azerbaijan                   6
Pakistan                     6
Jordan                       6
China                        6
Morocco                      6
Zambia                       6
Romania                      6
Iceland                      6
Portugal