In [5]:
# Import pandas
import pandas as pd

# Read and parse World_Happiness_2015.csv
happiness2015 = pd.read_csv('../data/World_Happiness_2015.csv')
# Add year column for 2015
happiness2015['Year'] = 2015

# Read and parse World_Happiness_2016.csv
happiness2016 = pd.read_csv('../data/World_Happiness_2016.csv')
# Add year column for 2016
happiness2016['Year'] = 2016

# Read and parse World_Happiness_2017.csv
happiness2017 = pd.read_csv('../data/World_Happiness_2017.csv')
# Add year column for 2017
happiness2017['Year'] = 2017

# You can view the data like this:
print("2015 Data:")
print(happiness2015.head())
print("\n2016 Data:")
print(happiness2016.head())
print("\n2017 Data:")
print(happiness2017.head())

2015 Data:
       Country          Region  Happiness Rank  Happiness Score  \
0  Switzerland  Western Europe               1            7.587   
1      Iceland  Western Europe               2            7.561   
2      Denmark  Western Europe               3            7.527   
3       Norway  Western Europe               4            7.522   
4       Canada   North America               5            7.427   

   Standard Error  Economy (GDP per Capita)   Family  \
0         0.03411                   1.39651  1.34951   
1         0.04884                   1.30232  1.40223   
2         0.03328                   1.32548  1.36058   
3         0.03880                   1.45900  1.33095   
4         0.03553                   1.32629  1.32261   

   Health (Life Expectancy)  Freedom  Trust (Government Corruption)  \
0                   0.94143  0.66557                        0.41978   
1                   0.94784  0.62877                        0.14145   
2                   0.87464  0.64938

In [6]:
#Select only the columns we want from each dataframe
happiness2015_subset = happiness2015[['Country', 'Happiness Score', 'Year']].head(3)
happiness2016_subset = happiness2016[['Country', 'Happiness Score', 'Year']].head(3)

# Concatenate the two dataframes vertically and reset the index
happiness_combined_1516 = pd.concat([happiness2015_subset, happiness2016_subset], axis=0).reset_index(drop=True)

# Display the result
print("Combined 2015-2016 Data (First 3 rows from each year) with consecutive index:")
print(happiness_combined_1516)




Combined 2015-2016 Data (First 3 rows from each year) with consecutive index:
       Country  Happiness Score  Year
0  Switzerland            7.587  2015
1      Iceland            7.561  2015
2      Denmark            7.527  2015
3      Denmark            7.526  2016
4  Switzerland            7.509  2016
5      Iceland            7.501  2016


In [9]:
three_2015 = happiness2015[['Country','Happiness Rank','Year']].iloc[2:5]
three_2016 = happiness2016[['Country','Happiness Rank','Year']].iloc[2:5]

# Merge the two dataframes on the Country column
merged = pd.merge(three_2015, three_2016, on='Country', how='inner', suffixes=('_2015', '_2016'))

# Display the result
print("Combined 2015-2016 Data (Merged on Country):")
print(merged)



Combined 2015-2016 Data (Merged on Country):
  Country  Happiness Rank_2015  Year_2015  Happiness Rank_2016  Year_2016
0  Norway                    4       2015                    4       2016


In [10]:

# Print unique countries in each dataset
print("Countries in 2015 dataset:")
print(set(three_2015['Country']))
print("\nCountries in 2016 dataset:")
print(set(three_2016['Country']))

# Check for exact matches
matching_countries = set(three_2015['Country']) & set(three_2016['Country'])
print("\nCountries present in both years:")
print(matching_countries)

# Merge the two dataframes on the Country column
merged = pd.merge(three_2015, three_2016, on='Country')
print("\nMerged Dataset:")
print(merged)

Countries in 2015 dataset:
{'Canada', 'Norway', 'Denmark'}

Countries in 2016 dataset:
{'Finland', 'Iceland', 'Norway'}

Countries present in both years:
{'Norway'}

Merged Dataset:
  Country  Happiness Rank_x  Year_x  Happiness Rank_y  Year_y
0  Norway                 4    2015                 4    2016


In [11]:
# Standardize column names
happiness_2015 = happiness_2015.rename(columns={
    'Happiness Score': 'Happiness_Score', 
    'Happiness Rank': 'Happiness_Rank'
})
happiness_2016 = happiness_2016.rename(columns={
    'Happiness Score': 'Happiness_Score', 
    'Happiness Rank': 'Happiness_Rank'
})
happiness_2017 = happiness_2017.rename(columns={
    'Happiness.Score': 'Happiness_Score', 
    'Happiness.Rank': 'Happiness_Rank'
})

# Add year column to each dataset
happiness_2015['Year'] = 2015
happiness_2016['Year'] = 2016
happiness_2017['Year'] = 2017

# Function to calculate global happiness statistics
def calculate_happiness_stats(df):
    return {
        'mean': df['Happiness_Score'].mean(),
        'median': df['Happiness_Score'].median(),
        'min': df['Happiness_Score'].min(),
        'max': df['Happiness_Score'].max(),
        'std': df['Happiness_Score'].std()
    }

# Calculate statistics for each year
stats_2015 = calculate_happiness_stats(happiness_2015)
stats_2016 = calculate_happiness_stats(happiness_2016)
stats_2017 = calculate_happiness_stats(happiness_2017)

# Print detailed statistics
print("Happiness Score Statistics:")
print("2015:", stats_2015)
print("2016:", stats_2016)
print("2017:", stats_2017)

# Merge datasets to compare countries across years
# Use outer merge to keep all countries
merged_data = pd.merge(
    happiness_2015[['Country', 'Happiness_Score']], 
    happiness_2016[['Country', 'Happiness_Score']], 
    on='Country', 
    how='outer', 
    suffixes=('_2015', '_2016')
)

merged_data = pd.merge(
    merged_data, 
    happiness_2017[['Country', 'Happiness_Score']], 
    on='Country', 
    how='outer'
)
merged_data.columns = ['Country', 'Score_2015', 'Score_2016', 'Score_2017']

# Calculate change between years
merged_data['Change_2015_2016'] = merged_data['Score_2016'] - merged_data['Score_2015']
merged_data['Change_2016_2017'] = merged_data['Score_2017'] - merged_data['Score_2016']

# Print overall trend
print("\nOverall Trend Analysis:")
print(f"Mean Change 2015 to 2016: {merged_data['Change_2015_2016'].mean():.4f}")
print(f"Mean Change 2016 to 2017: {merged_data['Change_2016_2017'].mean():.4f}")

# Percentage of countries with positive/negative changes
print("\nCountry Change Percentages:")
pct_improved_2015_2016 = (merged_data['Change_2015_2016'] > 0).mean() * 100
pct_improved_2016_2017 = (merged_data['Change_2016_2017'] > 0).mean() * 100

print(f"Percentage of Countries Improving 2015-2016: {pct_improved_2015_2016:.2f}%")
print(f"Percentage of Countries Improving 2016-2017: {pct_improved_2016_2017:.2f}%")

# Visualize the distribution of changes
plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
merged_data['Change_2015_2016'].hist(bins=20)
plt.title('Happiness Score Changes 2015-2016')
plt.xlabel('Change in Happiness Score')
plt.ylabel('Number of Countries')

plt.subplot(1,2,2)
merged_data['Change_2016_2017'].hist(bins=20)
plt.title('Happiness Score Changes 2016-2017')
plt.xlabel('Change in Happiness Score')
plt.ylabel('Number of Countries')

plt.tight_layout()
plt.show()


Happiness Score Statistics:
2015: {'mean': np.float64(5.375734177215189), 'median': np.float64(5.2325), 'min': np.float64(2.839), 'max': np.float64(7.587), 'std': np.float64(1.1450101349520665)}
2016: {'mean': np.float64(5.382184713375795), 'median': np.float64(5.314), 'min': np.float64(2.905), 'max': np.float64(7.526), 'std': np.float64(1.1416735176005715)}
2017: {'mean': np.float64(5.354019355773926), 'median': np.float64(5.27899980545044), 'min': np.float64(2.69300007820129), 'max': np.float64(7.53700017929077), 'std': np.float64(1.1312300899149939)}

Overall Trend Analysis:
Mean Change 2015 to 2016: -0.0104
Mean Change 2016 to 2017: 0.0064

Country Change Percentages:
Percentage of Countries Improving 2015-2016: 43.37%
Percentage of Countries Improving 2016-2017: 44.58%


NameError: name 'plt' is not defined