In [None]:
import pandas as pd

# Load one World Happiness Report CSV per year (2015-2023), tag every row with
# the year of the report it came from, and stack the results into one DataFrame.
yearly_frames = []
for year in range(2015, 2024):
    filename = f'/kaggle/input/global-happiness-scores-and-factors/WHR_{year}.csv'  # input CSV for this year
    data = pd.read_csv(filename)
    data['year'] = year  # remember which yearly file each row was read from
    yearly_frames.append(data)

# Concatenate once, after the loop: calling pd.concat inside the loop re-copies
# every previously loaded row on each iteration and needs an empty seed frame.
concatenated_data = pd.concat(yearly_frames, ignore_index=True)

# Preview the first rows of the combined DataFrame
print(concatenated_data.head())

# Persist the combined data. Both file names written by the earlier version of
# this cell are kept so that anything reading either of them still works.
concatenated_data.to_csv('WHR_concatenated.csv', index=False)
concatenated_data.to_csv('WHR_concatenated_with_year.csv', index=False)



In [None]:
# Reload the combined dataset from the CSV written to the working directory
data = pd.read_csv(
    filepath_or_buffer='/kaggle/working/WHR_concatenated_with_year.csv',
)
data

In [None]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

# Univariate analysis: histogram (20 bins) of the happiness score with a KDE overlay
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=data, x='happiness_score', bins=20, kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of happiness score')
hist_ax.set_xlabel('Happiness Score')
hist_ax.set_ylabel('Frequency')
plt.show()



In [None]:
# Bivariate analysis: GDP per capita against happiness score, points coloured by year
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=data,
    x='gdp_per_capita',
    y='happiness_score',
    hue='year',
    palette='viridis',
    ax=scatter_ax,
)
scatter_ax.set_title('Relationship between GDP per capita and happiness score')
scatter_ax.set_xlabel('GDP per Capita')
scatter_ax.set_ylabel('Happiness Score')
plt.show()





In [None]:
# Multivariate analysis: pairwise correlations between the numeric columns.
# Non-numeric columns are excluded before computing the matrix.
numeric_data = data.select_dtypes(include='number')
correlation_matrix = numeric_data.corr()

# Annotated heatmap of the correlation matrix
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Matrix between Numeric Variables')
plt.show()



In [None]:
# Plotly Express bar chart of happiness score per region, coloured by year
fig = px.bar(
    data_frame=data,
    x='region',
    y='happiness_score',
    color='year',
    title='Happiness Score by Region and Year',
)
fig.show()



In [None]:
# Happiness score over the years as a single line; errorbar=None turns off the
# error band seaborn would otherwise draw around the line.
line_fig, line_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=data, x='year', y='happiness_score', errorbar=None, ax=line_ax)
line_ax.set_title('Happiness Score Over the Years')
line_ax.set_xlabel('Year')
line_ax.set_ylabel('Happiness Score')
plt.show()

In [None]:
def print_top_countries(data_frame, num_countries, ascending=True):
    """Print a text table ranking countries by happiness score.

    Each country appears at most once, represented by its most extreme row in
    the requested direction (lowest score when ``ascending`` is true, highest
    otherwise).

    Args:
        data_frame: DataFrame with a ``country`` column and a numeric
            ``happiness_score`` column. A country may occupy several rows.
        num_countries: Maximum number of distinct countries to print. Values
            of zero or less print the table frame with no country rows.
        ascending: If true, list the least happy countries (lowest scores
            first); otherwise list the happiest (highest scores first).

    Returns:
        None. The table is written to standard output.
    """
    # Rows without a score cannot be ranked.
    scored = data_frame.dropna(subset=['happiness_score'])

    # De-duplicate by country BEFORE truncating. Truncating first (to a fixed
    # multiple of num_countries) lets a few countries with many rows fill the
    # window, so fewer than num_countries distinct countries get printed.
    # The stable sort keeps the original row order among tied scores.
    ranked = scored.sort_values('happiness_score', ascending=ascending, kind='stable')
    top_countries = ranked.drop_duplicates(subset='country', keep='first').head(max(num_countries, 0))

    # The heading reports the requested count rather than a hard-coded "10".
    if ascending:
        heading = f"Top {num_countries} Least Happy Countries"
    else:
        heading = f"Top {num_countries} Happiest Countries"

    print("\n" + "-" * 54)
    print(f"|{'':^50}|")
    print(f"|{heading:^50}|")
    print(f"|{'':^50}|")
    print("-" * 54)

    for country, score in zip(top_countries['country'], top_countries['happiness_score']):
        print(f"| {country:<40} | {score:.2f} |")

    print("-" * 54)

# Least happy countries (this call requests up to 12 entries)
print_top_countries(data, num_countries=12, ascending=True)

# Happiest countries (this call requests up to 30 entries)
print_top_countries(data, num_countries=30, ascending=False)


In [None]:
# Correlation of every numeric column with the happiness score, printed
# between two 40-character rules.
numeric_data = data.select_dtypes(include='number')
correlation_with_happiness = numeric_data.corrwith(numeric_data['happiness_score'])
print("Correlation with happiness", "\n" + "-" * 40, sep="\n")
print(correlation_with_happiness, "-" * 40, sep="\n")
