In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the World Happiness dataset. The file is expected to contain no missing
# values (e.g. it is the output of the preprocessing step).
# index_col=0 makes the first CSV column the DataFrame index.
df = pd.read_csv(filepath_or_buffer="World_happiness_Stats.csv", index_col=0)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Country,Year,Ladder_score,PIB_habitant,Social_support,Healthy_life_expectancy,Freedom,Generosity,Corruption,Positive_affect,Negative_affect,Region
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258,South Asia
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237,South Asia
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275,South Asia
3,Afghanistan,2011,3.832,7.62,0.521,51.92,0.496,0.162,0.731,0.611,0.267,South Asia
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.71,0.268,South Asia


In [3]:
# Normality test
#
# Test every numeric column of `df` against the null hypothesis that its values
# come from a normal distribution (scipy.stats.normaltest), and collect the
# results in one summary table.
from scipy.stats import normaltest

# Significance level: a column is reported as normally distributed ("Oui")
# when its p-value is greater than or equal to ALPHA.
ALPHA = 0.05

# Select the numeric columns of the dataframe. "number" covers every integer and
# float width, not only int64/float64.
colonnes_numeriques = df.select_dtypes(include="number").columns

# One result record (dict) per tested column.
resultats_tests_normalite = []

for colonne in colonnes_numeriques:
    # Drop missing values before testing: with NaN in the sample the test returns
    # a NaN p-value, and `NaN >= ALPHA` is False, which would silently report
    # "Non" for a column that was never actually tested.
    stat, p_value = normaltest(df[colonne].dropna())

    resultats_tests_normalite.append({
        'Colonne': colonne,
        'Statistique de test': stat,
        'p-value': p_value,
        'Distribution normale': "Oui" if p_value >= ALPHA else "Non",
    })

# Build the summary DataFrame once from the collected records and display it.
df_resultats_normalite = pd.DataFrame(resultats_tests_normalite)
df_resultats_normalite



Unnamed: 0,Colonne,Statistique de test,p-value,Distribution normale
0,Year,717.538847,1.543189e-156,Non
1,Ladder_score,99.653222,2.293918e-22,Non
2,PIB_habitant,268.887489,4.090945e-59,Non
3,Social_support,311.391816,2.410606e-68,Non
4,Healthy_life_expectancy,158.121426,4.617092e-35,Non
5,Freedom,124.543619,9.030187000000001e-28,Non
6,Generosity,207.976111,6.895428e-46,Non
7,Corruption,528.479082,1.746729e-115,Non
8,Positive_affect,82.697971,1.102462e-18,Non
9,Negative_affect,98.162319,4.834185e-22,Non


In [4]:
# Kruskal-Wallis test on Ladder_score across regions, followed by Dunn's
# post hoc pairwise comparisons.
# NOTE: this is the rank-based Kruskal-Wallis H-test, not a parametric ANOVA.

from scipy.stats import kruskal
from scikit_posthocs import posthoc_dunn

# One sample of Ladder_score values per region.
scores_par_region = [scores for _, scores in df.groupby("Region")["Ladder_score"]]

# Global test: do the regional samples differ?
stat, p_value = kruskal(*scores_par_region)

# Report the global test result.
print(f"Statistique de test : {stat}")
print(f"p-value : {p_value}")


# Dunn's test for the pairwise post hoc comparisons, with Bonferroni
# adjustment of the p-values.
dunn_results = posthoc_dunn(df, val_col="Ladder_score", group_col="Region", p_adjust='bonferroni')

# Show the region-by-region p-value matrix. `display` (the notebook built-in)
# renders the whole table, whereas print() wraps and truncates a wide DataFrame.
print("\nRésultats du test de Dunn :")
display(dunn_results)


Statistique de test : 1201.1044836316146
p-value : 7.003143332371369e-253

Résultats du test de Dunn :
                                    Central and Eastern Europe  \
Central and Eastern Europe                        1.000000e+00   
Commonwealth of Independent States                1.596643e-02   
East Asia                                         1.000000e+00   
Latin America and Caribbean                       1.289628e-04   
Middle East and North Africa                      7.690337e-01   
North America and ANZ                             2.807552e-20   
South Asia                                        4.609857e-14   
Southeast Asia                                    1.000000e+00   
Sub-Saharan Africa                                1.036294e-47   
Western Europe                                    1.759757e-32   

                                    Commonwealth of Independent States  \
Central and Eastern Europe                                1.596643e-02   
Commonwealth of Indepe

In [5]:
# Spearman correlation between Ladder_score and every other numeric indicator,
# computed separately for each region.

import pandas as pd
from scipy.stats import spearmanr


def _qualify_relation(coefficient, p_value, alpha=0.05):
    """Return the qualitative strength label of a Spearman correlation.

    Parameters
    ----------
    coefficient : float
        Spearman coefficient. Pass the unrounded value so that a coefficient
        just above a threshold is not pulled into the weaker class by rounding.
    p_value : float
        p-value associated with the coefficient.
    alpha : float, default 0.05
        Significance level; the relation is only qualified when p_value < alpha.

    Returns
    -------
    str
        "/" when the correlation is not significant (p_value >= alpha or NaN).
        Otherwise "Très faible" (|coefficient| <= 0.2), "Faible" (<= 0.4),
        "Modérée" (<= 0.6), "Forte" (<= 0.8) or "Très forte" (> 0.8).
    """
    if not p_value < alpha:
        return "/"

    strength = abs(coefficient)
    for upper_bound, label in ((0.2, "Très faible"), (0.4, "Faible"),
                               (0.6, "Modérée"), (0.8, "Forte")):
        if strength <= upper_bound:
            return label

    # A NaN coefficient fails every comparison and is reported as "/".
    return "Très forte" if strength > 0.8 else "/"


# Numeric indicator columns (identifier columns are dropped first).
var_num = df.drop(["Country", "Year", "Region"], axis=1).select_dtypes(include=['int', 'float'])
x_col = "Ladder_score"

# One entry per region: {'Region': name, 'Correlations': [record, ...]}.
all_correlations = []

# sort=False keeps the regions in order of first appearance in `df`;
# groupby also skips rows whose Region is missing.
for region, region_df in df.groupby("Region", sort=False):
    # Result records for this region.
    correlations = []

    for col in var_num.columns:
        if col == x_col:
            continue

        # Spearman correlation of Ladder_score with this indicator, within the region.
        rho, p_value = spearmanr(region_df[x_col], region_df[col])

        correlations.append({
            'Variable': col,
            # Rounded for display only; the label is derived from the exact value.
            'Coefficient de Spearman': round(rho, 3),
            'P-value': p_value,
            'Relation': _qualify_relation(rho, p_value),
        })

    all_correlations.append({
        'Region': region,
        'Correlations': correlations
    })

# Display one table per region, indexed by variable name.
for region_correlations in all_correlations:
    print(f"\nRegion: {region_correlations['Region']}")
    correlations_df = pd.DataFrame(region_correlations['Correlations']).set_index("Variable")
    display(correlations_df)



Region: South Asia


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.066,0.532295,/
Social_support,0.102,0.334276,/
Healthy_life_expectancy,0.048,0.652621,/
Freedom,-0.025,0.816269,/
Generosity,-0.027,0.800756,/
Corruption,-0.204,0.052516,/
Positive_affect,-0.074,0.486186,/
Negative_affect,-0.014,0.892865,/



Region: Central and Eastern Europe


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.647,4.673158e-30,Forte
Social_support,0.573,1.699162e-22,Modérée
Healthy_life_expectancy,0.521,3.190018e-18,Modérée
Freedom,0.631,3.13949e-28,Forte
Generosity,-0.051,0.4339757,/
Corruption,-0.236,0.0002082524,Faible
Positive_affect,0.488,6.480755e-16,Modérée
Negative_affect,-0.345,3.435941e-08,Faible



Region: Middle East and North Africa


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.76,3.2281999999999997e-44,Forte
Social_support,0.681,1.837224e-32,Forte
Healthy_life_expectancy,0.56,3.3064059999999996e-20,Modérée
Freedom,0.49,3.502809e-15,Modérée
Generosity,0.431,1.017065e-11,Modérée
Corruption,-0.416,5.830034e-11,Modérée
Positive_affect,0.717,2.590444e-37,Forte
Negative_affect,-0.499,9.123584e-16,Modérée



Region: Latin America and Caribbean


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.61,8.340197e-32,Forte
Social_support,0.448,3.943775e-16,Modérée
Healthy_life_expectancy,0.484,5.505153e-19,Modérée
Freedom,0.31,4.380403e-08,Faible
Generosity,-0.138,0.01697021,Très faible
Corruption,-0.184,0.001420363,Très faible
Positive_affect,0.479,1.449197e-18,Modérée
Negative_affect,-0.405,2.964277e-13,Modérée



Region: Commonwealth of Independent States


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.274,0.0001800932,Faible
Social_support,0.658,5.646396e-24,Forte
Healthy_life_expectancy,-0.113,0.127434,/
Freedom,0.489,2.580036e-12,Modérée
Generosity,0.3,3.781272e-05,Faible
Corruption,-0.011,0.885758,/
Positive_affect,0.529,1.638635e-14,Modérée
Negative_affect,-0.329,5.614337e-06,Faible



Region: North America and ANZ


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,-0.56,2.250591e-06,Modérée
Social_support,0.581,7.210222e-07,Modérée
Healthy_life_expectancy,0.133,0.3032201,/
Freedom,0.598,2.794708e-07,Modérée
Generosity,0.585,6.082073e-07,Modérée
Corruption,-0.486,6.313628e-05,Modérée
Positive_affect,0.556,2.731605e-06,Modérée
Negative_affect,-0.394,0.001530675,Faible



Region: Western Europe


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.773,2.948091e-59,Forte
Social_support,0.678,1.004223e-40,Forte
Healthy_life_expectancy,-0.174,0.002792323,Très faible
Freedom,0.808,1.150448e-68,Très forte
Generosity,0.469,2.0895970000000003e-17,Modérée
Corruption,-0.817,1.876884e-71,Très forte
Positive_affect,0.689,2.046465e-42,Forte
Negative_affect,-0.79,1.367612e-63,Forte



Region: Sub-Saharan Africa


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.385,1.616746e-16,Faible
Social_support,0.238,6.369138e-07,Faible
Healthy_life_expectancy,0.027,0.5812913,/
Freedom,0.134,0.005439096,Très faible
Generosity,0.066,0.17083,/
Corruption,0.085,0.07806383,/
Positive_affect,0.253,1.261159e-07,Faible
Negative_affect,0.025,0.6089476,/



Region: Southeast Asia


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.859,1.179177e-37,Très forte
Social_support,0.8,4.565711e-29,Forte
Healthy_life_expectancy,0.777,1.624569e-26,Forte
Freedom,-0.198,0.02722836,Très faible
Generosity,-0.177,0.04768007,Très faible
Corruption,-0.059,0.5112393,/
Positive_affect,-0.081,0.3696528,/
Negative_affect,-0.59,4.27699e-13,Modérée



Region: East Asia


Unnamed: 0_level_0,Coefficient de Spearman,P-value,Relation
Variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PIB_habitant,0.598,7.360624e-10,Modérée
Social_support,0.216,0.04331634,Faible
Healthy_life_expectancy,0.394,0.0001475615,Faible
Freedom,-0.197,0.06642797,/
Generosity,-0.081,0.4504864,/
Corruption,-0.141,0.1903922,/
Positive_affect,0.255,0.01645638,Faible
Negative_affect,-0.204,0.05646299,/


In [6]:
# Gather, in a single DataFrame, the Spearman coefficient of every indicator
# against Ladder_score: one row per region, one column per indicator.

import pandas as pd
from scipy.stats import spearmanr

# Columns of interest: the first entry is the reference variable, the others
# are the indicators correlated against it.
columns_of_interest = ["Ladder_score", "PIB_habitant", "Social_support", "Healthy_life_expectancy",
                        "Freedom", "Generosity", "Corruption", "Positive_affect", "Negative_affect"]

target_col, *feature_cols = columns_of_interest

# One record per region: [region, coefficient for each indicator].
# spearmanr(...)[0] is the correlation coefficient (the p-value is ignored here).
records = [
    [region] + [spearmanr(group[target_col], group[col])[0] for col in feature_cols]
    for region, group in df.groupby("Region")
]

# Build the final table once from the records (instead of concatenating one
# single-row DataFrame per region) and round the coefficients to 3 decimals.
result_df = pd.DataFrame(records, columns=["Region"] + feature_cols).round(decimals=3)

# Display the result.
display(result_df)


Unnamed: 0,Region,PIB_habitant,Social_support,Healthy_life_expectancy,Freedom,Generosity,Corruption,Positive_affect,Negative_affect
0,Central and Eastern Europe,0.647,0.573,0.521,0.631,-0.051,-0.236,0.488,-0.345
1,Commonwealth of Independent States,0.274,0.658,-0.113,0.489,0.3,-0.011,0.529,-0.329
2,East Asia,0.598,0.216,0.394,-0.197,-0.081,-0.141,0.255,-0.204
3,Latin America and Caribbean,0.61,0.448,0.484,0.31,-0.138,-0.184,0.479,-0.405
4,Middle East and North Africa,0.76,0.681,0.56,0.49,0.431,-0.416,0.717,-0.499
5,North America and ANZ,-0.56,0.581,0.133,0.598,0.585,-0.486,0.556,-0.394
6,South Asia,0.066,0.102,0.048,-0.025,-0.027,-0.204,-0.074,-0.014
7,Southeast Asia,0.859,0.8,0.777,-0.198,-0.177,-0.059,-0.081,-0.59
8,Sub-Saharan Africa,0.385,0.238,0.027,0.134,0.066,0.085,0.253,0.025
9,Western Europe,0.773,0.678,-0.174,0.808,0.469,-0.817,0.689,-0.79
