## Réduction du groupe via les données de sécurité alimentaire

In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import decomposition
from sklearn import preprocessing
import matplotlib.pyplot as plt

In [2]:
# Load the dataframe pickled to data/part2.pkl.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
with open('data/part2.pkl', 'rb') as pkl_file:
    main_df = pickle.load(pkl_file)

In [3]:
# Import and process data

# Read the raw CSV, then reshape it so that each "Zone" becomes one row and
# each "Produit" becomes one column holding the summed "Valeur".
security_raw = pd.read_csv("data/securite-alimentaire-2018.csv")
security_df = (
    security_raw
    .pivot_table(
        values="Valeur",
        index=['Zone'],
        columns=['Produit'],
        # String alias instead of np.sum: same aggregation, but passing the
        # NumPy callable is deprecated and raises a FutureWarning in pandas >= 2.1.
        aggfunc="sum",
    )

    .reset_index()

    .rename(columns={
        'Zone': 'country',
        'PIB par habitant, ($ PPA internationaux constants de 2011)': 'gdp_per_capita'
    })

    # Remove the leftover "Produit" label on the columns axis
    .rename_axis(None, axis=1)

    # Set countries as index
    .set_index('country')

    # Highest GDP per capita first
    .sort_values(by='gdp_per_capita', ascending=False)
)

# Display the dataframe
display(security_df.head(10))

# Merge data
# Columns brought in by a previous run of this cell are dropped first, so that
# re-running the cell does not create suffixed duplicates (gdp_per_capita_x / _y)
# and break the sort below. On a first run the drop is a no-op.
main_df = (main_df
           .drop(columns=security_df.columns, errors='ignore')

           # Inner join on the country index: only countries present in both
           # dataframes are kept (this is the default of merge, made explicit).
           .merge(security_df, how='inner', left_index=True, right_index=True)
          )

# Display new dataframe
display(main_df.sort_values(by='gdp_per_capita', ascending=False).head(10))

Unnamed: 0_level_0,gdp_per_capita
country,Unnamed: 1_level_1
Qatar,116936.0
Chine - RAS de Macao,104862.0
Luxembourg,94278.0
Singapour,85535.4
Brunéi Darussalam,71809.3
Irlande,67335.3
Émirats arabes unis,67293.5
Koweït,65530.5
Norvège,64800.1
Suisse,57410.2


Unnamed: 0_level_0,pop_evol_ratio,kcal_total_capita_day,proteins_total_capita_day,proteins_animal_ratio,cluster,F1,gdp_per_capita
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chine - RAS de Macao,0.254989,2915.0,94.75,0.644855,Autre,1.017492,104862.0
Luxembourg,0.185682,3540.0,113.64,0.634636,Occident,2.451546,94278.0
Brunéi Darussalam,0.180791,2985.0,93.29,0.567585,Autre,0.940942,71809.3
Irlande,0.153578,3602.0,109.91,0.589755,Occident,2.330983,67335.3
Émirats arabes unis,1.774117,3275.0,104.51,0.399005,Émirats arabes unis,-1.709935,67293.5
Koweït,0.592155,3499.0,108.15,0.495423,Moyen Orient,1.070239,65530.5
Norvège,0.105921,3483.0,110.73,0.595954,Occident,2.307315,64800.1
Suisse,0.108245,3393.0,92.89,0.643449,Autre,1.824445,57410.2
Chine - RAS de Hong-Kong,0.043,3286.0,129.07,0.731464,Occident,3.145492,56054.9
États-Unis d'Amérique,0.092761,3682.0,109.42,0.637726,Occident,2.682404,54225.4


## Analyse du PIB par habitant par groupe

In [4]:
# Group once by cluster, then show the mean GDP per capita and the number of
# non-null GDP values for each cluster.
gdp_by_cluster = main_df.groupby('cluster')[['gdp_per_capita']]
display(gdp_by_cluster.mean())
display(gdp_by_cluster.count())

Unnamed: 0_level_0,gdp_per_capita
cluster,Unnamed: 1_level_1
Afrique,3657.856522
Autre,18382.60303
Moyen Orient,18598.91
Occident,39203.0375
Émirats arabes unis,67293.5


Unnamed: 0_level_0,gdp_per_capita
cluster,Unnamed: 1_level_1
Afrique,46
Autre,66
Moyen Orient,20
Occident,32
Émirats arabes unis,1


## Interprétation

Notre groupe cible, « Occident », a le PIB par habitant moyen le plus élevé si l'on excepte les Émirats arabes unis, qui forment un cluster à eux seuls. Ceci est rassurant quant à la qualité de nos clusters.

## Sélection

Regardons le Top 10 de notre groupe "Occident".

In [5]:
# Keep only the rows of the "Occident" cluster
occident_df = main_df.loc[main_df['cluster'] == "Occident"]

# The 10 rows with the highest gdp_per_capita
occident_top10 = occident_df.sort_values(by='gdp_per_capita', ascending=False).iloc[:10]

# Show those 10 rows ordered by pop_evol_ratio, highest first
occident_top10.sort_values(by='pop_evol_ratio', ascending=False)

Unnamed: 0_level_0,pop_evol_ratio,kcal_total_capita_day,proteins_total_capita_day,proteins_animal_ratio,cluster,F1,gdp_per_capita
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Luxembourg,0.185682,3540.0,113.64,0.634636,Occident,2.451546,94278.0
Irlande,0.153578,3602.0,109.91,0.589755,Occident,2.330983,67335.3
Islande,0.137931,3381.0,133.06,0.725086,Occident,3.195288,46483.0
Norvège,0.105921,3483.0,110.73,0.595954,Occident,2.307315,64800.1
États-Unis d'Amérique,0.092761,3682.0,109.42,0.637726,Occident,2.682404,54225.4
Suède,0.070342,3180.0,107.48,0.659006,Occident,2.095984,46949.3
Danemark,0.043842,3366.0,108.74,0.642082,Occident,2.360457,46682.5
Autriche,0.043355,3770.0,106.2,0.591902,Occident,2.637046,45436.7
Chine - RAS de Hong-Kong,0.043,3286.0,129.07,0.731464,Occident,3.145492,56054.9
Pays-Bas,0.038738,3222.0,111.46,0.679795,Occident,2.388988,48472.5


On notera que Hong-Kong n'est pas une ville d'Occident, mais qu'elle en a toutes les caractéristiques : il n'est donc pas étonnant de la retrouver ici.

## Sauvegarde du dataframe

In [6]:
# Persist the merged dataframe to data/part4.pkl
with open('data/part4.pkl', 'wb') as pkl_file:
    pickle.dump(main_df, pkl_file)