<h1>COVID-19 in France : visualisation</h1>
This notebook compares the ratio of deaths to hospitalisations across different districts (French départements).

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Main French dataset used throughout the notebook; the 'date' column is parsed as dates.
raw_data = pd.read_csv(
    "../input/coronavirusdataset-france/chiffres-cles.csv",
    parse_dates=['date'],
)

# Companion datasets (China, Italy, South Korea) loaded alongside the French data.
df_china = pd.read_csv(
    "../input/coronavirusdataset-france/china.csv",
    parse_dates=['date'],
)
df_italy = pd.read_csv(
    "../input/coronavirusdataset-france/contagioitalia.csv",
    parse_dates=['date'],
)
df_korea = pd.read_csv("../input/coronavirusdataset/Case.csv")
df_korea_time = pd.read_csv(
    "../input/coronavirusdataset/SearchTrend.csv",
    parse_dates=['date'],
)
df_korea_patient = pd.read_csv("../input/coronavirusdataset/PatientInfo.csv")

In [None]:
# Population table for the French departments, read as UTF-8.
df_pop_french_departements = pd.read_csv(
    "../input/population-des-dpartements-franais/population_departements_francais.csv",
    encoding='utf-8',
)

In [None]:
# Give the two key French columns English names for the rest of the notebook.
raw_data = raw_data.rename(
    columns={
        'cas_confirmes': 'cases',
        'deces': 'deaths',
    }
)
raw_data.head()

In [None]:
# Most recent date present in the data. Series.max() skips missing dates (NaT),
# whereas the builtin max() compares element by element and can return NaT
# when a missing date happens to come first.
latest_date = raw_data['date'].max()
print(latest_date)

# Every row (all granularities) reported on that most recent date.
national_latest = raw_data.loc[raw_data['date'] == latest_date]

In [None]:
# Keep only the department-level rows and expose the department name as 'district'.
# The rename is chained (it returns a new frame) instead of being applied in place
# to the filtered selection, which would raise pandas' SettingWithCopyWarning.
df_departement = (
    raw_data
    .loc[raw_data.granularite == 'departement', :]
    .rename(columns={'maille_nom': 'district'})
)
df_departement.tail()

In [None]:
# Disabled variant of the department filtering cell. It is wrapped in a string
# literal, so none of the statements below are executed; the cell only
# evaluates to (and displays) the string itself.
'''
df_departement =  raw_data[raw_data.granularite =='departement']
df_departement.rename(columns={'maille_nom':'district'},inplace=True) 
df_departement.tail()
'''

In [None]:
# Distinct department names found in the department-level data.
df_departement.loc[:, 'district'].unique()

# Scatter plot nombre de décès / Nombre de patients hospitalisés

In [None]:
import matplotlib.ticker as mtick

# Snapshot of the department-level rows for the most recent date available.
gb_departement = df_departement[df_departement['date'] == df_departement['date'].max()].reset_index()

# 'hospitalises' + 'gueris' is what the plots below use as the cumulative
# number of hospitalised patients (see the x-axis label).
data_hospitalises = gb_departement['hospitalises'] + gb_departement['gueris']

# Death ratio in percent. A zero denominator is masked to NaN so the ratio is
# undefined rather than infinite; a single infinite ratio would otherwise make
# the mean (and the red reference line drawn from it) infinite as well.
data_ratio_hospitalises = (gb_departement['deaths'] / data_hospitalises.where(data_hospitalises > 0)) * 100

# Department names used to label the points.
data_depcode = gb_departement['district']

snapshot_date = gb_departement['date'].max()
current_date = snapshot_date.strftime('%d/%m/%Y')       # shown in plot titles
current_date_file = snapshot_date.strftime('%Y%m%d')    # used in output file names

# Threshold for the green line: walk the departments from the largest to the
# smallest and keep those whose running total stays within 80% of the national
# total; the smallest value among them is where the line is drawn.
nbhospitalises_80p = data_hospitalises.sum() * 0.80
hospitalises_desc = data_hospitalises.sort_values(ascending=False)
min_value_80p = hospitalises_desc[hospitalises_desc.cumsum() <= nbhospitalises_80p].min()

fig, ax = plt.subplots(figsize=(16, 10))

ax.set_title(f"Ratio des décès à l'hôpital / Nombre d'hospitalisations au {current_date}", fontsize=20)
ax.set_ylabel("Nombre cumulé de décès / Nombre cumulé d'hospitalisations")
ax.set_xlabel("Nombre cumulé de patients hospitalisés")

# Only label the departments above 20% of the largest hospitalisation count,
# to keep the crowded lower-left corner readable.
label_threshold = data_hospitalises.max() * 0.20
for txt, x_val, y_val in zip(data_depcode, data_hospitalises, data_ratio_hospitalises):
    if x_val > label_threshold:
        ax.annotate(txt, (x_val, y_val), xytext=(x_val + 20, y_val))

mean_ratio = data_ratio_hospitalises.mean()
ax.axhline(mean_ratio, color='red', linestyle='--', label=f'Ratio de décès moyen ({mean_ratio:.2f}%)')

ax.axvline(min_value_80p, color='green', linestyle='--', label=f"80% du nb d'hospitalisés en France sont à droite de la ligne ({nbhospitalises_80p:.0f} hospitalisés)")

ax.scatter(data_hospitalises, data_ratio_hospitalises)

# Source credit, anchored to the bottom-right corner of the axes.
ax.annotate('Source : https://www.kaggle.com/franoisboyer/covid-19-in-france-visualisation/', xy=(1, 0), xytext=(-15, 10), fontsize=10,
    xycoords='axes fraction', textcoords='offset points',
    bbox=dict(facecolor='white', alpha=0.8),
    horizontalalignment='right', verticalalignment='bottom')

ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f%%'))
ax.legend()

fig.savefig('covid19-comparatif-departements-'+str(current_date_file)+'.png')

In [None]:
# Number of departments with a non-missing hospitalisation figure.
data_hospitalises.notna().sum()

In [None]:
# Order the latest snapshot by department code once and renumber the rows, then
# derive every plotted series from that single frame. Sorting only the
# hospitalisation series (and keeping its old index labels) would leave its row
# positions out of step with the deaths and the department names whenever the
# snapshot is not already sorted by code: seaborn pairs x and y by position,
# while the annotation loops look values up by label.
gb_departement_by_code = (
    gb_departement
    .sort_values(by='maille_code', ascending=True)
    .reset_index(drop=True)
)

data_deaths = gb_departement_by_code['deaths']
data_hospitalises = gb_departement_by_code['hospitalises'] + gb_departement_by_code['gueris']
# Department names in the same (code-sorted) order as the two series above.
data_depcode = gb_departement_by_code['district']

In [None]:
# Display the department-level snapshot for the latest date, for visual inspection.
gb_departement

In [None]:
# Department-level rows reported on 2020-04-20, for spot-checking the raw data.
raw_data.loc[
    (raw_data['granularite'] == 'departement')
    & (raw_data['date'] == '2020-04-20')
]

In [None]:
# seaborn is already imported in the first cell; only the theme is (re)applied here.
sns.set(color_codes=True)

fig, ax = plt.subplots(figsize=(32, 20))
ax.set_title(f"Nombre de décès à l'hôpital / Nombre d'hospitalisations au {current_date}", fontsize=25)

# regplot pairs x and y by row position, so the deaths are first put in the
# same label order as the hospitalisation series.
deaths_aligned = data_deaths.reindex(data_hospitalises.index)
sns.regplot(x=data_hospitalises, y=deaths_aligned, ci=99.9, truncate=False, ax=ax)

ax.set(xlabel="Nombre cumulé d'hospitalisés depuis le début de la crise", ylabel='Nombre de décès')

# The 80% threshold is recomputed locally: the shared `min_value_80p` variable
# is reassigned to a population threshold by the population plots, so reading
# it here would draw the wrong line after those cells have run.
hosp_total_80p = data_hospitalises.sum() * 0.80
hosp_desc = data_hospitalises.sort_values(ascending=False)
hosp_min_value_80p = hosp_desc[hosp_desc.cumsum() <= hosp_total_80p].min()
ax.axvline(hosp_min_value_80p, color='green', linestyle='--', label=f"80% du nb d'hospitalisés en France sont à droite de la ligne ({hosp_total_80p:.0f} hospitalisés)")
ax.legend()

# Label the largest departments plus a few hand-picked ones; values are looked
# up by index label so each name stays attached to its own department.
label_threshold = data_hospitalises.max() * 0.20
always_labelled = {'Isère', 'Hautes-Alpes', 'Haute-Garonne', 'Ardennes', 'Haute-Corse'}
for i, txt in data_depcode.items():
    x_val = data_hospitalises.loc[i]
    y_val = data_deaths.loc[i]
    if x_val > label_threshold or txt in always_labelled:
        ax.annotate(txt, (x_val, y_val), xytext=(x_val + 20, y_val))

# Source credit, anchored to the bottom-right corner of the axes.
ax.annotate('Source : https://www.kaggle.com/franoisboyer/covid-19-in-france-visualisation/', xy=(1, 0), xytext=(-15, 10), fontsize=10,
    xycoords='axes fraction', textcoords='offset points',
    bbox=dict(facecolor='white', alpha=0.8),
    horizontalalignment='right', verticalalignment='bottom')

fig.savefig('covid19-comparatif-departements-regplot-'+str(current_date_file)+'.png')

In [None]:
# seaborn is already imported in the first cell; only the theme is (re)applied here.
sns.set(color_codes=True)

fig, ax = plt.subplots(figsize=(32, 20))
ax.set_title(f"Nombre d'hospitalisés / population en France au {current_date}", fontsize=25)

data_pop_plotted = df_pop_french_departements['Total']

# regplot pairs the two series by row position.
# NOTE(review): this assumes both tables list the same departments in the same order - confirm.
sns.regplot(x=data_pop_plotted, y=data_hospitalises, ci=99.9, truncate=False, ax=ax)

# Population threshold for the green line: departments are taken from the most
# to the least populated while their running total stays within 80% of the sum.
# It is kept in its own variable so that the hospitalisation threshold
# `min_value_80p` used by the earlier plots is not overwritten.
popcount_80p = data_pop_plotted.sum() * 0.80
pop_desc = data_pop_plotted.sort_values(ascending=False)
pop_min_value_80p = pop_desc[pop_desc.cumsum() <= popcount_80p].min()

ax.set(xlabel="Population", ylabel="Nombre d'hospitalisés")
ax.axvline(pop_min_value_80p, color='green', linestyle='--', label=f"80% de la population de France est à droite de la ligne ({popcount_80p:.0f} personnes)")
ax.legend()

# Annotate by row position, exactly as regplot pairs the points. The names are
# first put in the label order of the hospitalisation series so that each label
# follows its own data point.
district_names = data_depcode.reindex(data_hospitalises.index)
label_threshold = data_pop_plotted.max() * 0.20
for txt, pop_val, hosp_val in zip(district_names, data_pop_plotted, data_hospitalises):
    if pop_val > label_threshold:
        ax.annotate(txt, (pop_val, hosp_val), xytext=(pop_val + 20, hosp_val))

# Source credit, anchored to the bottom-right corner of the axes.
ax.annotate('Source : https://www.kaggle.com/franoisboyer/covid-19-in-france-visualisation/', xy=(1, 0), xytext=(-15, 10), fontsize=10,
    xycoords='axes fraction', textcoords='offset points',
    bbox=dict(facecolor='white', alpha=0.8),
    horizontalalignment='right', verticalalignment='bottom')

fig.savefig('covid19-comparatif-departements-population-hospitalises-regplot-'+str(current_date_file)+'.png')

In [None]:
# Side-by-side check that the population table (2nd data source) and the COVID
# data (1st data source) list the departments in the same order, so that they
# can be plotted against each other. The comparison is made by row position,
# which is how the regression plots pair the values: pd.concat(axis=1) aligns
# on index labels, so every index is reset first, otherwise an order mismatch
# would be silently realigned and hidden.
covid_by_code = gb_departement.sort_values(by='maille_code', ascending=True).reset_index(drop=True)
pd.concat(
    [
        df_pop_french_departements[['Total', 'Code département']].reset_index(drop=True),
        covid_by_code[['maille_code', 'district']],
        (covid_by_code['hospitalises'] + covid_by_code['gueris']).rename('hospitalises + gueris'),
        data_hospitalises.reset_index(drop=True).rename('data_hospitalises'),
    ],
    axis=1,
)

In [None]:
# seaborn is already imported in the first cell; only the theme is (re)applied here.
sns.set(color_codes=True)

fig, ax = plt.subplots(figsize=(32, 20))
ax.set_title(f"Nombre d'hospitalisés / population de plus de 60 ans en France au {current_date}", fontsize=25)

# Population aged 60 and over: sum of the two oldest age brackets.
data_pop_plotted = df_pop_french_departements['60 à 74 ans'] + df_pop_french_departements['75 ans et plus']

# regplot pairs the two series by row position.
# NOTE(review): this assumes both tables list the same departments in the same order - confirm.
sns.regplot(x=data_pop_plotted, y=data_hospitalises, ci=99.9, truncate=False, ax=ax)

# Population threshold for the green line: departments are taken from the most
# to the least populated while their running total stays within 80% of the sum.
# It is kept in its own variable so that the hospitalisation threshold
# `min_value_80p` used by the earlier plots is not overwritten.
popcount_80p = data_pop_plotted.sum() * 0.80
pop_desc = data_pop_plotted.sort_values(ascending=False)
pop60_min_value_80p = pop_desc[pop_desc.cumsum() <= popcount_80p].min()

ax.set(xlabel="Population de plus de 60 ans", ylabel="Nombre d'hospitalisés")
ax.axvline(pop60_min_value_80p, color='green', linestyle='--', label=f"80% de la population française de plus de 60 ans est à droite de la ligne ({popcount_80p:.0f} personnes)")
ax.legend()

# Annotate by row position, exactly as regplot pairs the points. The names are
# first put in the label order of the hospitalisation series so that each label
# follows its own data point.
district_names = data_depcode.reindex(data_hospitalises.index)
label_threshold = data_pop_plotted.max() * 0.20
for txt, pop_val, hosp_val in zip(district_names, data_pop_plotted, data_hospitalises):
    if pop_val > label_threshold:
        ax.annotate(txt, (pop_val, hosp_val), xytext=(pop_val + 20, hosp_val))

# Source credit, anchored to the bottom-right corner of the axes.
ax.annotate('Source : https://www.kaggle.com/franoisboyer/covid-19-in-france-visualisation/', xy=(1, 0), xytext=(-15, 10), fontsize=10,
    xycoords='axes fraction', textcoords='offset points',
    bbox=dict(facecolor='white', alpha=0.8),
    horizontalalignment='right', verticalalignment='bottom')

fig.savefig('covid19-comparatif-departements-population-60plus-hospitalises-regplot-'+str(current_date_file)+'.png')

In [None]:
# Total of the 'gueris' column over the latest department snapshot.
gb_departement.loc[:, 'gueris'].sum()

In [None]:
# Total of the 'deaths' column over the latest department snapshot.
gb_departement.loc[:, 'deaths'].sum()

In [None]:
# Display the department-level snapshot for the latest date, for visual inspection.
gb_departement

In [None]:
# Latest snapshot row(s) for Hautes-Alpes.
gb_departement.loc[gb_departement['district'] == 'Hautes-Alpes']

In [None]:
# Latest snapshot row(s) for Haute-Corse.
gb_departement.loc[gb_departement['district'] == 'Haute-Corse']

In [None]:
# Latest snapshot row(s) for Ardennes.
gb_departement.loc[gb_departement['district'] == 'Ardennes']