In [1]:
from hdx.hdx_configuration import Configuration 
from hdx.data.dataset import Dataset
import requests
import pandas as pd
import numpy as np
import csv
from zipfile import ZipFile
import os

In [2]:
def download_url(url, save_path, chunk_size=128, timeout=60):
    """Stream the resource at `url` into the file `save_path`.

    Parameters
    ----------
    url : str
        Address to download.
    save_path : str
        Destination file; opened in binary write mode, so an existing file
        is overwritten.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float, optional
        Seconds to wait for the server (connect / read) before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. The destination file is
        not created in that case, so an error page is never stored as data.
    """
    # The response is used as a context manager so the streamed connection is
    # released even when writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check the status before touching the disk.
        r.raise_for_status()
        with open(save_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

# Population

In [3]:
# Load the French départements table (semicolon-separated) and give its
# columns English names.
prediction_data = pd.read_csv(
    '../data/train/pop/fr/departements-francais.csv',
    sep=';',
)
prediction_data.columns = [
    'dep_num', 'name', 'region', 'capital', 'area', 'total', 'density',
]

# Order by département number, then discard the last five rows of the sorted
# table.
prediction_data = prediction_data.sort_values('dep_num').iloc[:-5]

# Use the accented spelling of Île-de-France.
prediction_data['region'] = prediction_data['region'].replace(
    'Ile-de-France', 'Île-de-France'
)
prediction_data

Unnamed: 0,dep_num,name,region,capital,area,total,density
0,1,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,631877,109.7
1,2,Aisne,Hauts-de-France,Laon,7369,538659,73.1
2,3,Allier,Auvergne-Rhône-Alpes,Moulins,7340,341613,46.5
3,4,Alpes-de-Haute-Provence,Provence-Alpes-Côte d'Azur,Digne,6925,161799,23.4
4,5,Hautes-Alpes,Provence-Alpes-Côte d'Azur,Gap,5549,140916,25.4
...,...,...,...,...,...,...,...
93,93,Seine-Saint-Denis,Île-de-France,Bobigny,236,1592663,6742.9
94,94,Val-de-Marne,Île-de-France,Créteil,245,1372389,5601.6
95,95,Val-d'Oise,Île-de-France,Pontoise,1246,1215390,975.5
19,201,Corse-du-Sud,Corse,Ajaccio,4014,152730,38.0


# Covid

In [None]:
# NOTE: this whole cell is commented out, so the notebook currently fetches no
# Covid hospitalisation data.
# NOTE(review): the disabled code reads "data/pop/fr/departements-francais.csv"
# and the original French column names (NUMÉRO, NOM), whereas the population
# cell reads "../data/train/pop/fr/departements-francais.csv" and renames the
# columns. It also pairs each département with
# response["allLiveFranceData"][counter] purely by position -- presumably the
# API returns entries in the same order as the CSV; verify before re-enabling.
#import requests
#import pandas as pd
#import datetime
#departements = pd.read_csv("data/pop/fr/departements-francais.csv", sep = ";")
#hospi = []
#url = "https://coronavirusapi-france.now.sh/AllLiveData"
#response = requests.get(url).json()
#counter = 0
#for numero in departements.NUMÉRO:
#    nom = str(departements[departements["NUMÉRO"]==numero]["NOM"].values[0])
#    hospi.append((nom, numero, response["allLiveFranceData"][counter]["nouvellesHospitalisations"]))
#    counter+=1
#
#df = pd.DataFrame(hospi, columns =["depname","depnum","newhospi"])
#print(df)
#df.to_csv("covid_daily_data.csv", index = False)

# CAMS

In [None]:
# TODO (Niclas?): CAMS data retrieval is not implemented yet.

# Mobility

In [None]:
# Look up the 'movement-range-maps' dataset on HDX (read-only access) and take
# the download link of one of its resources.
Configuration.create(
    hdx_site='prod',
    user_agent='A_Quick_Example',
    hdx_read_only=True,
)
dataset = Dataset.read_from_hdx('movement-range-maps')
resources = dataset.get_resources()
# NOTE(review): the resource index 1 is hard-coded -- presumably it is the
# zipped data file; confirm it is still the right entry.
dic = resources[1]
url_mobility = dic['download_url']

# Save the archive next to the other prediction inputs.
file_mobility = "../data/prediction/mvt_range.zip"
download_url(url=url_mobility, save_path=file_mobility)

In [None]:
# Unpack the mobility archive. The handle is named `archive` rather than `zip`
# so that the built-in zip() is not shadowed for the rest of the notebook.
with ZipFile(file_mobility, 'r') as archive:
    # Print the archive's table of contents.
    archive.printdir()

    # Extract only the last member listed in the archive. No target directory
    # is given, so the file is written relative to the current working
    # directory, which is where the next cell opens it from.
    print('Extracting mv_range file now...')
    mvt_range = archive.namelist()[-1]
    archive.extract(mvt_range)
    print('Done!')

In [None]:
# Read the extracted tab-separated file into a list of rows (lists of strings).
# The encoding is set explicitly so accented region names are decoded the same
# way on every platform (assumes the file is UTF-8 -- TODO confirm), and
# newline='' is what the csv module asks for when it is handed a file object.
with open(mvt_range, encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter="\t")
    d = list(reader)

In [None]:
# The first parsed row is the header; every following row is a record.
data_mob = pd.DataFrame(data=d[1:], columns=d[0])
# Keep the rows whose country code is 'FRA'.
data_mob = data_mob.loc[data_mob['country'] == 'FRA']

In [None]:
# Keep the rows that share the 'ds' value of the last row, and only the four
# columns used downstream.
data_mob = data_mob.loc[
    data_mob['ds'] == data_mob['ds'].iloc[-1],
    [
        'ds',
        'polygon_name',
        'all_day_bing_tiles_visited_relative_change',
        'all_day_ratio_single_tile_users',
    ],
]

In [None]:
# Pre-create both mobility feature columns, filled with 0.0.
prediction_data = prediction_data.assign(stay_put=0.0, go_out=0.0)

In [None]:
def _mobility_value(row, column, mobility):
    """Return `column` as a float for the mobility row matching row['region'].

    The match is an exact comparison between `mobility['polygon_name']` and
    `row['region']`; the first matching row is used. When nothing matches,
    NaN is returned instead of raising IndexError, so one unmatched region
    does not abort a whole `DataFrame.apply`. NaN is used rather than 0.0 so
    that a missing region stays distinguishable from a real zero reading.
    """
    matches = mobility.loc[mobility['polygon_name'] == row['region'], column]
    if len(matches) == 0:
        return float('nan')
    return float(matches.iloc[0])


def add_go_out(row, mobility=None):
    """Look up 'all_day_bing_tiles_visited_relative_change' for the row's region.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'region' entry.
    mobility : pandas.DataFrame, optional
        Table with 'polygon_name' and the metric column. Defaults to the
        notebook-level `data_mob`.

    Returns
    -------
    float
        The metric for the region, or NaN when the region is not in the table.
    """
    table = data_mob if mobility is None else mobility
    return _mobility_value(row, 'all_day_bing_tiles_visited_relative_change', table)


def add_stay_put(row, mobility=None):
    """Look up 'all_day_ratio_single_tile_users' for the row's region.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'region' entry.
    mobility : pandas.DataFrame, optional
        Table with 'polygon_name' and the metric column. Defaults to the
        notebook-level `data_mob`.

    Returns
    -------
    float
        The metric for the region, or NaN when the region is not in the table.
    """
    table = data_mob if mobility is None else mobility
    return _mobility_value(row, 'all_day_ratio_single_tile_users', table)

In [None]:
# Fill both mobility columns row by row from the region-level lookups.
prediction_data = prediction_data.assign(
    go_out=prediction_data.apply(add_go_out, axis=1),
    stay_put=prediction_data.apply(add_stay_put, axis=1),
)
prediction_data

# Vaccines

In [None]:
# Despite the variable's name, this URL is the one saved as live_vaccins.csv.
url_positive_test = 'https://www.data.gouv.fr/es/datasets/r/59aeab47-c364-462c-9087-ce233b6acbbc'
download_url(
    url=url_positive_test,
    save_path='../data/prediction/live_vaccins.csv',
)

In [None]:
# Read the downloaded vaccination file and parse the week-start column as
# datetimes.
live_vacc = pd.read_csv('../data/prediction/live_vaccins.csv').assign(
    date_debut_semaine=lambda frame: pd.to_datetime(frame['date_debut_semaine'])
)
# Latest week start present in the file; the lookups below select this week.
date_max = live_vacc['date_debut_semaine'].max()

In [None]:
# Split the table by 'rang_vaccinal' value (1 and 2).
vacc_1 = live_vacc.loc[live_vacc['rang_vaccinal'].eq(1)]
vacc_2 = live_vacc.loc[live_vacc['rang_vaccinal'].eq(2)]

In [None]:
def _latest_vacc_count(row, weekly, latest_week):
    """Return the 'nb' value for row['dep_num'] in the week `latest_week`.

    The first row of `weekly` whose 'code_region' equals `row['dep_num']` and
    whose 'date_debut_semaine' equals `latest_week` supplies the value. When
    no such row exists the result is 0. That includes the case where the code
    has rows for other weeks only, which previously raised IndexError.

    NOTE(review): the lookup compares the vaccination table's 'code_region'
    with the row's 'dep_num'. Presumably region codes and département numbers
    are different numbering schemes -- confirm this is the intended join key.
    """
    in_scope = (weekly['code_region'] == row['dep_num']) & (
        weekly['date_debut_semaine'] == latest_week
    )
    return next(iter(weekly.loc[in_scope, 'nb']), 0)


def live_vacc_1(row, weekly=None, latest_week=None):
    """Latest-week 'nb' from the rank-1 vaccination table for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep_num' entry.
    weekly : pandas.DataFrame, optional
        Table with 'code_region', 'date_debut_semaine' and 'nb'. Defaults to
        the notebook-level `vacc_1`.
    latest_week : optional
        Week to select. Defaults to the notebook-level `date_max`.

    Returns
    -------
    The matching 'nb' value, or 0 when there is none.
    """
    table = vacc_1 if weekly is None else weekly
    week = date_max if latest_week is None else latest_week
    return _latest_vacc_count(row, table, week)


def live_vacc_2(row, weekly=None, latest_week=None):
    """Latest-week 'nb' from the rank-2 vaccination table for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep_num' entry.
    weekly : pandas.DataFrame, optional
        Table with 'code_region', 'date_debut_semaine' and 'nb'. Defaults to
        the notebook-level `vacc_2`.
    latest_week : optional
        Week to select. Defaults to the notebook-level `date_max`.

    Returns
    -------
    The matching 'nb' value, or 0 when there is none.
    """
    table = vacc_2 if weekly is None else weekly
    week = date_max if latest_week is None else latest_week
    return _latest_vacc_count(row, table, week)

In [None]:
# Attach the latest-week vaccination counts (rank 1 and rank 2) to every row.
prediction_data = prediction_data.assign(
    vacc_1=prediction_data.apply(live_vacc_1, axis=1),
    vacc_2=prediction_data.apply(live_vacc_2, axis=1),
)
prediction_data

# Positive tests

In [None]:
url_positive = 'https://www.data.gouv.fr/en/datasets/r/406c6a23-e283-4300-9484-54e78c8ae675'

# Download the positive-test file and parse it as semicolon-separated text.
with requests.Session() as s:
    download = s.get(url_positive, timeout=60)
    # Fail loudly on an HTTP error instead of parsing the error page as CSV.
    download.raise_for_status()

    decoded_content = download.content.decode('utf-8')

    # Every cell is kept as a string; my_list[0] is the header row.
    cr = csv.reader(decoded_content.splitlines(), delimiter=';')
    my_list = list(cr)

In [None]:
# First parsed row is the header; the remaining rows are the records.
live_positive = pd.DataFrame(data=my_list[1:], columns=my_list[0])

live_positive

In [None]:
# Inspect the distinct 'dep' codes present in the downloaded table.
live_positive['dep'].unique()

In [None]:
def replace_dep(row):
    """Return the row's 'dep' code as an integer.

    The codes '2A' and '2B' are mapped to 201 and 202; any other code is
    converted with int().

    The converted value is now returned so callers can assign the result of
    `DataFrame.apply(replace_dep, axis=1)` to a column. The previous version
    always returned None and relied on mutating `row`, which pandas does not
    support inside `apply`. The write-back into `row` is kept only so existing
    callers that depend on it behave as before.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep' entry (string or integer).

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If the code is neither '2A'/'2B' nor convertible by int().
    """
    dep = row['dep']
    if isinstance(dep, str):
        dep = dep.replace('2A', '201').replace('2B', '202')
    dep = int(dep)
    row['dep'] = dep
    return dep

In [None]:
# Convert the 'dep' codes to integers and store the result in the column.
# The previous call discarded the output of `apply`, so the conversion only
# took effect if pandas happened to propagate in-place row mutation, which it
# does not guarantee. '2A' and '2B' become 201 and 202, as in replace_dep.
live_positive['dep'] = (
    live_positive['dep']
    .astype(str)
    .str.replace('2A', '201', regex=False)
    .str.replace('2B', '202', regex=False)
    .astype(int)
)

In [None]:
def P_to_int(row):
    """Return the row's 'P' entry converted with int()."""
    raw_value = row['P']
    return int(raw_value)
# Replace the 'P' column with its integer conversion, computed row by row.
live_positive = live_positive.assign(P=live_positive.apply(P_to_int, axis=1))

In [None]:
# Keep the rows whose 'dep' is below 203, then parse 'jour' as day-first dates.
live_positive = (
    live_positive
    .loc[lambda frame: frame["dep"] < 203]
    .assign(jour=lambda frame: pd.to_datetime(frame["jour"], dayfirst = True))
)

In [None]:
# Keep only the rows of the most recent day.
live_positive = live_positive.loc[
    lambda frame: frame['jour'] == frame['jour'].max()
]

In [None]:
# Inspect the distinct 'P' values left after keeping only the latest day.
live_positive['P'].unique()

In [None]:
# Total per 'dep'. Only numeric columns are summed: the table was built from
# csv.reader output, so apart from the converted columns everything is text,
# and 'jour' is a datetime. Neither can be meaningfully added up, and summing
# them implicitly is version-dependent (it can raise or concatenate strings).
live_positive = live_positive.groupby('dep').sum(numeric_only=True).reset_index()

In [None]:
# Display the aggregated table (one row per 'dep' after the groupby).
live_positive

# Variants

In [4]:
# Fetch the variants file and store it with the other prediction inputs.
url_variants = 'https://www.data.gouv.fr/fr/datasets/r/16f4fd03-797f-4616-bca9-78ff212d06e8'
file_variants = '../data/prediction/live_variants.csv'
download_url(url=url_variants, save_path=file_variants)

In [5]:
# Read the variants file. 'dep' is forced to text because the column mixes
# numeric codes with '2A'/'2B'. Left to type inference it can come back as a
# mix of ints and strings, and the conversion in the next cell only handles
# non-int values.
live_variants = pd.read_csv(file_variants, sep=';', dtype={'dep': str})
# Keep the département, the week label and the two suspected-variant counts.
live_variants = live_variants[['dep','semaine','Nb_susp_501Y_V1','Nb_susp_501Y_V2_3']]

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
def replace_dep(row):
    """Return the row's 'dep' code as an integer.

    The codes '2A' and '2B' are mapped to 201 and 202; any other code is
    converted with int(). A value that is already an integer is returned
    unchanged. The previous version fell through and returned None for
    integers, which turned those rows into missing values.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep' entry (string or integer).

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If the code is neither '2A'/'2B' nor convertible by int().
    """
    dep = row['dep']
    if isinstance(dep, str):
        dep = dep.replace('2A', '201').replace('2B', '202')
    return int(dep)

# Convert 'dep' to numbers, keep the codes below 203, then cast the column to
# int.
live_variants = (
    live_variants
    .assign(dep=live_variants.apply(replace_dep, axis=1))
    .loc[lambda frame: frame['dep'] < 203]
    .assign(dep=lambda frame: frame['dep'].astype(int))
)

In [7]:
def get_semaine(row):
    """Parse the last ten characters of the row's 'semaine' label as a datetime."""
    trailing_date = row['semaine'][-10:]
    return pd.to_datetime(trailing_date)

# Replace each week label with its parsed date, then total the counts per
# (dep, semaine) pair.
live_variants = (
    live_variants
    .assign(semaine=live_variants.apply(get_semaine, axis=1))
    .groupby(['dep', 'semaine'])
    .sum()
    .reset_index()
)

In [8]:
# Most recent 'semaine' value; add_V1 / add_V2 use it to select the latest week.
max_date = live_variants['semaine'].max()

In [9]:
def _latest_variant_count(row, column, variants, latest_week):
    """Return `column` for row['dep_num'] in the week `latest_week`, as an int.

    The first row of `variants` whose 'dep' equals `row['dep_num']` and whose
    'semaine' equals `latest_week` supplies the value. When there is no such
    row the result is 0 rather than an IndexError, matching how the
    vaccination lookups in this notebook treat missing data.
    """
    in_scope = (variants['dep'] == row['dep_num']) & (
        variants['semaine'] == latest_week
    )
    return int(next(iter(variants.loc[in_scope, column]), 0))


def add_V1(row, variants=None, latest_week=None):
    """Latest-week 'Nb_susp_501Y_V1' for the row's département.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep_num' entry.
    variants : pandas.DataFrame, optional
        Table with 'dep', 'semaine' and the count column. Defaults to the
        notebook-level `live_variants`.
    latest_week : optional
        Week to select. Defaults to the notebook-level `max_date`.

    Returns
    -------
    int
        The count, or 0 when the département has no row for that week.
    """
    table = live_variants if variants is None else variants
    week = max_date if latest_week is None else latest_week
    return _latest_variant_count(row, 'Nb_susp_501Y_V1', table, week)


def add_V2(row, variants=None, latest_week=None):
    """Latest-week 'Nb_susp_501Y_V2_3' for the row's département.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'dep_num' entry.
    variants : pandas.DataFrame, optional
        Table with 'dep', 'semaine' and the count column. Defaults to the
        notebook-level `live_variants`.
    latest_week : optional
        Week to select. Defaults to the notebook-level `max_date`.

    Returns
    -------
    int
        The count, or 0 when the département has no row for that week.
    """
    table = live_variants if variants is None else variants
    week = max_date if latest_week is None else latest_week
    return _latest_variant_count(row, 'Nb_susp_501Y_V2_3', table, week)

In [10]:
# Attach the latest-week suspected-variant counts to every row.
prediction_data = prediction_data.assign(
    variant_1=prediction_data.apply(add_V1, axis=1),
    variant_2=prediction_data.apply(add_V2, axis=1),
)

In [11]:
# Display the feature table, now including the variant_1 / variant_2 columns.
prediction_data

Unnamed: 0,dep_num,name,region,capital,area,total,density,variant_1,variant_2
0,1,Ain,Auvergne-Rhône-Alpes,Bourg-en-Bresse,5762,631877,109.7,2082,44
1,2,Aisne,Hauts-de-France,Laon,7369,538659,73.1,1762,64
2,3,Allier,Auvergne-Rhône-Alpes,Moulins,7340,341613,46.5,782,28
3,4,Alpes-de-Haute-Provence,Provence-Alpes-Côte d'Azur,Digne,6925,161799,23.4,404,0
4,5,Hautes-Alpes,Provence-Alpes-Côte d'Azur,Gap,5549,140916,25.4,344,4
...,...,...,...,...,...,...,...,...,...
93,93,Seine-Saint-Denis,Île-de-France,Bobigny,236,1592663,6742.9,2153,235
94,94,Val-de-Marne,Île-de-France,Créteil,245,1372389,5601.6,1865,216
95,95,Val-d'Oise,Île-de-France,Pontoise,1246,1215390,975.5,2297,124
19,201,Corse-du-Sud,Corse,Ajaccio,4014,152730,38.0,348,4
