# Prédiction de la date de saturation des hôpitaux

**Source :**

Report of the WHO-China Joint Mission on Coronavirus Disease 2019 : https://www.who.int/docs/default-source/coronaviruse/who-china-joint-mission-on-covid-19-final-report.pdf
Care for Critically Ill Patients With COVID-19 : https://jamanetwork.com/journals/jama/fullarticle/2762996
Daily time series : https://github.com/opencovid19-fr/data — CSSE at Johns Hopkins University : https://github.com/CSSEGISandData/COVID-19

In [None]:
import numpy as np
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
%matplotlib inline

## Analyse de l'évolution des cas de COVID19 : la France vs le reste du monde

In [None]:
# Load the confirmed-cases time-series table used by every later cell.
# NOTE(review): later cells assume four descriptive columns followed by one
# column per date — confirm against the CSV header.
confirmed_csv = "../input/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv"
ts_confirmed_new = pd.read_csv(confirmed_csv)

In [None]:
# Display every row whose "Country/Region" is France.
is_france = ts_confirmed_new["Country/Region"] == "France"
ts_confirmed_new[is_france]

In [None]:
# Cumulative confirmed cases over time: the whole data set (top panel) and
# every row whose "Country/Region" is not "China" (bottom panel).
fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 10))

# Select the date columns explicitly (column position 4 onward, the same
# offset the per-country series use) before summing. Summing the whole frame
# and slicing the result afterwards depends on how pandas treats the text and
# coordinate columns, which differs between pandas versions.
date_columns = ts_confirmed_new.iloc[:, 4:]
outside_china = ts_confirmed_new["Country/Region"] != "China"

axs[0].plot(date_columns.sum())
axs[0].set_ylabel("Cases")

axs[1].plot(date_columns[outside_china].sum())
axs[1].set_ylabel("Cases")
axs[1].set_xlabel("time")

fig.tight_layout()

On remarque clairement que le reste du monde entre dans une phase **exponentielle** alors que la Chine n'est plus dans cette phase.

## Construction des séries temporelles des cas confirmés


In [None]:
def _confirmed_series(country):
    """Return the cumulative confirmed cases of *country*, indexed by date.

    Every row of ``ts_confirmed_new`` whose "Country/Region" equals *country*
    is summed over the date columns (column position 4 onward), and the
    resulting index is converted to datetimes.
    """
    rows = ts_confirmed_new[ts_confirmed_new["Country/Region"] == country]
    # Sum only the date columns so the result never carries the text or
    # coordinate columns, whatever pandas does with non-numeric data.
    totals = rows.iloc[:, 4:].sum(axis=0)
    totals.index = pd.to_datetime(totals.index)
    return totals


# France uses the first matching row only, not the sum of all matching rows.
# NOTE(review): if the data set holds several French rows this is just one of
# them — confirm that the first row is the intended one.
f_confirmed = ts_confirmed_new[ts_confirmed_new["Country/Region"] == "France"].iloc[0][
    4:
]
f_confirmed.index = pd.to_datetime(f_confirmed.index)

i_confirmed = _confirmed_series("Italy")
# The other cells of this notebook filter on "China"; the former
# "Mainland China" label was inconsistent with them.
c_confirmed = _confirmed_series("China")
sg_confirmed = _confirmed_series("Singapore")
ger_confirmed = _confirmed_series("Germany")
sp_confirmed = _confirmed_series("Spain")
skorea_confirmed = _confirmed_series("Korea, South")
taiwan_confirmed = _confirmed_series("Taiwan*")

In [None]:
# Confirmed cases of the selected countries on a logarithmic scale.
fig = plt.figure(figsize=(10, 20))
ax = fig.add_subplot(2, 1, 1)
ax.set_yscale("log")

# (series, legend label, extra plot keywords), drawn in this order on the
# current axes.
curves = [
    (f_confirmed, "France", {"marker": "^"}),
    (i_confirmed, "Italy", {}),
    (c_confirmed, "China", {}),
    (sg_confirmed, "Singapore", {}),
    (sp_confirmed, "Spain", {}),
    (skorea_confirmed, "South Korea", {}),
    (ger_confirmed, "Germany", {}),
    (taiwan_confirmed, "Taiwan", {}),
]
for series, label, extra in curves:
    series.plot(label=label, **extra)

plt.legend(loc="best")

## Facteur de changement

Le facteur de changement de la courbe est égal au ratio des nouveaux cas détectés entre deux jours successifs. Autrement dit, c'est le rapport entre le nombre de nouveaux cas d'un jour et le nombre de nouveaux cas de la veille.

In [None]:
def growth(serie):
    """Return the growth factor of the daily increments of *serie*.

    With ``diff = serie.diff()``, the factor at position ``i`` is
    ``diff[i + 1] / diff[i]`` and is labelled with ``serie.index[i]``.
    The factor is 0 when ``diff[i]`` is 0, and undefined values (NaN, +/-inf)
    are replaced by 0. The last two positions of *serie* produce no entry.

    Parameters:
        serie: pandas Series of cumulative counts.

    Returns:
        pandas Series of growth factors, two entries shorter than *serie*.
    """
    # Computed once; the loop bound now follows *serie* itself rather than
    # an unrelated global series.
    increments = serie.diff()
    factors = [
        0 if previous == 0 else following / previous
        for previous, following in zip(increments.iloc[:-2], increments.iloc[1:-1])
    ]
    labels = serie.index[: len(factors)]
    return (
        pd.Series(data=factors, index=labels)
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
    )

In [None]:
# Growth factor of the daily increments: France on top, Italy below.
fig, ax = plt.subplots(2, 1, figsize=(6, 6))
top_panel, bottom_panel = ax[0], ax[1]

france_growth = growth(f_confirmed)
italy_growth = growth(i_confirmed)
france_growth.plot(ax=top_panel, label="France")
italy_growth.plot(ax=bottom_panel, label="Italie")

top_panel.legend(loc="best")
bottom_panel.legend(loc="best")

## Ratio de changement 

In [None]:
# Column-wise total of every French row, without the first ten entries.
# NOTE(review): which entries `[10:]` drops depends on the leading non-date
# entries the sum keeps — confirm the intended start date.
france_rows = ts_confirmed_new[ts_confirmed_new["Country/Region"] == "France"]
f_confirmed_new = france_rows.sum(axis=0)[10:]
f_confirmed_new.index = pd.to_datetime(f_confirmed_new.index)


In [None]:
def ratio_change(serie):
    """Return the day-over-day ratio ``serie[i] / serie[i - 1]``.

    The first entry is fixed to 1 because it has no predecessor. Undefined
    values (NaN, +/-inf) are replaced by 0. The last element of *serie*
    produces no entry.

    Parameters:
        serie: pandas Series of cumulative counts.

    Returns:
        pandas Series of ratios labelled with the index of *serie*, one entry
        shorter than *serie*.
    """
    # `.iloc` makes every access positional: plain `serie[i]` is a label
    # lookup on integer indexes and a deprecated fallback on other indexes.
    ratios = [
        1 if position == 0 else serie.iloc[position] / serie.iloc[position - 1]
        for position in range(len(serie) - 1)
    ]
    return (
        pd.Series(data=ratios, index=serie.index[: len(ratios)])
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
    )

In [None]:
# Distribution of the daily ratio of change for all rows outside China:
# box plot first, then summary statistics.
outside_china_total = ts_confirmed_new[
    ts_confirmed_new["Country/Region"] != "China"
].sum()[10:]
world_ratio = ratio_change(outside_china_total)

sns.boxplot(world_ratio)
print(world_ratio.describe())

In [None]:
# Same analysis for France: box plot, then summary statistics.
# NOTE(review): f_confirmed_new is already truncated when it is built, so
# this drops ten more entries — confirm that the double slice is intended.
france_ratio = ratio_change(f_confirmed_new[10:])
sns.boxplot(france_ratio)
print(france_ratio.describe())

Nous remarquons un ratio de changement variant de 1.16 à 1.22 dans le monde, un ratio de 1.18 en moyenne en France (avec une plus grande variance : 0.27)
Ceci pourrait s'expliquer par le manque de tests généralisés en France.

## Temps pour doubler 

In [None]:
# Average time needed for the case count outside China to double.
dates = []
outside_china = ts_confirmed_new[ts_confirmed_new["Country/Region"] != "China"]
# Sum the date columns only (column position 4 onward), so the series holds
# nothing but daily totals whatever pandas does with non-numeric columns.
serie = outside_china.iloc[:, 4:].sum()
serie.index = pd.to_datetime(serie.index)

counts = serie.to_numpy()  # positional access, independent of the date index
last = len(counts) - 1
for i in range(last):
    # Walk forward to the first day whose total is at least twice day i's.
    j = i
    while j < last and counts[j] < 2 * counts[i]:
        j += 1
    # Keep the interval only when the doubling was really reached: stopping at
    # the end of the data is not a doubling and would shorten the average.
    # NOTE(review): `j > i + 1` also discards one-day doublings, as the
    # original did — confirm that this is intended.
    if j > i + 1 and counts[j] >= 2 * counts[i]:
        dates.append(serie.index[j] - serie.index[i])

if dates:
    print("Moyenne pour doubler le chiffre est de ", np.mean(dates))
else:
    print("Aucun doublement observé sur la période")

## De combien de jours l'avancement des cas en France est en retard par rapport à l'Italie


In [None]:
# Align both epidemics on a common starting level: keep only the values above
# a threshold and plot them against the number of days since that level.
# NOTE(review): the thresholds differ (30 for Italy, 40 for France) — confirm
# that this is deliberate.
ilist = i_confirmed.where(i_confirmed > 30).dropna().tolist()
flist = f_confirmed.where(f_confirmed > 40).dropna().tolist()

fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(2, 1, 1)
# ax.set_yscale("log")
ax.plot(ilist, "g--", label="Italy")
ax.plot(flist, "b--", label="France")
# The labels above are only displayed once a legend is drawn.
ax.legend(loc="best")

### Remarques


En décalant les deux courbes pour avoir le même début, nous remarquons que les courbes ont la même forme exponentielle : la France a pu retarder l'évolution des cas de COVID19 un peu plus longtemps que l'Italie, mais la progression du virus est visuellement la même.

**Comme annoncé par le président de la république Macron, l'épidémie en France ne fait que commencer.**

# LSTM trained on French data

In [None]:
import math
from datetime import timedelta
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error

In [None]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into supervised-learning samples.

    Each sample pairs ``n_steps`` consecutive values with the value that
    immediately follows them.

    Parameters:
        sequence: list, NumPy array or pandas Series of values.
        n_steps: number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` has one row of ``n_steps``
        values per sample and ``y`` holds the value following each window.
        Both are empty when the sequence has no more than ``n_steps`` values.
    """
    # Work on a plain array so every access is positional; indexing a pandas
    # Series with an integer is a label lookup and depends on its index.
    values = np.asarray(sequence)
    X, y = [], []
    for start in range(len(values) - n_steps):
        end_ix = start + n_steps
        X.append(values[start:end_ix])
        y.append(values[end_ix])
    return np.asarray(X), np.asarray(y)

In [None]:
# Number of past values fed to the model for each prediction.
n_steps = 4

# French totals (column-wise sum of every French row, first ten entries
# dropped), indexed by date.
france_rows = ts_confirmed_new[ts_confirmed_new["Country/Region"] == "France"]
f_confirmed = france_rows.sum(axis=0)[10:]
f_confirmed.index = pd.to_datetime(f_confirmed.index)

# Training samples are built from the data up to 2020-03-14 inclusive.
training_slice = f_confirmed[:'2020-03-14']
X, y = split_sequence(training_slice, n_steps)

In [None]:
# Train a small LSTM on the French samples and plot the training loss.
n_features = 1

# Add the trailing feature axis so X matches the LSTM's
# input_shape=(n_steps, n_features).
X = X.reshape((X.shape[0], X.shape[1], n_features))

# define model: one LSTM layer of 10 units followed by a single linear output
model = Sequential()
model.add(LSTM(10, activation="relu", input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mse")


# fit model
history = model.fit(X, y, epochs=200, verbose=0)

# history for loss
plt.plot(history.history["loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
# Only the training loss is plotted (no validation data is passed to fit),
# so the legend names a single curve.
plt.legend(["train"], loc="upper left")
plt.show()

In [None]:
# Compare the model's output on the training windows with the observed values.
fig, ax = plt.subplots(1, 1, figsize=(6, 6))

pred = model.predict(X)
# The k-th training window predicts the value at position k + n_steps of
# f_confirmed, so label the predictions with exactly those dates. Using the
# index of the whole remaining series fails with a length mismatch as soon as
# the data extends past the training cut-off.
pred_index = f_confirmed.index[n_steps : n_steps + len(pred)]
pred_serie = pd.Series(data=pred.flatten(), index=pred_index)

pred_serie.plot(ax=ax, label="prediction")
# Plot the observed values over the same dates as the predictions.
f_confirmed[n_steps : n_steps + len(pred_serie)].plot(ax=ax, label="training")

ax.legend(loc="best")

## Prediction

Prédire l'évolution des cas de COVID19 en France sur les 30 prochains jours

In [None]:
# Forecast horizon in days.
n = 30

# The forecast starts the day after 2020-03-11 and is seeded with the last
# n_steps observed values up to that date.
# NOTE(review): the training samples run to 2020-03-14 while the forecast
# starts after 2020-03-11 — confirm which cut-off is intended.
observed_until_cutoff = f_confirmed[:'2020-03-11']
first_forecast_day = observed_until_cutoff.index[-1] + timedelta(days=1)
index30 = pd.date_range(first_forecast_day, periods=n, freq="D")
last_values = observed_until_cutoff[-n_steps:].values
prediction30 = []

In [None]:
# Recursive forecast: each predicted value is appended to the history and
# becomes part of the input window of the next step.
for _ in range(n):
    window = last_values[-n_steps:].reshape((1, n_steps, n_features))
    next_value = int(model.predict(window)[0][0])
    prediction30.append(next_value)
    last_values = np.append(last_values, next_value)

serie30_france = pd.Series(data=prediction30, index=index30)

In [None]:
# Observed French cases against the 30-day forecast, on a logarithmic scale.
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(2, 1, 1)

ax.set_yscale("log")

# French totals over the whole period (column-wise sum of every French row).
f_confirmed_new = ts_confirmed_new[ts_confirmed_new["Country/Region"] == "France"].sum(
    axis=0
)[4:]
f_confirmed_new.index = pd.to_datetime(f_confirmed_new.index)


# "france_new" shows the series just built above, matching the later
# comparison plot that uses the same label.
f_confirmed_new.plot(ax=ax, label="france_new", marker="o", linestyle="")
f_confirmed[:'2020-03-11'].plot(ax=ax, label="france")
# c_confirmed.plot(ax=ax, c="red", label="China")

serie30_france.plot(ax=ax, c="grey", linestyle="--", label="predition for 30 days")

# The labels above are only displayed once a legend is drawn.
ax.legend(loc="best")

# LSTM used for South Korea

## Prepare train data

In [None]:
# Window length (number of past values per sample) and number of features
# per time step.
n_steps = 3
n_features = 1

# Build the samples from the South Korean series without its last value.
skorea_history = skorea_confirmed[:-1]
X, y = split_sequence(skorea_history, n_steps)

# Add the trailing feature axis: (samples, n_steps) -> (samples, n_steps, n_features).
n_samples, window_length = X.shape[0], X.shape[1]
X = X.reshape((n_samples, window_length, n_features))

## Model

In [None]:
# define model: one LSTM layer of 10 units followed by a single linear output
model = Sequential()
model.add(LSTM(10, activation="relu", input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mse")


# fit model
history = model.fit(X, y, epochs=250, verbose=0)

# history for loss
plt.plot(history.history["loss"])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
# Only the training loss is plotted (no validation data is passed to fit),
# so the legend names a single curve.
plt.legend(["train"], loc="upper left")
plt.show()

## Prediction

In [None]:
# Forecast horizon in days.
n = 100

# The forecast starts the day after the last French observation and is seeded
# with the last n_steps French values; `model` is whatever was fitted last.
first_forecast_day = f_confirmed.index[-1] + timedelta(days=1)
index30 = pd.date_range(first_forecast_day, periods=n, freq="D")
last_values = f_confirmed[-n_steps:].values
prediction30 = []

# Recursive forecast: each predicted value is fed back as the newest input.
for _ in range(n):
    window = last_values[-n_steps:].reshape((1, n_steps, n_features))
    next_value = int(model.predict(window)[0][0])
    prediction30.append(next_value)
    last_values = np.append(last_values, next_value)

serie30 = pd.Series(data=prediction30, index=index30)

In [None]:
# Observed series and both forecasts on one logarithmic plot.
fig = plt.figure(figsize=(15, 30))
ax = fig.add_subplot(2, 1, 1)
ax.set_yscale("log")

# (series, plot keywords), drawn in this order: observations, then forecasts.
curves = [
    (f_confirmed[:'2020-03-11'], {"label": "Train data France"}),
    (f_confirmed_new, {"label": "france_new", "marker": "o", "linestyle": ""}),
    (skorea_confirmed[10:], {"marker": "^", "label": "Corée du Sud"}),
    (
        serie30,
        {"c": "grey", "linestyle": "--", "label": "prediction en suivant la corée du sud"},
    ),
    (
        serie30_france,
        {"c": "red", "linestyle": "--", "label": "predition suivant la progression france"},
    ),
]
for curve, plot_kwargs in curves:
    curve.plot(ax=ax, **plot_kwargs)

# Axis labels are set after plotting, as in the original cell.
ax.set_xlabel("Time")
ax.set_ylabel("Cases")
ax.legend(loc="best")

## Saturation des lits de réanimation dans les hôpitaux


Sachant que la France dispose de 5000 lits de réanimation sur le territoire et que **5% des cas contaminés par le COVID19 nécessitent une prise en charge hospitalière critique** (source : https://jamanetwork.com/journals/jama/fullarticle/2762996), nous estimons la date à laquelle le nombre de cas critiques atteint cette capacité.

In [None]:
def _critical_cases(confirmed):
    """Return the share of *confirmed* cases counted as critical (5 %)."""
    return confirmed * 0.05


# Critical-care cases derived from the observed series and from each forecast.
hosp = f_confirmed_new.apply(_critical_cases)
hosp_france = serie30_france.apply(_critical_cases)
hosp_ks = serie30.apply(_critical_cases)

In [None]:
# Critical-care cases (observed and forecast) against the 5000-bed line.
fig = plt.figure(figsize=(10, 20))
ax = fig.add_subplot(2, 1, 1)
ax.set_yscale("log")

# (series, colour, legend label); every curve is dashed.
curves = (
    (hosp, "grey", "cas hospitalisé"),
    (hosp_ks, "blue", "prediction en suivant la corée du sud"),
    (hosp_france, "red", "predition suivant la progression france"),
)
for series, colour, label in curves:
    series.plot(ax=ax, c=colour, linestyle="--", label=label)

ax.set_ylabel("# Hospitalisés")
ax.set_xlabel("Time")
ax.legend(loc="best")
# Horizontal line at the capacity of 5000.
ax.axhline(y=5000, linewidth=3, color="black", alpha=0.5)

In [None]:
def _saturation_date(critical_cases, capacity=5000):
    """Return the first date on which *critical_cases* reaches *capacity*.

    Parameters:
        critical_cases: pandas Series of critical-care cases indexed by date.
        capacity: threshold that counts as saturation.

    Returns:
        The date formatted as dd/mm/YYYY, or None when the series never
        reaches *capacity*.
    """
    reached = critical_cases[critical_cases >= capacity]
    if reached.empty:
        return None
    return reached.index[0].strftime("%d/%m/%Y")


# Shown in place of a date when a forecast stays below the capacity over its
# whole horizon; indexing the first matching date would raise IndexError.
not_reached = "non atteinte sur l'horizon de prédiction"

print(
    "Date de saturation du système hospitalien Francais dans le pire scénario :"
    + (_saturation_date(hosp_france) or not_reached)
)
print(
    "Date de saturation du système hospitalien Francais dans le meilleur scénario :"
    + (_saturation_date(hosp_ks) or not_reached)
)