In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.graph_objects as go
#from fbprophet import Prophet
#import pycountry
import plotly.express as px

## Данный ноутбук посвящен визуалиации на тему распространения COVID-19 с февраля до сегодняшнего дня. 
Ниже буду посмотрены графики для России, рассчитан риск смертности, заболевания и выздоравления для некоторых стран.
Также будет визуализация для всего мира по времени.


## Считаем данные о заболевании

In [3]:
data = pd.read_csv('../Paper/novel-corona-virus-2019-dataset/covid_19_data.csv',parse_dates=['Last Update'])

In [4]:
data['Date'] = pd.to_datetime(data['ObservationDate'])

In [5]:
data.rename(columns={'Date': 'date', 
                     'Province/State':'state',
                     'Country/Region':'country',
                     'Confirmed': 'confirmed',
                     'Deaths':'deaths',
                     'Recovered':'recovered',
                     'SNo':'id'
                    }, inplace=True)
data.head()
data['country'] = data['country'].replace('Mainland China', 'China')
data['active'] = data['confirmed'] - data['deaths'] - data['recovered']

In [6]:
df_confirmed = pd.read_csv("../Paper/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv")
df_recovered = pd.read_csv("../Paper/novel-corona-virus-2019-dataset/time_series_covid_19_recovered.csv")
df_deaths = pd.read_csv("../Paper/novel-corona-virus-2019-dataset/time_series_covid_19_deaths.csv")

In [7]:
data['country'].unique()

array(['China', 'Hong Kong', 'Macau', 'Taiwan', 'US', 'Japan', 'Thailand',
       'South Korea', 'Singapore', 'Philippines', 'Malaysia', 'Vietnam',
       'Australia', 'Mexico', 'Brazil', 'Colombia', 'France', 'Nepal',
       'Canada', 'Cambodia', 'Sri Lanka', 'Ivory Coast', 'Germany',
       'Finland', 'United Arab Emirates', 'India', 'Italy', 'UK',
       'Russia', 'Sweden', 'Spain', 'Belgium', 'Others', 'Egypt', 'Iran',
       'Israel', 'Lebanon', 'Iraq', 'Oman', 'Afghanistan', 'Bahrain',
       'Kuwait', 'Austria', 'Algeria', 'Croatia', 'Switzerland',
       'Pakistan', 'Georgia', 'Greece', 'North Macedonia', 'Norway',
       'Romania', 'Denmark', 'Estonia', 'Netherlands', 'San Marino',
       ' Azerbaijan', 'Belarus', 'Iceland', 'Lithuania', 'New Zealand',
       'Nigeria', 'North Ireland', 'Ireland', 'Luxembourg', 'Monaco',
       'Qatar', 'Ecuador', 'Azerbaijan', 'Czech Republic', 'Armenia',
       'Dominican Republic', 'Indonesia', 'Portugal', 'Andorra', 'Latvia',
       'Moroc

### Обработка штатов

In [8]:
grouped_us = data[data['country'] == "US"].reset_index()
grouped_us_date = grouped_us.groupby('date')['date', 'confirmed', 'deaths'].sum().reset_index()

# Примеры графиков на несколько линий

In [9]:
cleaned_data = pd.read_csv('../Paper/novel-corona-virus-2019-dataset/covid_19_clean_complete.csv', parse_dates=['Date'])

In [10]:
cleaned_data.rename(columns={'ObservationDate': 'date', 
                     'Province/State':'state',
                     'Country/Region':'country',
                     'Last Update':'last_updated',
                     'Confirmed': 'confirmed',
                     'Deaths':'deaths',
                     'Recovered':'recovered'
                    }, inplace=True)

cases = ['confirmed', 'deaths', 'recovered', 'active']

cleaned_data['active'] = cleaned_data['confirmed'] + cleaned_data['deaths'] + cleaned_data['recovered']
cleaned_data['country'] = cleaned_data['country'].replace('Mainland China', 'China')

cleaned_data[['state']] = cleaned_data[['state']].fillna('')
cleaned_data[cases] = cleaned_data[cases].fillna(0)
cleaned_data.rename(columns={'Date':'date'}, inplace=True)

data = cleaned_data

In [11]:
temp = cleaned_data.groupby('date')['recovered', 'deaths', 'active'].sum().reset_index()
temp = temp.melt(id_vars="date", value_vars=['recovered', 'deaths', 'active'],
                 var_name='case', value_name='count')

In [12]:
temp.tail()

Unnamed: 0,date,case,count
196,2020-03-24,active,544560
197,2020-03-25,active,602438
198,2020-03-26,active,675527
199,2020-03-27,active,751148
200,2020-03-28,active,830307


### Графики количества зараженных, выздорвевших и умерших к дате n.

In [14]:
temp = cleaned_data.groupby('date')['recovered', 'deaths', 'active'].sum().reset_index()
temp = temp.melt(id_vars="date", value_vars=['recovered', 'deaths', 'active'],
                 var_name='case', value_name='count')


fig = px.line(temp, x="date", y="count", color='case',
             title='Cases over time: Line Plot', color_discrete_sequence = ['cyan', 'red', 'orange'])
fig.show()

fig = px.area(temp, x="date", y="count", color='case',
             title='Cases over time: Area Plot', color_discrete_sequence = ['cyan', 'red', 'orange'])
fig.show()


# Считаем уровень опасности, если есть штаты

Далее посчитаем и визуализируем риски для трех случаев(болезнь, выздоровление, смерть).
Функция степени опасности определяется на практике отношением

$$ h(x) = \frac{f(x)}{1 - F(x)}$$

На практике используется оценочная формула
$$h(t) = \frac{d(t, t+\Delta t)}{\Delta t D(t)},  $$

где $ d(t, t+\Delta t) $ - количество заболевших или выздоровевших в промежутке $[t, t+\Delta t]$, $ D(t) $ - количество людей, выживших к времени $t$. В качестве $ \Delta t $ будут взяты одни сутки.

#### Китай

In [13]:
number = 1386000000
Country = "China"

In [14]:
data[data['country'] == Country].reset_index()

Unnamed: 0,index,state,country,Lat,Long,date,confirmed,deaths,recovered,active
0,49,Anhui,China,31.8257,117.2264,2020-01-22,1,0,0,1
1,50,Beijing,China,40.1824,116.4142,2020-01-22,14,0,0,14
2,51,Chongqing,China,30.0572,107.8740,2020-01-22,6,0,0,6
3,52,Fujian,China,26.0789,117.9874,2020-01-22,1,0,0,1
4,53,Gansu,China,37.8099,101.0583,2020-01-22,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
2206,16709,Tianjin,China,39.3054,117.3230,2020-03-28,161,3,133,297
2207,16710,Tibet,China,31.6927,88.0924,2020-03-28,1,0,1,2
2208,16711,Xinjiang,China,41.1129,85.2401,2020-03-28,76,3,73,152
2209,16712,Yunnan,China,24.9740,101.4870,2020-03-28,180,2,172,354


In [15]:
data['sum'] = data['confirmed'] + data['deaths'] + data['recovered']

In [16]:
grouped_us = data[data['country'] == Country].reset_index()
grouped_us_date = grouped_us.groupby('date')['date', 'confirmed', 'deaths', 'sum', 'recovered'].sum().reset_index()

In [17]:
new_data = grouped_us_date.loc[:, ['deaths', 'sum', 'recovered']].diff()
new_data['date'] = grouped_us_date['date']
new_data.rename(columns={'deaths': 'deaths_in_day', 
                     'recovered':'recovered_in_day',
                     'sum':'confirmed_in_day'
                    }, inplace=True)

In [18]:
new_data['умершие за день'] = new_data['deaths_in_day'] / (number - new_data['deaths_in_day'] + new_data['recovered_in_day'])
new_data['выздоровевшиe за день'] = new_data['recovered_in_day'] / (number - new_data['deaths_in_day'] + new_data['recovered_in_day'])
new_data['заболевшие за день'] = new_data['confirmed_in_day'] / (number - new_data['deaths_in_day'] + new_data['recovered_in_day'])

temp = new_data.melt(id_vars="date", value_vars=['выздоровевшиe за день', 'заболевшие за день', 'умершие за день'],
                 var_name='case', value_name='h(t)')


fig = px.line(temp, x="date", y="h(t)", color='case',
             title=Country, color_discrete_sequence = ['red', 'orange', 'green'])
fig.show()
#fig.write_image("../Paper/China.png")

# Графики числа зараженных, болеющих и выздоровших 

### Россия

In [15]:
country = 'Russia'

In [16]:
data_country = data.loc[data['country'] == country, :]

In [24]:
grouped = data_country.groupby('date')['date', 'confirmed', 'deaths', 'recovered'].sum().reset_index()

fig = px.line(grouped, x="date", y="confirmed", 
              title=country + "_confirmed")
fig.show()
import plotly 

plotly.offline.iplot(fig)

In [21]:
grouped = data_country.groupby('date')['date', 'confirmed', 'deaths', 'recovered'].sum().reset_index()

fig = px.line(grouped, x="date", y="confirmed", 
              title=country + "_confirmed")
fig.show()

fig = px.line(grouped, x="date", y='recovered', 
              title=country + "_recovered")
fig.show()

fig = px.line(grouped, x="date", y="deaths", 
              title=country + "_deaths")
fig.show()

# Мировые графики

In [22]:
data['state'] = data['state'].fillna('')
temp = data[[col for col in data.columns if col != 'state']]

latest = temp[temp['date'] == max(temp['date'])].reset_index()
latest_grouped = latest.groupby('country')['confirmed', 'deaths'].sum().reset_index()

In [23]:
fig = px.choropleth(latest_grouped, locations="country", 
                    locationmode='country names', color="confirmed", 
                    hover_name="country", range_color=[1,5000], 
                    color_continuous_scale="peach", 
                    title='Countries with Confirmed Cases')

fig.show()

In [24]:
europe = list(['UK', 'Austria','Belgium','Bulgaria','Croatia','Cyprus','Czech Republic','Denmark','Estonia','Finland','France','Germany','Greece','Hungary','Ireland',
          'Italy', 'Latvia','Luxembourg','Lithuania','Malta','Norway','Netherlands','Poland','Portugal','Romania','Slovakia','Slovenia',
         'Spain', 'Sweden', 'United Kingdom', 'Iceland', 'Russia', 'Switzerland', 'Serbia', 'Ukraine', 'Belarus'])

europe_grouped_latest = latest_grouped[latest_grouped['country'].isin(europe)]

In [25]:
fig = px.choropleth(europe_grouped_latest, locations="country", 
                    locationmode='country names', color="confirmed", 
                    hover_name="country", range_color=[1,2000], 
                    color_continuous_scale='portland', 
                    title='European Countries with Confirmed Cases', scope='europe', height=800)
# fig.update(layout_coloraxis_showscale=False)
fig.show()

### Тут можно запустить по времени. Распространение болезни по миру

In [26]:
formated_gdf = data.groupby(['date', 'country'])['confirmed', 'deaths'].max()
formated_gdf = formated_gdf.reset_index()
formated_gdf['date'] = pd.to_datetime(formated_gdf['date'])
formated_gdf['date'] = formated_gdf['date'].dt.strftime('%m/%d/%Y')
formated_gdf['size'] = formated_gdf['confirmed'].pow(0.3)

fig = px.scatter_geo(formated_gdf, locations="country", locationmode='country names', 
                     color="confirmed", size='size', hover_name="country", 
                     range_color= [0, 1500], 
                     projection="natural earth", animation_frame="date", 
                     title='COVID-19: распространение по миру', color_continuous_scale="portland")
# fig.update(layout_coloraxis_showscale=False)
fig.show()

### Конец

https://www.kaggle.com/imdevskp/corona-virus-report  - прием и вдохновление взятое из некоторых ноутбуков на каггле.

In [None]:
px.offline.plot(figure, "file.html")

### Дальше можно не читать. Полезные приемы, которые встретились по пути

In [None]:
df_confirmed[['Country/Region', '3/14/20']]

In [None]:
df_0 = df_confirmed.loc[df_confirmed['3/14/20'] == 0, ['Province/State', 'Country/Region', '3/14/20']]

In [None]:
df_0.loc[df_0['Country/Region'] != 'US', ['Province/State', 'Country/Region', '3/14/20']]

In [None]:
#df_train['Column_name'].fillna(0)

In [None]:
import pandas as pd
data_Italy = data.loc[data['country'] == 'Italy', :]
recovered = data_Italy['recovered'].values
deaths = data_Italy['deaths'].values
confirmed = data_Italy['confirmed'].values