# **Step 1: Load Packages, Define Color Scheme and Import Data**

In [None]:
# datetime operations
from datetime import timedelta

# for numerical analysis
import numpy as np

# to store and process data in dataframe
import pandas as pd

# basic visualization package
import matplotlib.pyplot as plt

# advanced plotting
import seaborn as sns

# interactive visualization
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# for offline plotting
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

# hide warnings
import warnings
warnings.filterwarnings('ignore')

# Color palette, given as RRGGBB hexadecimal codes
# (true black is #000000, true white is #ffffff).
# NOTE(review): names presumably stand for confirmed / deaths / recovered /
# active -- confirm against how the plots use them.
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'

# list files
!ls ../input/corona-virus-report

# **Step 2: Wrangle Data**

In [None]:
# Country-wise table: load it, then turn empty strings into NaN and
# finally replace every missing value with 0.
country_wise = pd.read_csv('../input/corona-virus-report/country_wise_latest.csv')
country_wise = country_wise.replace('', np.nan)
country_wise = country_wise.fillna(0)

# Table grouped by day and country; the 'Date' column is read as plain
# objects/strings and converted to datetime right after loading.
full_grouped = pd.read_csv('../input/corona-virus-report/full_grouped.csv')
full_grouped['Date'] = pd.to_datetime(full_grouped['Date'])

# Day-wise table, with the same 'Date' conversion applied.
day_wise = pd.read_csv('../input/corona-virus-report/day_wise.csv')
day_wise['Date'] = pd.to_datetime(day_wise['Date'])

# **Step 3: Visualisation Japan**

In [None]:
# Build a boolean mask: True for every row whose 'Country/Region' value
# contains the substring 'Japan'.
selected = full_grouped['Country/Region'].str.contains('Japan')

# Filter with the mask and take an explicit copy. Assigning a new column to a
# bare boolean-indexed slice is chained assignment: pandas raises
# SettingWithCopyWarning for it and the write is not guaranteed to land where
# intended. Copying makes `japan` an independent DataFrame.
japan = full_grouped[selected].copy()

# Row-to-row change of the 'Active' column (the first row has no predecessor,
# so its value is NaN).
japan["New active"] = japan["Active"].diff()

# Melt to long format: one row per (Date, Case) pair, ready for plotting.
temp = japan.melt(id_vars="Date", value_vars=['New cases', 'New deaths'],
                  var_name='Case', value_name='Count')

# Quick inspection of the filtered data. A notebook cell only renders the
# value of its final expression, so the head/describe tables are printed
# explicitly instead of being silently discarded. (info() prints by itself.)
japan.info()
print(japan.head(10))
print(japan.describe())

# Figure creation: stacked area chart of the melted series with a range slider.
fig = px.area(temp, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time in Japan',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()


The unusual pattern in the data is a second surge of new cases starting in July 2020. This surge is even worse than the one between April and May. Usually, a second wave tends to be smaller than the first, so Japan's second wave having more infections than its first is intriguing.

REASON FOR 2ND WAVE HIGHER THAN 1ST WAVE:

The lower number of infections in the initial wave is due to limited testing for Covid-19. Between February and late April, only 10,981 people had been tested for Covid-19. https://www.bbc.com/news/world-asia-52466834

The number of swab tests done in July increased to 30,000 per day, which might be the reason for the huge surge of new cases. https://www.japantimes.co.jp/news/2020/07/10/national/emergency-japans-covid-19-testing/

REASONS OF DECLINE AND 2ND SURGE:

* The reason for the decline and flattening of infections is that the first state of emergency was announced on 6th April.

* The first reason for the second surge is that Japan started to reopen entertainment and recreational facilities on 11th June. These include karaoke establishments, bars, pachinko parlors, arcades and amusement parks, among others. Restaurants and bars were allowed to stay open until midnight, and public events were limited to a thousand people.

Source: https://www.japantimes.co.jp/news/2020/06/11/national/tokyo-reopening-coronavirus/



In [None]:
# Boolean mask: True for rows whose 'Country/Region' contains 'China'.
selected = full_grouped['Country/Region'].str.contains('China')

# Filter and copy. Without .copy(), the column assignment below would write
# to a slice of full_grouped (chained assignment), which pandas flags with
# SettingWithCopyWarning.
china = full_grouped[selected].copy()

# Row-to-row change of the 'Active' column (NaN for the first row).
china["New active"] = china["Active"].diff()

# Melt to long format: one row per (Date, Case) pair for plotting.
temp1 = china.melt(id_vars="Date", value_vars=['New cases'],
                   var_name='Case', value_name='Count')

# Figure creation: area chart of new cases with a range slider.
fig = px.area(temp1, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time in China',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Boolean mask: True for rows whose 'Country/Region' contains 'US'
# (str.contains is a case-sensitive substring match by default).
selected = full_grouped['Country/Region'].str.contains('US')

# Filter and copy. Without .copy(), the column assignment below would write
# to a slice of full_grouped (chained assignment), which pandas flags with
# SettingWithCopyWarning.
us = full_grouped[selected].copy()

# Row-to-row change of the 'Active' column (NaN for the first row).
us["New active"] = us["Active"].diff()

# Melt to long format: one row per (Date, Case) pair for plotting.
temp2 = us.melt(id_vars="Date", value_vars=['New cases'],
                var_name='Case', value_name='Count')

# Figure creation: area chart of new cases with a range slider.
fig = px.area(temp2, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time in USA',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Boolean mask: True for rows whose 'Country/Region' contains 'Taiwan'.
selected = full_grouped['Country/Region'].str.contains('Taiwan')

# Filter and copy. Without .copy(), the column assignment below would write
# to a slice of full_grouped (chained assignment), which pandas flags with
# SettingWithCopyWarning.
taiwan = full_grouped[selected].copy()

# Row-to-row change of the 'Active' column (NaN for the first row).
taiwan["New active"] = taiwan["Active"].diff()

# Melt to long format: one row per (Date, Case) pair for plotting.
temp3 = taiwan.melt(id_vars="Date", value_vars=['New cases'],
                    var_name='Case', value_name='Count')

# Figure creation: area chart of new cases with a range slider.
fig = px.area(temp3, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time in Taiwan',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Boolean mask: True for rows whose 'Country/Region' contains 'Korea'.
# NOTE(review): this is a substring match, so every country name containing
# 'Korea' is selected -- confirm only one such country exists in the data,
# otherwise the diff below mixes rows from different countries.
selected = full_grouped['Country/Region'].str.contains('Korea')

# Filter and copy. Without .copy(), the column assignment below would write
# to a slice of full_grouped (chained assignment), which pandas flags with
# SettingWithCopyWarning.
korea = full_grouped[selected].copy()

# Row-to-row change of the 'Active' column (NaN for the first row).
korea["New active"] = korea["Active"].diff()

# Melt to long format: one row per (Date, Case) pair for plotting.
temp4 = korea.melt(id_vars="Date", value_vars=['New cases'],
                   var_name='Case', value_name='Count')

# Figure creation: area chart of new cases with a range slider.
fig = px.area(temp4, x="Date", y="Count", color='Case', height=600, width=1200,
              title='Cases over time in Korea',
              color_discrete_sequence=[rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()