# COVID-19: Analysis for Standard Chartered Operated Regions

**Gaurav Gupta**

# What is Coronavirus?

    Coronaviruses (CoV) are a large family of viruses that cause illness ranging from the common cold to more
    severe diseases such as Middle East Respiratory Syndrome (MERS-CoV) and Severe Acute Respiratory Syndrome
    (SARS-CoV). A novel coronavirus (nCoV) is a new strain that has not been previously identified in humans.  

### Source - 

https://www.sc.com/en/our-locations/

https://www.worldometers.info/coronavirus/

https://www.livescience.com/new-coronavirus-images.html

## Let's do an Exploratory analysis on the data we have so far. 

   The data has been shared on Kaggle at https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset.
   
   Real-time data, updated daily, is also made available by Johns Hopkins University at https://docs.google.com/spreadsheets/d/1yZv9w9zRKwrGTaR-YzmAqMefw4wMlaXocejdxZaTs6w/htmlview?usp=sharing&sle=true#

In [0]:
# import the necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date, timedelta
from statsmodels.tsa.arima_model import ARIMA
from sklearn.cluster import KMeans
import matplotlib as mpl

from IPython.display import Markdown
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
import pycountry
import folium 
from folium import plugins

# IPython magic: set the inline matplotlib backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Initialise plotly's offline notebook mode before any plotly figure is shown.
py.init_notebook_mode(connected=True)

# Utility Functions

def formatted_text(string):
    """Display ``string`` as rendered Markdown (bold, italic, etc.).

    Parameters
    ----------
    string : str
        Markdown source to render in the notebook output.
    """
    # `display` is not imported anywhere in the visible file; import it here
    # so the function does not depend on the notebook providing it implicitly.
    from IPython.display import display

    display(Markdown(string))

### Import and Exploratory Analysis of data

In [0]:
# Load the COVID-19 observations CSV into a DataFrame.
nCoV_data = pd.read_csv(
    filepath_or_buffer="../input/novel-corona-virus-2019-dataset/covid_19_data.csv"
)

# Show the last few rows as a quick sanity check.
nCoV_data.tail()

In [0]:
# Convert the 'Last Update' and 'ObservationDate' columns to datetime objects
# (parsed value by value, as before).
nCoV_data['Last Update'] = nCoV_data['Last Update'].apply(pd.to_datetime)
nCoV_data['ObservationDate'] = nCoV_data['ObservationDate'].apply(pd.to_datetime)

# Drop the 'SNo' column (the only column removed here).
nCoV_data.drop(['SNo'], axis=1, inplace=True)

# Fill the missing values in 'Province/State' with the row's 'Country/Region'.
# fillna is the documented way to fill NaN from another Series (aligned on index).
nCoV_data['Province/State'] = nCoV_data['Province/State'].fillna(nCoV_data['Country/Region'])

# Data Glimpse
nCoV_data.head()

In [0]:
# Give the four descriptive columns short names without '/' or spaces.
nCoV_data.rename(
    columns={
        'ObservationDate': 'Date',
        'Country/Region': 'Country',
        'Province/State': 'State',
        'Last Update': 'LastUpdate',
    },
    inplace=True,
)

In [0]:
# Active cases are whatever is confirmed but neither dead nor recovered.
nCoV_data['Active'] = (
    nCoV_data['Confirmed']
    .sub(nCoV_data['Deaths'])
    .sub(nCoV_data['Recovered'])
)

# Sum the four measures into one row per (Date, Country).
nCoV_data = (
    nCoV_data
    .groupby(['Date', 'Country'])[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

# New cases = today's cumulative count minus the previous row's count for the
# same country (NaN on each country's first row).
nCoV_data['New_Cases'] = nCoV_data['Confirmed'].sub(
    nCoV_data.groupby(['Country'])['Confirmed'].shift(1)
)

### Read data for SC Operated Countries.

In [0]:
# Load the SCB country list and rename 'Countries' to 'Country' so it can be
# joined against the case data on that column.
scb = pd.read_excel("../input/sc-regional-data/Countries.xlsx").rename(
    columns={'Countries': 'Country'}
)
scb.head()

In [0]:
# Merge both the file and Create Final dataset for only SCB specific countries.

# Inner join: keep only the case rows whose 'Country' appears in the SCB list.
nCoV_scb = pd.merge(scb,nCoV_data,on='Country',how='inner',indicator=True)

# Week number of each observation date. `Series.dt.week` is deprecated (and
# removed in pandas 2.0), so use isocalendar().week and cast back to int.
# The column keeps the name 'Week_Day' because later cells group by it.
nCoV_scb['Week_Day'] = nCoV_scb['Date'].dt.isocalendar().week.astype(int)

In [0]:
# Day of the year for each observation date, stored as an integer.
# (The original computed the same column twice before casting; once is enough.)
nCoV_scb['Day_of_Year'] = nCoV_scb['Date'].dt.dayofyear.astype(int)

### Summarize the data at Countries as well as Regional level.

In [0]:
# Region-level totals per date.
nCoV_Region = (
    nCoV_scb
    .groupby(['Week_Day', 'Region', 'Date', 'Day_of_Year'])
    [['Confirmed', 'Deaths', 'Recovered', 'Active', 'New_Cases']]
    .agg('sum')
    .reset_index()
)

# Country-level totals per date, keeping the region each country belongs to.
nCoV_Country = (
    nCoV_scb
    .groupby(['Region', 'Country', 'Week_Day', 'Date', 'Day_of_Year'])
    [['Confirmed', 'Deaths', 'Recovered', 'Active', 'New_Cases']]
    .agg('sum')
    .reset_index()
)

### Exclude the least impacted countries, i.e. those with 500 or fewer confirmed cases as of the latest date.

In [0]:
# Rows for the most recent date, restricted to countries above 500 confirmed cases.
c_list = nCoV_Country[nCoV_Country['Date'] == nCoV_Country.Date.max()]
c_list = c_list[c_list['Confirmed'] > 500]

# Named `impacted_countries` rather than `list`, which would shadow the builtin
# for the rest of the notebook.
impacted_countries = c_list['Country']

# Keep the full history, but only for those countries.
nCoV_Country = nCoV_Country[nCoV_Country['Country'].isin(impacted_countries)]
nCoV_Country.head()

### Final List of Countries, considered for this analysis

In [0]:
# Distinct countries that survived the 500-case filter, in order of first appearance.
nCoV_Countries = nCoV_Country['Country'].drop_duplicates().tolist()

# Blank lines, then the country list, then a separator and the total count.
print('\n', nCoV_Countries, sep='\n')
print("\n------------------------------------------------------------------")
print("\n Total SCB Operated countries where cases are more than 500: ",len(nCoV_Countries))

# Statistical Analysis

### Create various graphs/charts for visualization.

In [0]:
# Create function to create Pie Chart & Bar Chart.

def pie_chart(df,type):
    """Draw a bar chart and a pie chart of confirmed cases on the latest date.

    Parameters
    ----------
    df : DataFrame
        Must contain 'Date', 'Confirmed' and the column named by ``type``.
    type : str
        Column used for the bar categories and the pie legend
        (callers pass 'Region' or 'Country').

    Both figures are shown immediately; nothing is returned. The x-axis label
    of the bar chart is always "Impacted Regions", whatever ``type`` is.
    """
    # Sort into a new frame instead of sorting a filtered slice in place,
    # which mutates a temporary and triggers SettingWithCopyWarning.
    latest = df[df['Date'] == df['Date'].max()].sort_values('Confirmed', ascending=False)

    colors = ['yellowgreen', 'gold', 'lightskyblue','red','blue','green','orange']

    # Bar chart: one bar per category, largest first.
    plt.figure(figsize=(10,5))
    sns.barplot(x=type, y='Confirmed', data=latest)
    plt.xlabel("Impacted Regions")
    plt.ylabel("Total Confirmed Cases")
    plt.show()

    # Pie chart: each category's share of confirmed cases, labelled via the legend.
    plt.figure(figsize=(9,7))
    plt.pie(latest['Confirmed'], autopct='%1.1f%%', shadow=True, colors=colors)
    plt.legend(latest[type], loc='best')
    plt.show()

In [0]:
# Create function for line plots
def regional(df,type,plots,count):
    """Plot one line per group showing ``plots`` against days since the threshold.

    Parameters
    ----------
    df : DataFrame
        Must contain 'Confirmed', 'Day_of_Year', ``plots`` and the ``type`` column.
    type : str
        Grouping column; one line is drawn per distinct value.
    plots : str
        Column plotted on the y-axis (callers pass 'Confirmed' or 'Deaths').
    count : int
        Only rows with ``Confirmed > count`` are plotted.

    The figure is not shown here: callers set the y-label and call
    ``plt.show()`` themselves.
    """
    # Work on a copy so writing 'Day_of_Year' never touches the caller's frame.
    subset = df[df['Confirmed'] > count].copy()
    # Rank the remaining days within each group, so the x-axis counts days
    # since that group crossed the threshold.
    subset['Day_of_Year'] = subset.groupby(type)['Day_of_Year'].rank(method='min')

    _, ax = plt.subplots()
    # NOTE(review): the style is set after the axes are created, as in the
    # original; confirm whether it was meant to precede plt.subplots().
    mpl.style.use('seaborn')

    # Title and x-label report the threshold actually used, not a fixed 1000.
    plt.title(f"Standard Chartered : Daily Trend after {count} cases", fontsize=15)

    subset.groupby(type).plot(x='Day_of_Year', y=plots, ax=ax,figsize=(10,6),linewidth=3)

    plt.xlabel(f"No of Days since {count} Confirmed Cases")

    # Legend labels come from a second groupby on the same key as the plot above.
    leg = subset.groupby(type)[plots].count().reset_index()
    plt.legend(leg[type],loc='best',fontsize=14)

In [0]:
# Create function for Daily Cases.
def new_cases(df,type,plots,count):
    """Show a bar chart of ``plots`` per day since the threshold was crossed.

    Parameters
    ----------
    df : DataFrame
        Must contain 'Confirmed', 'Day_of_Year', ``plots`` and the ``type`` column.
    type : str
        Grouping column used to rank days within each group.
    plots : str
        Column drawn as bars (all current callers pass 'New_Cases').
    count : int
        Only rows with ``Confirmed > count`` are plotted.
    """
    # Work on a copy so writing 'Day_of_Year' never touches the caller's frame.
    subset = df[df['Confirmed'] > count].copy()
    subset['Day_of_Year'] = subset.groupby(type)['Day_of_Year'].rank(method='min').astype(int)

    mpl.style.use('seaborn')

    # Honour the `plots` argument instead of a hard-coded 'New_Cases'.
    subset.plot(x='Day_of_Year',y=plots,kind='bar',figsize=(12,6))
    plt.title("New Cases Day by Day ", fontsize=20)

    # The x-label reports the threshold actually used, not a fixed 100.
    plt.xlabel(f"---Days since {count} Confirmed Cases-->")
    plt.ylabel("New Cases every day")

    plt.show()

### Standard Chartered segregates its businesses worldwide into 4 geographies, and we will try to understand how these bank-operated geographies are impacted by Covid-19.

* ASA - ASEAN & South Asia countries
* GCNA - Greater China and North Asia
* AEU - America & Europe
* AME - Africa & Middle East

#### Note: for convenience, we will do the Africa and ME (Middle East) analysis separately.

# Regional Analysis : SC Worldwide
### Let's see how different regions are impacted

In [0]:
# Turn off pandas' chained-assignment warning for the rest of the notebook,
# then draw the region-level bar and pie charts.
pd.set_option('mode.chained_assignment', None)
pie_chart(nCoV_Region, 'Region')

### ==> As we see from the plots above, America & Europe are the worst affected regions, and America alone has 50% of the cases.

### ==> AME (Africa & ME) are the least impacted regions, and the effect there is minimal so far.

### Cases Over time 

#### Let's now understand how cases developed over time.

In [0]:
# Cumulative confirmed cases per region, for rows with more than 1000 confirmed cases.
regional(nCoV_Region,'Region','Confirmed',1000)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

In [0]:
# Cumulative deaths per region, for rows with more than 500 confirmed cases.
regional(nCoV_Region,'Region','Deaths',500)
plt.ylabel("Total Deaths (Cumulative)")  # corrected the misspelled axis label
plt.show()

### INFERENCE :
#### From the above plots we can see that the America & Europe regions moved very fast and reached 400K cases within 40 days of the first 1K cases.
#### GCNA was able to flatten the curve very quickly, and case growth in ASA, ME & Africa is relatively low.
#### As you can see, Europe & America are following exactly the same death-rate pattern, and so are ASA & ME.
#### It signifies that the virus is impacting different races differently, based upon human genetic and environmental conditions.

In [0]:

# Country-level rows for the most recent date only.
nCoV_Country1 = nCoV_Country.loc[nCoV_Country['Date'].eq(nCoV_Country['Date'].max())]

def sunbust(df=None):
    """Show a Region -> Country sunburst chart sized by active cases.

    Parameters
    ----------
    df : DataFrame, optional
        Frame with 'Region', 'Country' and 'Active' columns. When omitted, the
        module-level ``nCoV_Country1`` is used, which is what the existing
        zero-argument calls rely on.
    """
    if df is None:
        df = nCoV_Country1

    # Largest active counts first, with a clean 0..n-1 index.
    ordered = df.sort_values(by='Active', ascending=False).reset_index(drop=True)
    fig = px.sunburst(ordered,
                      path=["Region","Country"], values="Active", height=550,
                      title='Number of Active cases as of Date',
                      color_discrete_sequence = px.colors.qualitative.Prism)
    # Set the text shown on the sectors of the first trace.
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

# Draw the sunburst from the current contents of nCoV_Country1.
sunbust()
#nCoV_Country1['Region'].unique()

### Exclude America & Europe for better visibility of other regions.

In [0]:
# Drop the Europe and America rows so the smaller regions stay visible.
nCoV_Country1 = nCoV_Country1.loc[
    ~nCoV_Country1['Region'].isin(['Europe', 'America'])
]
nCoV_Country1['Region'].unique()

# Redraw the sunburst from the reduced nCoV_Country1.
sunbust()

# 1. ASA Region
### Analysis of Confirmed, Deaths & Daily New Cases across the region.

In [0]:
# Country-level rows for the ASA region, shown as bar and pie charts.
nCoV_ASA = nCoV_Country.loc[nCoV_Country['Region'].eq('ASA')]
pie_chart(nCoV_ASA, 'Country')

In [0]:
# Cumulative confirmed cases per ASA country, for rows above 500 confirmed cases.
regional(nCoV_ASA,'Country','Confirmed',500)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

In [0]:
# Cumulative deaths per ASA country, for rows above 100 confirmed cases.
regional(nCoV_ASA,'Country','Deaths',100)
plt.ylabel("Total Deaths(Cumulative)")  # corrected the misspelled axis label
plt.show()

In [0]:
# Region-level ASA rows (this rebinds nCoV_ASA), plotted as daily new cases.
nCoV_ASA = nCoV_Region.loc[nCoV_Region['Region'].eq('ASA')]
new_cases(nCoV_ASA, 'Region', 'New_Cases', 500)

**INFERENCES - ASA Region**
1. India is the most impacted country in the region; however, given the size of its population, cases per million are relatively low.
2. SG is the least impacted; however, its cases per million are high, while its death rate is the lowest.
3. The number of new cases every day is on an increasing trend.

# 2. GCNA Region
### Analysis of Confirmed, Deaths & Daily New Cases across the region.

In [0]:
# Country-level rows for the GCNA region, shown as bar and pie charts.
nCoV_GCNA = nCoV_Country.loc[nCoV_Country['Region'].eq('GCNA')]
pie_chart(nCoV_GCNA, 'Country')

In [0]:
# Cumulative confirmed cases per GCNA country, for rows above 500 confirmed cases.
regional(nCoV_GCNA,'Country','Confirmed',500)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Cumulative deaths per GCNA country, for rows above 100 confirmed cases.
regional(nCoV_GCNA,'Country','Deaths',100)
plt.ylabel("Total Deaths(Cumulative)")  # corrected the misspelled axis label
plt.show()

In [0]:
# Region-level GCNA rows, plotted as daily new cases. Stored in nCoV_GCNA
# (not nCoV_ASA, a copy-paste leftover) so the ASA frame is not overwritten
# with GCNA data; this mirrors how the Europe and ME cells rebind their own names.
nCoV_GCNA = nCoV_Region[nCoV_Region['Region']=='GCNA']
new_cases(nCoV_GCNA,'Region','New_Cases',500)

# 3. Europe Region

In [0]:
# Country-level rows for the Europe region, shown as bar and pie charts.
nCoV_Europe = nCoV_Country.loc[nCoV_Country['Region'].eq('Europe')]
pie_chart(nCoV_Europe, 'Country')

In [0]:
# Cumulative confirmed cases per European country, for rows above 500 confirmed cases.
regional(nCoV_Europe,'Country','Confirmed',500)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Cumulative deaths per European country, for rows above 100 confirmed cases.
regional(nCoV_Europe,'Country','Deaths',100)
plt.ylabel("Total Deaths(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Region-level Europe rows (this rebinds nCoV_Europe), plotted as daily new cases.
nCoV_Europe = nCoV_Region[nCoV_Region['Region']=='Europe']
new_cases(nCoV_Europe,'Region','New_Cases',500)

**INFERENCES**

1. The number of confirmed cases rose day by day. The rise was very steep, especially in France, Germany & UK.
2. The number of deaths in Germany is very low compared to France & UK, even though the number of confirmed cases is similar.
3. Daily new cases are now declining fast, as you can see in the daily curve.

# 4. ME(Middle East) Region

In [0]:
# Country-level rows for the ME region, shown as bar and pie charts.
nCoV_ME = nCoV_Country.loc[nCoV_Country['Region'].eq('ME')]
pie_chart(nCoV_ME, 'Country')

In [0]:
# Cumulative confirmed cases per ME country, for rows above 500 confirmed cases.
regional(nCoV_ME,'Country','Confirmed',500)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Cumulative deaths per ME country, for rows above 100 confirmed cases.
regional(nCoV_ME,'Country','Deaths',100)
plt.ylabel("Total Deaths(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Region-level ME rows (this rebinds nCoV_ME), plotted as daily new cases.
# This cell uses a threshold of 100 rather than 500.
nCoV_ME = nCoV_Region[nCoV_Region['Region']=='ME']
new_cases(nCoV_ME,'Region','New_Cases',100)

# 5. Africa Region 
### Note - Only countries with more than 500 cases have been considered.

In [0]:
# Country-level rows for the Africa region, shown as bar and pie charts.
nCoV_Africa = nCoV_Country.loc[nCoV_Country['Region'].eq('Africa')]
pie_chart(nCoV_Africa, 'Country')

In [0]:
# Cumulative confirmed cases per African country, for rows above 500 confirmed cases.
regional(nCoV_Africa,'Country','Confirmed',500)
plt.ylabel("Confirmed Cases(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Cumulative deaths per African country, for rows above 100 confirmed cases.
regional(nCoV_Africa,'Country','Deaths',100)
plt.ylabel("Total Deaths(Cumulative)")  # corrected the misspelled axis label
plt.show()

# Region-level Africa rows, plotted as daily new cases. Stored in nCoV_Africa
# (not nCoV_ME, a copy-paste leftover) so the ME frame is not overwritten with
# Africa data; this mirrors how the Europe and ME cells rebind their own names.
nCoV_Africa = nCoV_Region[nCoV_Region['Region']=='Africa']
new_cases(nCoV_Africa,'Region','New_Cases',500)

# Like/Comment if you like the efforts...