# UK Covid Cases Analysis

### Installation of Pandas and Upload of the data file

In [2]:
!pip install pandas
!pip install numpy

import pandas as pd
import numpy as np

pd.set_option("display.max_columns", None)

# Extract the file containing UK Covid Cases data
uk_cases=pd.read_csv("/Users/shaunman/Documents/LSE/Course 2/Course 2 Final/LSE_DA201_Assignment_files/covid_19_uk_cases.csv")




### Printing the first and last 5 rows as well as the shape of the dataframe

In [3]:
# First Five Rows
print(uk_cases.head())

# Last five rows
print(uk_cases.tail())

# No of rows and columns within the dataframe 
print(uk_cases.shape)

uk_cases.describe()








  Province/State  Country/Region      Lat     Long ISO 3166-1 Alpha 3-Codes  \
0       Anguilla  United Kingdom  18.2206 -63.0686                      AIA   
1       Anguilla  United Kingdom  18.2206 -63.0686                      AIA   
2       Anguilla  United Kingdom  18.2206 -63.0686                      AIA   
3       Anguilla  United Kingdom  18.2206 -63.0686                      AIA   
4       Anguilla  United Kingdom  18.2206 -63.0686                      AIA   

                   Sub-region Name  Intermediate Region Code        Date  \
0  Latin America and the Caribbean                        29  2020-01-22   
1  Latin America and the Caribbean                        29  2020-01-23   
2  Latin America and the Caribbean                        29  2020-01-24   
3  Latin America and the Caribbean                        29  2020-01-25   
4  Latin America and the Caribbean                        29  2020-01-26   

   Deaths  Cases  Recovered  Hospitalised  
0     0.0    0.0        

Unnamed: 0,Lat,Long,Intermediate Region Code,Deaths,Cases,Recovered,Hospitalised
count,7584.0,7584.0,7584.0,7582.0,7582.0,7582.0,7582.0
mean,21.840267,-41.440047,12.5,6210.202321,214708.2,454.691242,685.228568
std,28.305518,30.527529,14.009849,24860.790148,1007557.0,1010.642337,819.256635
min,-51.7963,-81.2546,0.0,0.0,0.0,0.0,0.0
25%,17.851075,-64.667625,0.0,0.0,12.0,0.0,105.0
50%,20.50365,-60.855483,2.5,1.0,168.5,13.0,390.0
75%,39.448675,-5.152225,29.0,25.0,1585.0,312.0,955.75
max,55.3781,-2.3644,29.0,138237.0,8317439.0,8322.0,4907.0


### Identifying and filtering out the missing values from the dataframe

In [4]:
# Number of missing values
print(uk_cases.isna().any(axis=1).sum())

# Printing the rows containing the missing values
missing_cases=uk_cases[uk_cases.isna().any(axis=1)]
print(missing_cases)

# Filtered out those rows containing NaN values
filter_cases=uk_cases.dropna()


2
    Province/State  Country/Region      Lat     Long ISO 3166-1 Alpha 3-Codes  \
875        Bermuda  United Kingdom  32.3078 -64.7505                      BMU   
876        Bermuda  United Kingdom  32.3078 -64.7505                      BMU   

      Sub-region Name  Intermediate Region Code        Date  Deaths  Cases  \
875  Northern America                         0  2020-09-21     NaN    NaN   
876  Northern America                         0  2020-09-22     NaN    NaN   

     Recovered  Hospitalised  
875        NaN           NaN  
876        NaN           NaN  


### Filtering & Subsetting the data for Cases within Gibraltar

In [5]:
# Filtering results of Gibraltar from the data
gibr_cases = filter_cases[filter_cases['Province/State'] == 'Gibraltar']
gibr_cases

# Subsetting the Dataframe to display the following columns: Deaths, Cases, Recovered and Hospitalised
gb_sub=gibr_cases[['Date','Cases', 'Deaths', 'Recovered', 'Hospitalised']]
gb_sub.describe().apply(lambda s: s.apply('{0:.5f}'.format))







Unnamed: 0,Cases,Deaths,Recovered,Hospitalised
count,632.0,632.0,632.0,632.0
mean,2237.10918,40.20886,1512.8212,1027.625
std,2136.26809,45.33283,1817.09675,1145.68106
min,0.0,0.0,0.0,0.0
25%,177.0,0.0,109.5,157.75
50%,1036.5,5.0,323.5,675.5
75%,4286.0,94.0,4122.5,1548.0
max,5727.0,97.0,4670.0,4907.0


### Importing the files containing UK Vaccination data

In [6]:
# Extract the file containing Vaccination Data
cov_vac=pd.read_csv("/Users/shaunman/Documents/LSE/Course 2/Course 2 Final/LSE_DA201_Assignment_files/covid_19_uk_vaccinated.csv")


### Merge list of UK Covid Cases and UK Vaccination Data

In [7]:
# Merge 2 dataframes with use of outerjoin.
uk_vac=pd.merge(uk_cases,cov_vac,how='outer')
print(uk_vac.shape)


(7584, 15)


### Missing Values filtered from Dataframe

In [89]:
# Number of missing values
print(uk_vac.isna().any(axis=1).sum())

# Filtered out those rows containing NaN values
ukvac_cases=uk_vac.dropna()

# Comparing the output of Vaccinated column and Second Dose column
print(sum(ukvac['Vaccinated']-ukvac['Second Dose']))

#The output generated reveals that they are the same.
#Hence, invidivuals are considered Vaccinated if they have received 2nd dose and this column can be excluded.

# Filtering out extra columns from the dataframe
ukvac=ukvac_cases[['Province/State','Date','First Dose','Second Dose']]


# Print shape of the new dataframe
print(ukvac.shape)

2
0
(7582, 4)


### Converting the data type from object to Date

In [90]:
# Import the datetime module and datetime class.
from datetime import datetime

# Current date and time .
now = datetime.now() 
print('Current time', now)

# Converting the data type of the Date Column from object to Date 
ukvac['Date']=pd.to_datetime(ukvac['Date'],format='%Y-%m-%d')
ukvac.info()


Current time 2022-07-07 16:55:50.409929
<class 'pandas.core.frame.DataFrame'>
Int64Index: 7582 entries, 0 to 7583
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Province/State  7582 non-null   object        
 1   Date            7582 non-null   datetime64[ns]
 2   First Dose      7582 non-null   int64         
 3   Second Dose     7582 non-null   int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 296.2+ KB


In [91]:

# Creating column containing those individual who have received only the first dose
ukvac['Fdose_only']=ukvac['First Dose'] - ukvac['Second Dose']
ukvac['total_dose']=ukvac['First Dose'] + ukvac['Second Dose']
ukvac['Fdose_perc']=ukvac['First Dose']/ukvac['total_dose']

In [92]:
# Group by Country/Region
state_group=ukvac.groupby('Province/State')
firstdose_group=ukvac.groupby('First Dose')
seconddose_group=ukvac.groupby('Second Dose')
timegroup=ukvac.groupby('Date')

In [93]:
# 3.4.a Province/State having the highest number of individuals with a first dose but not a second dose
prov=state_group.sum().sort_values('Fdose_only',ascending=False)
prov

Unnamed: 0_level_0,First Dose,Second Dose,Fdose_only,total_dose,Fdose_perc
Province/State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gibraltar,5870786,5606041,264745,11476827,131.947648
Montserrat,5401128,5157560,243568,10558688,131.948054
British Virgin Islands,5166303,4933315,232988,10099618,131.948224
Anguilla,4931470,4709072,222398,9640542,131.948124
Isle of Man,4226984,4036345,190639,8263329,131.948702
Falkland Islands (Malvinas),3757307,3587869,169438,7345176,131.948003
Cayman Islands,3522476,3363624,158852,6886100,131.947888
Channel Islands,3287646,3139385,148261,6427031,131.948645
Turks and Caicos Islands,3052822,2915136,137686,5967958,131.948629
Bermuda,2817981,2690908,127073,5508889,131.947652


In [94]:
#3.4.b Province/State having the highest percentage of individuals with a first dose but not a second dose.
prov_rate=state_group.sum().sort_values('Fdose_perc',ascending=False)
prov_rate


Unnamed: 0_level_0,First Dose,Second Dose,Fdose_only,total_dose,Fdose_perc
Province/State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Isle of Man,4226984,4036345,190639,8263329,131.948702
Channel Islands,3287646,3139385,148261,6427031,131.948645
Turks and Caicos Islands,3052822,2915136,137686,5967958,131.948629
British Virgin Islands,5166303,4933315,232988,10099618,131.948224
Anguilla,4931470,4709072,222398,9640542,131.948124
Montserrat,5401128,5157560,243568,10558688,131.948054
Falkland Islands (Malvinas),3757307,3587869,169438,7345176,131.948003
Cayman Islands,3522476,3363624,158852,6886100,131.947888
Others,2583151,2466669,116482,5049820,131.947868
"Saint Helena, Ascension and Tristan da Cunha",2348310,2242421,105889,4590731,131.947796


In [88]:

#3.4.c How has the number of vaccinated individuals and individuals \n
# received the first and second doses, changed over time?




0
