In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

%matplotlib inline

In [2]:
# Load the three tabular inputs used by the notebook
population = pd.read_csv('Malaysia_Population_18yo.csv')
registered = pd.read_csv('Registered.csv')
vaccination = pd.read_csv('Vaccination.csv')

# State boundaries for the choropleths; the context manager closes the file
# handle as soon as the GeoJSON has been parsed
with open('Malaysia.geojson') as geojson_file:
    malaysia_states = json.load(geojson_file)

In [3]:
# Take the row labelled 37 of the vaccination table, discard its first field,
# and reshape it into a two-column frame: 'index' holds the original column
# names (dose1_<state> / dose2_<state>) and 'Latest' holds that row's values.
# NOTE(review): 37 is hard-coded; presumably the most recent row given the
# 'Latest' name - confirm against the CSV.
vax_df = (
    vaccination.loc[37]
    .iloc[1:]
    .rename('Latest')
    .reset_index()
)

vax_df.head()

Unnamed: 0,index,Latest
0,dose1_perlis,10016
1,dose2_perlis,8026
2,dose1_kedah,33449
3,dose2_kedah,24698
4,dose1_penang,35728


In [4]:
# Rows alternate dose1/dose2 for each state, so even positions are first doses
# and odd positions are second doses. Both frames get a fresh 0..n-1 index;
# dose2_df keeps only the 'Latest' values.
dose1_df = vax_df.iloc[0::2].reset_index(drop=True)
dose2_df = vax_df.iloc[1::2][['Latest']].reset_index(drop=True)

In [5]:
# Start the working frame as an independent copy of the dose-1 rows, so that
# columns added to df later cannot leak back into dose1_df
df = dose1_df.copy()

In [6]:
# Inspect the working frame built from dose1_df
df

Unnamed: 0,index,Latest
0,dose1_perlis,10016
1,dose1_kedah,33449
2,dose1_penang,35728
3,dose1_perak,52639
4,dose1_selangor,76273
5,dose1_kl,52110
6,dose1_putrajaya,7222
7,dose1_ns,29765
8,dose1_melaka,16566
9,dose1_kelantan,25293


In [7]:
# Attach the second-dose values; both frames share a 0..n-1 index, so rows
# line up state by state
df['Dose 2'] = dose2_df['Latest']

In [8]:
# Confirm the 'Dose 2' column sits alongside the dose-1 values
df

Unnamed: 0,index,Latest,Dose 2
0,dose1_perlis,10016,8026
1,dose1_kedah,33449,24698
2,dose1_penang,35728,24862
3,dose1_perak,52639,37884
4,dose1_selangor,76273,51383
5,dose1_kl,52110,33603
6,dose1_putrajaya,7222,4541
7,dose1_ns,29765,17160
8,dose1_melaka,16566,10419
9,dose1_kelantan,25293,18951


In [9]:
# Pull the state key out of labels shaped like 'dose1_<state>'
df['State'] = df['index'].str.split('_').str[1]

# Drop the raw label, put the state first, and switch to snake_case names
df = (
    df.drop(columns='index')[['State', 'Latest', 'Dose 2']]
      .rename(columns={'State': 'state', 'Latest': 'dose_1', 'Dose 2': 'dose_2'})
)

In [10]:
# Check the tidied frame: state, dose_1, dose_2
df

Unnamed: 0,state,dose_1,dose_2
0,perlis,10016,8026
1,kedah,33449,24698
2,penang,35728,24862
3,perak,52639,37884
4,selangor,76273,51383
5,kl,52110,33603
6,putrajaya,7222,4541
7,ns,29765,17160
8,melaka,16566,10419
9,kelantan,25293,18951


In [11]:
#Choropleth map
import plotly.express as px

In [12]:
# Send Plotly figures to the 'browser' renderer so that fig.show() opens them
# in a browser tab rather than embedding them in the notebook output
import plotly.io as pio
pio.renderers.default = 'browser'

In [13]:
# Look at the first feature's properties to see which fields are available
# for linking the GeoJSON to the dataframe
malaysia_states['features'][0]['properties']

{'gid': 1,
 'id': 2939653,
 'short': 'johor',
 'country': 'MYS',
 'name': 'Johor',
 'enname': None,
 'locname': 'Johor',
 'offname': None,
 'boundary': 'administrative',
 'adminlevel': 4,
 'wikidata': 'Q183032',
 'wikimedia': 'en:Johor',
 'timestamp': '2019-12-27 23:00:01',
 'note': None,
 'path': '0,2108121,2939653',
 'rpath': '2939653,2108121,0',
 'iso3166_2': 'MY-01',
 'tid': None,
 'territory_name': None}

In [14]:
# Promote each feature's properties.id to the feature's top-level 'id', which
# is the key the choropleth 'locations' values are matched against
for feature in malaysia_states['features']:
    feature['id'] = feature['properties']['id']

# Lookup from a state's short name to its feature id, used to join the
# GeoJSON with the dataframe
state_id_map = {
    f['properties']['short']: f['id']
    for f in malaysia_states['features']
}

In [15]:
# Inspect the short-name -> id lookup built from the GeoJSON features
state_id_map

{'johor': 2939653,
 'kedah': 4444908,
 'kelantan': 4443571,
 'kl': 2939672,
 'labuan': 4521286,
 'melaka': 2939673,
 'ns': 2939674,
 'pahang': 4444595,
 'penang': 4445131,
 'perak': 4445076,
 'perlis': 4444918,
 'putajaya': 4443881,
 'sabah': 3879783,
 'sarawak': 3879784,
 'selangor': 2932285,
 'terengganu': 4444411}

In [16]:
# The GeoJSON spells Putrajaya's short name 'putajaya'; re-key it so it
# matches the state name used in the dataframe. The guard makes the cell safe
# to re-run (the unguarded version raises KeyError on a second execution).
if 'putajaya' in state_id_map:
    state_id_map['putrajaya'] = state_id_map.pop('putajaya')

In [17]:
# Re-inspect the lookup after re-keying 'putajaya' to 'putrajaya'
state_id_map

{'johor': 2939653,
 'kedah': 4444908,
 'kelantan': 4443571,
 'kl': 2939672,
 'labuan': 4521286,
 'melaka': 2939673,
 'ns': 2939674,
 'pahang': 4444595,
 'penang': 4445131,
 'perak': 4445076,
 'perlis': 4444918,
 'sabah': 3879783,
 'sarawak': 3879784,
 'selangor': 2932285,
 'terengganu': 4444411,
 'putrajaya': 4443881}

In [18]:
# Look up each state's GeoJSON feature id; an unknown state raises KeyError
df['id'] = df['state'].map(state_id_map.__getitem__)

In [19]:
# The dose counts were extracted as object-typed values; convert them to a
# numeric dtype so they can be used in arithmetic and as a colour scale
for dose_col in ('dose_1', 'dose_2'):
    df[dose_col] = pd.to_numeric(df[dose_col])

In [36]:
# Population data: attach each state's population by matching on the state
# name instead of relying on both tables having the same row order. States
# missing from the population table end up as NaN.
# (The earlier id-lookup assignment to df['population'] was removed: it was
# overwritten immediately and never used.)
population_by_state = population.set_index('state')['population']
df['population'] = df['state'].map(population_by_state)

In [44]:
# Express each dose count as a percentage of the state's population figure
# NOTE(review): the population file name suggests these are 18+ counts - confirm
df['dose1_pct'] = df['dose_1'].div(df['population']).mul(100)
df['dose2_pct'] = df['dose_2'].div(df['population']).mul(100)

In [45]:
# Review the frame with ids, population and percentage columns added
df

Unnamed: 0,state,dose_1,dose_2,id,population,dose1_pct,dose2_pct
0,perlis,10016,8026,4444918,187756,5.334583,4.274697
1,kedah,33449,24698,4444908,1595396,2.096595,1.54808
2,penang,35728,24862,4445131,1385097,2.579458,1.794965
3,perak,52639,37884,4445076,1932525,2.723846,1.960337
4,selangor,76273,51383,2932285,4838547,1.576362,1.061951
5,kl,52110,33603,2939672,1481138,3.518241,2.268729
6,putrajaya,7222,4541,4443881,56913,12.689544,7.978845
7,ns,29765,17160,2939674,850890,3.498102,2.016712
8,melaka,16566,10419,2939673,701711,2.360801,1.484799
9,kelantan,25293,18951,4443571,1237273,2.044254,1.531675


In [48]:
#Dose 1 percentage choropleth
# Each row's 'id' is matched against the GeoJSON feature ids. A title, a
# readable colour-bar label and the state name on hover let the figure stand
# on its own.
fig = px.choropleth(
    df,
    geojson=malaysia_states,
    locations='id',
    color='dose1_pct',
    color_continuous_scale='hot',
    hover_name='state',
    labels={'dose1_pct': 'Dose 1 (% of population)'},
    title='Dose 1 coverage by state (% of population)',
)

# Zoom to the plotted states and hide the base map
fig.update_geos(fitbounds='locations', visible=False)

fig.show()

In [21]:
#Dose 2 absolute choropleth
# Each row's 'id' is matched against the GeoJSON feature ids. A title, a
# readable colour-bar label and the state name on hover let the figure stand
# on its own.
fig = px.choropleth(
    df,
    geojson=malaysia_states,
    locations='id',
    color='dose_2',
    color_continuous_scale='Viridis',
    hover_name='state',
    labels={'dose_2': 'Dose 2 recipients'},
    title='Dose 2 recipients by state',
)

# Zoom to the plotted states and hide the base map
fig.update_geos(fitbounds='locations', visible=False)

fig.show()