In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.offline as py
import plotly.express as px
from plotly.offline import iplot

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Surabaya is the capital of the Indonesian province of East Java and the second-largest city in Indonesia after Jakarta. Located on the northeastern border of Java island, on the Madura Strait, it is one of the earliest port cities in Southeast Asia.

According to the National Development Planning Agency, Surabaya is one of the four main central cities of Indonesia, alongside Jakarta, Medan, and Makassar. The city has a population of 2.89 million within its city limits in 2019 and 9.5 million in the extended Surabaya metropolitan area, making it the second-largest metropolitan area in Indonesia.

Source: https://en.wikipedia.org/wiki/Surabaya

![](https://i.ytimg.com/vi/fUBnB4PcHnA/maxresdefault.jpg)

Image source: youtube.com

In [None]:
# Read the "Vaksinasi Jawa Timur" vaccination CSV from the attached dataset
# and preview its last rows.
vaccination_csv = "../input/case-vaccination-covid19-indonesia-dataset/Vaccination Data/Vaksinasi Jawa Timur.csv"
df = pd.read_csv(vaccination_csv, sep=',')
df.tail()

In [None]:
# Dimensions of the loaded table as a (rows, columns) tuple.
df.shape

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Correlation map to see how the numeric features are correlated with each other and with the target.
# Only numeric columns are kept: the table also holds text columns (e.g. `City_or_Regency`),
# and DataFrame.corr raises on non-numeric data in pandas >= 2.0 instead of silently dropping it.
numeric_df = df.select_dtypes(include='number')
corrmat = numeric_df.corr(method='kendall')
# Draw on an explicit Axes so the heatmap lands on the figure created here.
fig, ax = plt.subplots(figsize=(12,9))
sns.heatmap(corrmat, vmax=0.9, square=True, ax=ax)
plt.show()

In [None]:
# Parallel-categories diagram of the table, with ribbons coloured by the `Vaccinated` column
# on the sequential OrRd scale.
fig = px.parallel_categories(
    df,
    color="Vaccinated",
    color_continuous_scale=px.colors.sequential.OrRd,
)
fig.show()

In [None]:
# Bar chart of `Vaccinated` against `Date` for every row of the table;
# each bar carries its `City_or_Regency` value as a text label.
fig = px.bar(
    df,
    x='Date',
    y='Vaccinated',
    text='City_or_Regency',
    title='Covid Vaccination in Indonesia',
    color_discrete_sequence=['blue'],
)
fig.show()

In [None]:
# Line chart of `Fully_Vaccinated` against `Date` for every row of the table.
fig = px.line(
    df,
    x="Date",
    y="Fully_Vaccinated",
    title="Covid Fully Vaccinated",
    color_discrete_sequence=['darksalmon'],
)
fig.show()

In [None]:
# Scatter plot of `Total` against `Date` for every row of the table.
fig = px.scatter(
    df,
    x="Date",
    y="Total",
    title="Covid Total Vaccinated",
    color_discrete_sequence=['#4257f5'],
)
fig.show()

In [None]:
#word cloud
from wordcloud import WordCloud, ImageColorGenerator

# Join every value of the City_or_Regency column into one space-separated string.
text = " ".join(df['City_or_Regency'].astype(str))
# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200,colormap='Set2', background_color="black").generate(text)
# Use a single figure: opening extra figures (or re-selecting figure 1 before
# plt.show()) leaves empty canvases in the cell output next to the word cloud.
plt.figure(figsize=(15,10))
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Keep only the rows whose City_or_Regency is 'KOTA SURABAYA', renumber them from 0,
# and preview the first rows.
is_surabaya = df['City_or_Regency'].eq('KOTA SURABAYA')
surabaya = df.loc[is_surabaya].reset_index(drop=True)
surabaya.head()

In [None]:
# Bar chart of `Vaccinated` against `Date` for the Surabaya rows only;
# each bar carries its `City_or_Regency` value as a text label.
fig = px.bar(
    surabaya,
    x='Date',
    y='Vaccinated',
    text='City_or_Regency',
    title='Covid Vaccination in Surabaya',
    color_discrete_sequence=['crimson'],
)
fig.show()

In [None]:
# Line chart of `Fully_Vaccinated` against `Date` for the Surabaya rows only.
fig = px.line(
    surabaya,
    x="Date",
    y="Fully_Vaccinated",
    title="Fully Vaccinated in Surabaya",
    color_discrete_sequence=['darksalmon'],
)
fig.show()

In [None]:
# Scatter plot of `Total` against `Date` for the Surabaya rows only.
fig = px.scatter(
    surabaya,
    x="Date",
    y="Total",
    title="Total Vaccinated in Surabaya",
    color_discrete_sequence=['#DC143C'],
)
fig.show()

In [None]:
# Horizontal bar chart of `Total` per `Date` for Surabaya, with bars shaded by `Fully_Vaccinated`.
fig = px.bar(surabaya,
             y='Date',
             x='Total',
             orientation='h',
             color='Fully_Vaccinated',
             title='Total Vaccinated in Surabaya',
             opacity=0.8,
             # `Fully_Vaccinated` is plotted as a numeric quantity elsewhere in this notebook, and
             # Plotly Express maps numeric colour columns onto a continuous colour axis, where
             # color_discrete_sequence has no effect; the RdBu palette must be passed as a
             # continuous scale to be applied.
             color_continuous_scale=px.colors.diverging.RdBu,
             template='plotly_white'
            )
# NOTE(review): the x-axis is pinned to 0-35; confirm this matches the magnitude of `Total`,
# otherwise the bars are clipped at the right edge.
fig.update_xaxes(range=[0,35])
fig.show()

In [None]:
# One count plot per vaccination column of the Surabaya subset, side by side.
# Each entry is (column to count, seaborn palette, panel title).
count_specs = [
    ('Vaccinated', 'copper', 'Covid Vaccinated in Surabaya'),
    ('Fully_Vaccinated', 'ocean', 'Covid Fully Vaccinated in Surabaya'),
    ('Total', 'cubehelix', 'Total Vaccinated in Surabaya'),
]
# The x-axes are NOT shared: every panel counts the values of a different column,
# so a shared categorical axis would tie unrelated tick positions/labels together.
fig, ax = plt.subplots(1, len(count_specs), figsize = (20,6))
for axis, (column, palette, title) in zip(ax, count_specs):
    sns.countplot(x=column, data=surabaya, palette=palette, ax=axis)
    axis.title.set_text(title)
    # Rotate the tick labels on this specific Axes (see the note below).
    axis.tick_params(axis='x', labelrotation=45)
plt.show()

#Note: plt.xticks() only acts on the *current* axes, which after plt.subplots() is the last one created (the 3rd panel). That is why calling it repeatedly rotated the labels of the 3rd countplot only; the rotation is now set on each Axes explicitly via tick_params.

In [None]:
!pip install klib

In [None]:
import sys

# Add the attached klib dataset directory to the module search path before `import klib`.
# Presumably a fallback copy of the library for sessions without internet — confirm it is still needed.
KLIB_DATASET_DIR = '../input/klib-library-python'
# Guard the append so re-running this cell does not pile up duplicate sys.path entries.
if KLIB_DATASET_DIR not in sys.path:
    sys.path.append(KLIB_DATASET_DIR)

In [None]:
import klib

In [None]:
# klib categorical-feature plot of the Surabaya subset, drawn on a very wide 50x15 canvas.
# NOTE(review): presumably summarises the non-numeric columns — confirm against the klib docs.
klib.cat_plot(surabaya, figsize=(50,15))

# Data Cleaning

In [None]:
# Run klib's data_cleaning on the Surabaya subset and keep the result under a new name,
# leaving `surabaya` itself untouched. The following cells read the cleaned frame through
# lower-case column names (`fully_vaccinated`, `vaccinated`, `total`).
surabaya_cleaned = klib.data_cleaning(surabaya)

# Display all correlations

In [None]:
# klib correlation plot of the cleaned Surabaya frame, with per-cell annotations switched off.
klib.corr_plot(surabaya_cleaned, annot=False)

In [None]:
# Display only the positive correlations (split='pos'), with per-cell annotations switched off.

klib.corr_plot(surabaya_cleaned, split='pos', annot=False)

In [None]:
# Correlation matrix of the cleaned Surabaya frame, shown as a table (the cell's last expression).

klib.corr_mat(surabaya_cleaned)

In [None]:
# klib distribution plot of the `fully_vaccinated` column of the cleaned Surabaya frame.
klib.dist_plot(surabaya_cleaned['fully_vaccinated'])

In [None]:
# klib distribution plot of the `vaccinated` column of the cleaned Surabaya frame.
klib.dist_plot(surabaya_cleaned['vaccinated'])

In [None]:
# klib distribution plot of the `total` column of the cleaned Surabaya frame.
klib.dist_plot(surabaya_cleaned['total'])

In [None]:
#word cloud
from wordcloud import WordCloud, ImageColorGenerator

# Join every value of the City_or_Regency column of the Surabaya subset into one
# space-separated string.
text = " ".join(surabaya['City_or_Regency'].astype(str))
# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200,colormap='Reds', background_color="Red").generate(text)
# Use a single figure: opening extra figures (or re-selecting figure 1 before
# plt.show()) leaves empty canvases in the cell output next to the word cloud.
plt.figure(figsize=(15,10))
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# References

https://www.kaggle.com/sripaadsrinivasan/speed-up-your-data-science-projects-using-klib/comments

https://klib.readthedocs.io/en/latest/

https://towardsdatascience.com/speed-up-your-data-cleaning-and-preprocessing-with-klib-97191d320f80