In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime

In [None]:
# Load the case data from a CSV located next to the notebook.
covid_df = pd.read_csv('covid_19_india.csv')

In [None]:
# Preview the first ten rows to sanity-check the load.
covid_df.head(10)

In [None]:
# Count missing values per column. The element-wise boolean frame that
# isnull() returns on its own has one cell per value and is not readable as a
# summary.
covid_df.isnull().sum()

In [None]:
# Summary statistics for the numeric columns.
covid_df.describe()

In [None]:
# Load the state-wise vaccination data from a CSV located next to the notebook.
vaccine_df = pd.read_csv('covid_vaccine_statewise.csv')

In [None]:
# Preview the first seven rows of the vaccination data.
vaccine_df.head(7)

In [None]:
# Drop the columns this analysis never uses. Reassigning instead of using
# inplace=True, together with errors='ignore', keeps the cell safe to re-run:
# a second in-place drop would raise KeyError because the columns are gone.
unused_columns = ['Sno', 'Time', 'ConfirmedIndianNational', 'ConfirmedForeignNational']
covid_df = covid_df.drop(columns = unused_columns, errors = 'ignore')

In [None]:
# Discard rows in which every column is missing.
covid_df = covid_df.dropna(how = 'all')

In [None]:
# Number of missing values remaining in each column.
covid_df.isnull().sum()

In [None]:
# Preview the frame again to see its current columns.
covid_df.head()

In [None]:
# Convert the 'Date' column to datetime using an explicit YYYY-MM-DD format.
covid_df['Date'] = pd.to_datetime(covid_df['Date'], format = '%Y-%m-%d')

In [None]:
# Cast the three count columns to plain integers.
count_columns = ['Cured', 'Deaths', 'Confirmed']
covid_df[count_columns] = covid_df[count_columns].astype(int)

In [None]:
# Check column dtypes and non-null counts after the conversions.
covid_df.info()

**Active cases**

In [None]:
# Active cases = confirmed cases minus those already cured or deceased.
resolved_cases = covid_df['Cured'] + covid_df['Deaths']
covid_df['Active_Cases'] = covid_df['Confirmed'] - resolved_cases
covid_df.tail()

In [None]:
# Per-state figures: the maximum of each count column for every state.
# The aggregation is named by string ('max') because passing the Python
# builtin `max` is deprecated in recent pandas and emits a FutureWarning.
statewise = pd.pivot_table(covid_df, values = ['Confirmed', 'Deaths', 'Cured'],
                           index = 'State/UnionTerritory', aggfunc = 'max')
statewise

In [None]:
# Recovery rate: cured cases as a percentage of confirmed cases.
statewise['Recovery Rate'] = statewise['Cured'].mul(100).div(statewise['Confirmed'])

In [None]:
# Mortality rate: deaths as a percentage of confirmed cases.
statewise['Mortality Rate'] = statewise['Deaths'].mul(100).div(statewise['Confirmed'])

In [None]:
# Show the states ordered by confirmed cases, highest first. The sorted frame
# is only displayed; `statewise` itself keeps its original order.
statewise.sort_values('Confirmed', ascending = False)

In [None]:
# Render the table with a 'coolwarm' colour gradient applied to the cell backgrounds.
statewise.style.background_gradient(cmap = 'coolwarm')

**Top 10 states by active cases**

In [None]:
# For every state, locate the row on which Active_Cases peaked and keep that
# peak together with the date it occurred on. Taking .max() of each column
# independently would pair the peak with the latest date present for the
# state rather than with the date of the peak.
peak_rows = covid_df.groupby('State/UnionTerritory')['Active_Cases'].idxmax()
most_cases_perday = (
    covid_df.loc[peak_rows, ['State/UnionTerritory', 'Active_Cases', 'Date']]
    .set_index('State/UnionTerritory')
    .sort_values(by = 'Active_Cases', ascending = False)
)

most_cases_perday

In [None]:
# Bar chart of the first ten rows of `most_cases_perday`.
# The state names live in that frame's index; reset_index() turns them into a
# regular column so x = 'State/UnionTerritory' resolves regardless of whether
# the installed seaborn can look up index levels by name.
top_active = most_cases_perday[:10].reset_index()
fig = plt.figure(figsize = (15,6))
bar_plot = sns.barplot(data = top_active, x = 'State/UnionTerritory', y = 'Active_Cases', palette = 'viridis')
plt.xlabel('State')
plt.ylabel('Total Active Cases')
plt.title('Top 10 States by Active Cases')
plt.show()

**Top states with highest deaths**

In [None]:
# Death toll per state, highest first.
# 'Deaths' is selected before aggregating so the remaining columns (such as
# the datetime 'Date', which cannot be summed) are left out of the reduction.
# max() replaces sum() because the counts appear to be running totals: the
# statewise pivot earlier in this notebook takes their max as the state
# figure, so summing the per-date rows would count the same deaths repeatedly.
highest_deaths = covid_df.groupby('State/UnionTerritory')['Deaths'].max().sort_values(ascending = False)

highest_deaths

In [None]:
# Plot the first ten entries of `highest_deaths` as a bar chart.
top_deaths = highest_deaths[:10].reset_index()
fig = plt.figure(figsize = (15,6))
bar_plot = sns.barplot(x = 'State/UnionTerritory', y = 'Deaths', data = top_deaths, palette = 'viridis')
bar_plot.set_xlabel('State')
bar_plot.set_ylabel('Total Deaths')
plt.show()

**Growth Trend**

In [None]:
# Work on an explicit copy of the two columns the trend plot needs, so that
# adding columns to `growth_df` afterwards neither triggers pandas'
# SettingWithCopyWarning nor risks writing through to `covid_df`.
selected_columns = ['State/UnionTerritory', 'Active_Cases']
growth_df = covid_df[selected_columns].copy()

In [None]:
# Add a 'YYYY-MM' label for every row. The vectorised .dt accessor replaces
# the row-by-row apply, and assign() returns a new frame instead of writing
# into what may be a slice of `covid_df` (which raises SettingWithCopyWarning).
growth_df = growth_df.assign(**{'Month Year': covid_df['Date'].dt.strftime('%Y-%m')})

In [None]:
# States shown in the growth-trend plot. 'Uttar Pradesh' used to be written
# with a leading space, so isin() could only match a state name that itself
# starts with a space and the state was dropped from the selection.
filtered_city = ['Maharashtra', 'Karnataka', 'Kerala', 'Uttar Pradesh', 'Tamil Nadu']
data_lineplot = growth_df[growth_df['State/UnionTerritory'].isin(filtered_city)]
# Keep 2020-01 through 2021-12, both inclusive. The 'YYYY-MM' strings sort
# chronologically, so a plain string between() is enough; adjust the bounds
# here to plot a different window (e.g. 2020-01 to 2021-01).
data_lineplot_2020 = data_lineplot[data_lineplot['Month Year'].between('2020-01', '2021-12')]

In [None]:
# Line chart of active cases per 'Month Year' label, one line per state.
fig = plt.figure(figsize = (15,6))
line_plot = sns.lineplot(data = data_lineplot_2020, x = 'Month Year', y = 'Active_Cases', hue = 'State/UnionTerritory')
# Title spelling corrected (was 'Affectd').
line_plot.set_title('Top 5 Affected States in India', size = 16)
plt.show()

**Covid Vaccine**

In [None]:
# Preview the vaccination data before cleaning it.
vaccine_df.head()

In [None]:
# Normalise the separator from '/' to '-' and parse the result as datetimes.
# NOTE(review): no format/dayfirst is given, so ambiguous values are parsed
# with pandas' default inference -- if the CSV is day-first, confirm the
# parsed dates and consider passing an explicit format.
updated_on_text = vaccine_df['Updated On'].str.replace('/', '-')
vaccine_df['Updated On'] = pd.to_datetime(updated_on_text)

In [None]:
# Give the date column a more descriptive name.
vaccine_df = vaccine_df.rename(columns = {'Updated On' : 'Vaccine Date'})

In [None]:
# Check column dtypes and non-null counts of the vaccination data.
vaccine_df.info()

In [None]:
# Column labels at positions 12-16, selected purely by position and displayed
# for inspection; the next cell drops them.
# NOTE(review): positional slicing silently picks different columns if the CSV
# layout changes -- consider listing the column names explicitly.
check = vaccine_df.columns[12:17]
check

In [None]:
# New frame without the columns listed in `check`; `vaccine_df` is untouched.
vaccination_df = vaccine_df.drop(columns = check)

**Male vs Female vaccination**

In [None]:
# Column totals for two columns chosen by position from the end of the frame.
# NOTE(review): assumes the 4th- and 3rd-from-last columns hold the male and
# female counts respectively -- confirm against the CSV header.
male_vc = vaccination_df.iloc[:, -4].sum()
female_vc = vaccination_df.iloc[:, -3].sum()

In [None]:
# Pie chart comparing the two totals, rendered at 400x400.
sex_totals = {'Male': male_vc, 'Female': female_vc}
sex_chart = px.pie(
    names = list(sex_totals.keys()),
    values = list(sex_totals.values()),
    title = 'Sex Compare Vaccination',
)
sex_chart.update_layout(height = 400, width = 400)

**Remove rows where state = India**

In [None]:
# Drop every row whose 'State' value is exactly 'India'; the result is a new frame.
is_india = vaccine_df['State'] == 'India'
excl_india = vaccine_df.drop(index = vaccine_df.index[is_india])
excl_india

In [None]:
# Shorten the column name for the aggregation and plot that follow.
excl_india = excl_india.rename(columns = {'Total Individuals Vaccinated': 'Total'})

In [None]:
# Display the frame to confirm the renamed 'Total' column.
excl_india

**Most Vaccinated State**

In [None]:
# Five states with the largest summed 'Total'. The column is selected before
# aggregating so sum() is not applied to the other columns of the frame
# (summing the datetime 'Vaccine Date' column raises TypeError).
# NOTE(review): if 'Total' is a running total per date, max() rather than
# sum() would give the per-state figure -- confirm against the data.
max_vc = (
    excl_india.groupby('State')['Total']
    .sum()
    .to_frame('Total')
    .sort_values(by = 'Total', ascending = False)
    .head(5)
)
max_vc

In [None]:
# Bar chart of the states in `max_vc`. The title takes its count from the
# frame so it cannot disagree with the number of bars (it used to say
# 'Top 10' while `max_vc` is built with .head(5)).
fig = plt.figure(figsize = (10,5))
most_vc = sns.barplot(data = max_vc, x = max_vc.index, y = max_vc['Total'])
plt.xlabel("State")
plt.ylabel('Total Vaccinated')
plt.title(f'Top {len(max_vc)} Vaccinated States in India')
plt.show()