## **Initial Imports and Data Formatting**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

In [None]:
import plotly.express as px
import plotly.graph_objects as go

### **'df_death' Data Source**:
**Reference**: https://www.deccanherald.com/national/coronavirus-india-update-state-wise-total-number-of-confirmed-cases-deaths-on-july-8-858424.html

**Note**: 
1. This data constitutes the number of covid deaths and positive cases in the respective States/UTs till 8th July 2021
2. The tallies include foreigners who have been infected with Covid-19
3. Data may vary based on the source used

In [None]:
# Locations of the two input files inside the Kaggle input directory.
VACCINE_DATA_PATH = '../input/covid19-india-statewise-vaccine-data/COVID-19 India Statewise Vaccine Data.csv'
DEATHS_DATA_PATH = '../input/covid-deaths-8th-july-2021/Covid Deaths - 8th July 2021.txt'

# `df` holds the vaccine data; `df_death` holds the positive cases / deaths data.
df = pd.read_csv(VACCINE_DATA_PATH)
df_death = pd.read_csv(DEATHS_DATA_PATH)

## **Data pre-processing**

In [None]:
# Rename the key column to 'State/UTs', the column name the later join with `df` uses.
df_death = df_death.rename({'States': 'State/UTs'}, axis='columns')
df_death.head(5)

### **Reformatting data to correct format**

In [None]:
# Column-wise totals of the two rows that are reported separately in `df_death`.
split_ut_names = ['Dadar and Nagar Haveli', 'Daman and Diu']
df_death[df_death['State/UTs'].isin(split_ut_names)].sum()

In [None]:
# Keep every row except the two separately reported UTs.
is_split_ut = df_death['State/UTs'].isin(['Dadar and Nagar Haveli', 'Daman and Diu'])
df_death = df_death[~is_split_ut]

In [None]:
# Single combined row for the merged UT.
# NOTE(review): 239 positive cases / 0 deaths are hard-coded; presumably they are the
# totals of the two individual UT rows in the source file - verify if the data changes.
merged_ut_name = 'Dadar and Nagar Haveli and Daman and Diu'
df_temp = pd.DataFrame([[merged_ut_name, 239, 0]],
                       columns=['State/UTs', 'Positive cases', 'Deaths'])

# The result must be assigned back: the previous `df_death.append(...)` call returned a
# new frame that was discarded, so the merged row never reached `df_death`.
# `DataFrame.append` was also removed in pandas 2.0; `pd.concat` is the replacement.
# Any existing merged row is filtered out first so re-running this cell does not
# add a duplicate.
df_death = pd.concat([df_death[df_death['State/UTs'] != merged_ut_name], df_temp],
                     ignore_index=True)
df_death.tail()

In [None]:
# Shorten the Andaman and Nicobar entry in the 'State/UTs' column.
# NOTE(review): presumably this matches the spelling used in the vaccine data - confirm.
# A new frame is built instead of assigning into a column, because `df_death` may be a
# boolean-filtered selection of the loaded data and column assignment on such a frame
# triggers pandas' SettingWithCopyWarning.
df_death = df_death.replace(
    {'State/UTs': {'Andaman and Nicobar Islands': 'Andaman and Nicobar'}}
)
df_death.head()

In [None]:
# Attach the columns of `df_death` to `df`, matching rows on 'State/UTs'.
# The inner join keeps only the State/UTs that are present in both frames.
deaths_by_state = df_death.set_index('State/UTs')
df_join = df.join(deaths_by_state, how='inner', on='State/UTs')
df_join.head(5)

### **Basic Information of the original dataframe**

In [None]:
# Preview the first five rows of the original dataframe.
df.head(n=5)

In [None]:
# Print pandas' concise summary (DataFrame.info) of the original dataframe.
df.info()

### **Total number of null values**

In [None]:
# Number of missing values in each column of the original dataframe.
df.isna().sum()

### **Summary Statistics in non-scientific notation (original dataframe)**

In [None]:
# Render each summary statistic as a fixed-point string with 4 decimals
# so the values are not displayed in scientific notation.
to_fixed_point = '{0:.4f}'.format
df.describe().apply(lambda column: column.apply(to_fixed_point))

### **Summary Statistics in non-scientific notation (joined dataframe)**

In [None]:
# Render each summary statistic of the joined dataframe as a fixed-point string
# with 4 decimals so the values are not displayed in scientific notation.
to_fixed_point = '{0:.4f}'.format
df_join.describe().apply(lambda column: column.apply(to_fixed_point))

## **Exploratory Data Analysis and Visualizations**
* **Note**: For initial visualizations not concerning positive cases and deaths, we will use the original dataset i.e. 'df' since it has entries for **'Miscellaneous'**

### **DOSE 1 v/s STATE/UTs**

In [None]:
# Bar chart of the 'Dose 1' column per State/UT; bars are coloured by the same column.
dose1_bar_options = dict(x='State/UTs', y='Dose 1', color='Dose 1', height=700)
fig = px.bar(df, **dose1_bar_options)
fig.show()

### **DOSE 2 v/s STATE/UTs**

In [None]:
# Bar chart of the 'Dose 2' column per State/UT; bars are coloured by the same column.
dose2_bar_options = dict(x='State/UTs', y='Dose 2', color='Dose 2', height=700)
fig = px.bar(df, **dose2_bar_options)
fig.show()

### **TOTAL VACCINATION DOSES v/s STATE/UTs**

In [None]:
# Bar chart of 'Total Vaccination Doses' per State/UT; the same column drives both
# the bar height and the bar colour.
total_doses_column = 'Total Vaccination Doses'
fig = px.bar(df, x='State/UTs', y=total_doses_column, color=total_doses_column,
             height=700, width=1200)
fig.show()

### **Dataframe containing only the 5 States/UTs with the highest Total Vaccination Doses**

In [None]:
# Rank rows by total doses (ties broken by 'Dose 1', then 'Dose 2'), largest first,
# and keep the first five.
vaccination_rank_keys = ['Total Vaccination Doses', 'Dose 1', 'Dose 2']
df_top = df.sort_values(by=vaccination_rank_keys, ascending=False).iloc[:5]

In [None]:
# Horizontal bar chart of total doses for the rows in `df_top`.
fig = px.bar(
    df_top,
    x="Total Vaccination Doses",
    y="State/UTs",
    orientation="h",
    color="Total Vaccination Doses",
    color_continuous_scale='Bluered_r',
    hover_name="State/UTs",
)
fig.show()

### **DOSE 2 v/s DOSE 1 - Scatter Plot**

In [None]:
# Scatter of 'Dose 2' against 'Dose 1'; marker size and colour both follow
# 'Total Vaccination Doses'.
total_doses_column = 'Total Vaccination Doses'
fig = px.scatter(df, x="Dose 1", y="Dose 2",
                 size=total_doses_column, color=total_doses_column,
                 color_continuous_scale='Bluered_r',
                 height=500, width=1070)
fig.show()

### **Dataframe containing only the 5 States/UTs with the highest positive cases**

In [None]:
# Rank rows by positive cases (ties broken by deaths), largest first,
# and keep the first five.
case_rank_keys = ['Positive cases', 'Deaths']
df_join_top = df_join.sort_values(by=case_rank_keys, ascending=False).iloc[:5]

In [None]:
# Display the five rows selected for the top-5 positive-cases comparison.
df_join_top

In [None]:
# One horizontal bar chart per metric for the rows in `df_join_top`.
# Only the traces of these two figures are reused below; their layouts are not
# carried over, so sizing and the colour scale are applied to the combined figures.
fig1 = px.bar(df_join_top,
              x="Positive cases",
              y="State/UTs",
              color="Positive cases",
              orientation="h",
              hover_name="State/UTs")

fig2 = px.bar(df_join_top,
              x="Deaths",
              y="State/UTs",
              color="Deaths",
              orientation="h",
              hover_name="State/UTs")

# Combine the traces through the public `Figure.data` property rather than the
# private `_data` attribute.
dat = fig1.data + fig2.data

# Layout-level settings. They were previously passed to px.bar and silently dropped,
# because the px layouts were discarded when only the traces were taken.
combined_layout = dict(height=600,
                       width=1100,
                       coloraxis_colorscale='Bluered_r')

fig = go.Figure(data=dat)

# Same traces, shown with bars of both metrics stacked ('relative' bar mode).
fig3 = go.Figure(data=fig.data)
fig3.update_layout(barmode='relative',
                   title_text='Top 5 Statewise Deaths and Positive Cases - Relative Bar Mode',
                   **combined_layout)
fig3.show()

# No barmode is set here, so plotly's default for go.Figure applies.
fig.update_layout(title_text='Top 5 Statewise Deaths and Positive Cases - Default Bar Mode',
                  **combined_layout)
fig.show()

### **TOTAL VACCINATION DOSES v/s DEATHs - Scatter Plot**

In [None]:
# Scatter of 'Deaths' against 'Total Vaccination Doses' for the joined data;
# marker size and colour both follow 'Total Vaccination Doses'.
total_doses_column = "Total Vaccination Doses"
fig = px.scatter(df_join, x=total_doses_column, y="Deaths",
                 size=total_doses_column, color=total_doses_column,
                 color_continuous_scale='Bluered_r',
                 height=500, width=1070)
fig.show()

### **TOTAL VACCINATION DOSES v/s POSITIVE CASES - Scatter Plot**

In [None]:
# Scatter of 'Positive cases' against 'Total Vaccination Doses' for the joined data;
# marker size and colour both follow 'Total Vaccination Doses'.
total_doses_column = "Total Vaccination Doses"
fig = px.scatter(df_join, x=total_doses_column, y="Positive cases",
                 size=total_doses_column, color=total_doses_column,
                 color_continuous_scale='Bluered_r',
                 height=500, width=1070)
fig.show()