In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for input_path in (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

> **In this notebook we will visualize the COVID-19 dataset and try to predict it for the future**


In [None]:
# Read the state-wise COVID-19 CSV into a DataFrame and preview the first rows.
covid_csv_path = '../input/latest-covid19-india-statewise-data/Latest Covid-19 India Status.csv'
covid_df = pd.read_csv(covid_csv_path)
covid_df.head()


In [None]:
# Show the column names exactly as they were read from the CSV.
covid_df.columns

In [None]:
# Replace the headers with short underscore-separated names. The assignment is
# positional, so the list needs one entry per column in the file's column order.
renamed_columns = [
    'State',
    'Total_Cases',
    'Active',
    'Discharged',
    'Deaths',
    'Active_ratio',
    'Discharge_ratio',
    'Death_ratio',
]
covid_df.columns = renamed_columns
covid_df.head()

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [None]:
# Overlay the three ratio distributions on a single Axes so they can be compared.
# Each series is drawn semi-transparent (otherwise later bars hide earlier ones),
# the legend is built from the plotted artists, and the title is set once instead
# of being overwritten by every plot call.
ratio_colors = {
    'Active_ratio': 'black',
    'Death_ratio': 'red',
    'Discharge_ratio': 'yellow',
}
fig_ratio, ax_ratio = plt.subplots()
for ratio_column, ratio_color in ratio_colors.items():
    covid_df[ratio_column].plot(
        kind='hist',
        bins=10,
        color=ratio_color,
        alpha=0.6,
        label=ratio_column,
        ax=ax_ratio,
    )
ax_ratio.set_title('Active, death and discharge ratios')
ax_ratio.set_xlabel('percentage')
ax_ratio.legend()
plt.show()

**As we can see, the active ratios are mainly between 0 and 2 percent.**

In [None]:
import seaborn as sns

In [None]:
# Bar chart of total cases per state, with the largest totals first.
total_cases_inc = covid_df.sort_values('Total_Cases', ascending=False)
fig_total, ax_total = plt.subplots(figsize=(20, 20))
sns.barplot(x='State', y='Total_Cases', data=total_cases_inc, ax=ax_total)
# State names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Wide, short bar chart (height 4, aspect 3) of active cases for every state.
sns.catplot(
    x='State',
    y='Active',
    data=covid_df,
    kind="bar",
    height=4,
    aspect=3,
)
# Rotate the state names so they stay readable.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Flag each state by its active case count: 1 when it exceeds the threshold,
# 0 otherwise. The comparison is vectorised over the whole column and converted
# back to a plain list of ints, one entry per row in the frame's row order.
ACTIVE_HIGH_THRESHOLD = 12000
high_low = (covid_df['Active'] > ACTIVE_HIGH_THRESHOLD).astype(int).tolist()

In [None]:
# Attach the 0/1 high/low flags as a new 'Status' column (this mutates covid_df
# in place) and preview the result.
covid_df['Status'] = high_low
covid_df.head()

In [None]:
import plotly.express as px

In [None]:
# Total cases per state, with bars coloured by the 'Status' flag.
figure = px.bar(
    covid_df,
    x='State',
    y='Total_Cases',
    color='Status',
)
figure.show()

In [None]:
# Correlate only the numeric columns. 'State' holds strings, and pandas >= 2.0
# raises on non-numeric columns in corr() instead of silently dropping them;
# select_dtypes works on both old and new pandas versions.
numeric_corr = covid_df.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True)

In [None]:
# Bubble chart: total cases against active cases, one colour per state, with the
# marker size driven by the death count (capped at size_max).
fig = px.scatter(
    covid_df,
    x="Total_Cases",
    y="Active",
    color='State',
    size="Deaths",
    size_max=50,
)
fig.show()

In [None]:
import geopandas
from cartopy import crs as ccrs

In [None]:
# Choropleth of active cases. Rows are matched to map features by comparing the
# 'State' column with each feature's `properties.ST_NM` value in the GeoJSON.
# NOTE(review): states whose spelling differs from the GeoJSON names will not be
# drawn — confirm the names line up.
india_states_geojson = "https://gist.githubusercontent.com/jbrobst/56c13bbbf9d97d187fea01ca62ea5112/raw/e388c4cae20aa53cb5090210a42ebb9b765c0a36/india_states.geojson"

mapping = px.choropleth(
    covid_df,
    geojson=india_states_geojson,
    locations='State',
    featureidkey='properties.ST_NM',
    color='Active',
    color_continuous_scale='Blues',
)

# Zoom the map to the plotted locations instead of showing the whole world.
mapping.update_geos(fitbounds="locations")

mapping.show()

In [None]:
from pandas_profiling import ProfileReport

In [None]:
# Build a pandas-profiling report for the COVID frame and embed it in the notebook.
covid_profile_title = "Pandas Profiling Report of Covid 19 State wise"
Covid_profile = ProfileReport(covid_df, title=covid_profile_title)
Covid_profile.to_notebook_iframe()

In [None]:

# Active cases per state. The x axis uses the state names (not the integer row
# index) and the y axis is labelled as active cases, which is what is plotted.
fig1 = px.bar(
    covid_df,
    x='State',
    y='Active',
    text='Active',
    labels={'State': 'States', 'Active': 'Active Cases'},
    title='State Wise Active Cases',
)
# Show abbreviated values (2 significant digits) above each black bar.
fig1.update_traces(texttemplate='%{text:.2s}', textposition='outside', marker_color='black')
# Hide bar labels that would need a font smaller than the minimum to fit.
fig1.update_layout(uniformtext_minsize=2, uniformtext_mode='hide', width=1300, height=500)
fig1.show()

In [None]:
import plotly.graph_objects as go

In [None]:
# Ten states with the most deaths, shown as a donut chart.
new_active_tot = covid_df.sort_values(by='Deaths', ascending=False).head(10)
# Labels and values must come from the same sorted top-10 frame; taking labels
# from the unsorted full frame would attach the wrong state name to each slice.
figpie = go.Figure(
    data=[go.Pie(labels=new_active_tot['State'], values=new_active_tot['Deaths'])]
)
figpie.update_traces(hoverinfo='label+percent', textinfo='label+percent', hole=0.3)
figpie.update_layout(title_text="Top 10 States with Highest number of Death")
figpie.show()

In [None]:
# Load the state-wise vaccination data and preview the first rows.
# NOTE(review): the file carries an .xls extension but is parsed with read_csv —
# presumably it is delimited text despite the name; confirm.
vaccine_data_path = '../input/vaccine-dataset/COVID-19 India Statewise Vaccine Data (1).xls'
vaccine_df = pd.read_csv(vaccine_data_path)
vaccine_df.head()

In [None]:
# Report the (rows, columns) shape of the vaccine frame.
print("The shape of the dataset is",vaccine_df.shape)

In [None]:
# Summary statistics for every column; include='all' covers non-numeric columns too.
vaccine_df.describe(include = 'all')

In [None]:
# Count the missing values in each column.
vaccine_df.isnull().sum()

> **NO NULL VALUES**

In [None]:
# Build a pandas-profiling report for the vaccine frame and embed it in the notebook.
vaccine_profile_title = "Pandas Profiling Report"
profile_vaccine = ProfileReport(vaccine_df, title=vaccine_profile_title)
profile_vaccine.to_notebook_iframe()

> **Visualizing vaccination data**

In [None]:
# List the vaccine frame's column names (used by the plots below).
vaccine_df.columns

In [None]:
# First-dose counts per state/UT, ordered from the highest count to the lowest.
vaccine_df_new = vaccine_df.sort_values('Dose 1', ascending=False)
fig_vacc = px.line(
    vaccine_df_new,
    x='State/UTs',
    y='Dose 1',
    title="First Dose",
    labels={'Dose 1': 'Number of First Dose Administered '},
)
fig_vacc.update_layout(width=1300, height=600)
fig_vacc.show()

In [None]:
# Second-dose counts per state/UT, ordered from the highest count to the lowest.
vaccine_df_new2 = vaccine_df.sort_values(by=['Dose 2'], ascending=False)
# Plot the frame sorted by 'Dose 2' (vaccine_df_new2); the frame sorted by
# 'Dose 1' would leave this chart in first-dose order.
fig_vacc = px.line(
    vaccine_df_new2,
    x='State/UTs',
    y='Dose 2',
    title="Second Dose",
    labels={'Dose 2': 'Number of Second Dose Administered '},
)
fig_vacc.update_layout(width=1300, height=700)

fig_vacc.show()

In [None]:
# Draw first- and second-dose counts per state/UT as two traces on one figure.
fig = go.Figure()
dose_traces = (
    ('Dose 1', 'First Dose'),
    ('Dose 2', 'Second Dose'),
)
for dose_column, trace_name in dose_traces:
    fig.add_trace(
        go.Scatter(
            x=vaccine_df['State/UTs'],
            y=vaccine_df[dose_column],
            mode='lines+markers',
            name=trace_name,
        )
    )
fig.update_layout(width=1300, height=700)
fig.show()

> **Logistic regression (classifying the high/low `Status` flag)**

In [None]:
# Drop the state-name column, leaving the remaining columns for modelling.
new_covid = covid_df.drop(columns=['State'])

In [None]:
# Preview the modelling frame (covid_df without the 'State' column).
new_covid.head()

In [None]:
# Select the target and the features by name rather than by position, so the
# split does not silently change if columns are added or reordered.
# NOTE(review): 'Status' is derived from 'Active' (Active > 12000) and 'Active'
# is kept as a feature, so the target can be recovered directly from the inputs —
# confirm whether this leakage is intended.
X = new_covid.drop(columns=['Status']).values
y = new_covid['Status'].values

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The features mix raw case counts with percentage ratios, i.e. very different
# scales. Standardise them before fitting so the solver is not dominated by the
# large-valued columns. The scaler is fitted on the training split only, and the
# pipeline exposes the same fit/predict interface as the bare estimator.
logreg = make_pipeline(StandardScaler(), LogisticRegression())
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

In [None]:
# Display the predicted class for each test row.
y_pred

In [None]:
# `import sklearn` alone does not guarantee that the `metrics` submodule is
# loaded; import the submodule explicitly so this cell does not depend on some
# earlier import having pulled it in as a side effect.
import sklearn.metrics
sklearn.metrics.accuracy_score(y_test,y_pred)

In [None]:
from sklearn.metrics import confusion_matrix
# Pin both class labels so the matrix is always 2x2. Without `labels`, a test
# split that happens to contain a single class yields a 1x1 matrix and the
# DataFrame construction below fails on the shape mismatch.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
conf_matrix = pd.DataFrame(
    data=cm,
    columns=['Predicted:0', 'Predicted:1'],
    index=['Actual:0', 'Actual:1'],
)
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="YlGnBu")