In [None]:
import pandas as pd
import plotly.express as px


In [None]:
# Location of the state-wise COVID-19 CSV; fetched over the network on every run.
STATE_WISE_CSV_URL = "https://api.covid19india.org/csv/latest/state_wise.csv"

# Read the remote CSV straight into the notebook-wide working frame `df`.
df = pd.read_csv(STATE_WISE_CSV_URL)
print("Dataset loaded!")

Dataset loaded!


In [None]:
# Sanity-check the load: report the frame's (rows, columns) tuple, then show
# the first five rows as plain text.
print(f"\nDataset Shape: {df.shape}")
print("\nFirst 5 rows:", df.head(), sep="\n")


Dataset Shape: (38, 12)

First 5 rows:
                         State  Confirmed  Recovered  Deaths  Active  \
0                        Total   29827268   28672170  385220  758140   
1  Andaman and Nicobar Islands       7363       7128     127     108   
2               Andhra Pradesh    1839243    1759390   12224   67629   
3            Arunachal Pradesh      32692      29934     159    2599   
4                        Assam     477159     436043    4138   35631   

     Last_Updated_Time  Migrated_Other State_code  Delta_Confirmed  \
0  19/06/2021 12:34:45           11738         TT             4478   
1  18/06/2021 21:22:30               0         AN                0   
2  18/06/2021 18:21:30               0         AP                0   
3  19/06/2021 02:15:32               0         AR                0   
4  18/06/2021 21:22:31            1347         AS                0   

   Delta_Recovered  Delta_Deaths  \
0             1216            53   
1                0             0  

In [None]:
# STEP 4: Display Data
# ----------------------------------------------------------------------------
# Prints the frame's shape and then its first five rows, each section preceded
# by a blank line. NOTE(review): this repeats the preview cell directly above
# it; consider keeping only one of the two.
print("\nDataset Shape: {}".format(df.shape))
print("\nFirst 5 rows:\n{}".format(df.head()))




Dataset Shape: (38, 12)

First 5 rows:
                         State  Confirmed  Recovered  Deaths  Active  \
0                        Total   29827268   28672170  385220  758140   
1  Andaman and Nicobar Islands       7363       7128     127     108   
2               Andhra Pradesh    1839243    1759390   12224   67629   
3            Arunachal Pradesh      32692      29934     159    2599   
4                        Assam     477159     436043    4138   35631   

     Last_Updated_Time  Migrated_Other State_code  Delta_Confirmed  \
0  19/06/2021 12:34:45           11738         TT             4478   
1  18/06/2021 21:22:30               0         AN                0   
2  18/06/2021 18:21:30               0         AP                0   
3  19/06/2021 02:15:32               0         AR                0   
4  18/06/2021 21:22:31            1347         AS                0   

   Delta_Recovered  Delta_Deaths  \
0             1216            53   
1                0             0  

In [None]:
# Translate the feed's column names into the names the later cells use.
# 'Deaths' already carries the desired name, so it needs no entry here.
column_aliases = {
    'Confirmed': 'Total_Cases',
    'Active': 'Active_Cases',
    'Recovered': 'Cured',
}
df = df.rename(columns=column_aliases)


In [None]:
# Force each metric column to a numeric dtype. With errors='coerce' any value
# that cannot be parsed becomes NaN, which is then replaced by 0.
for metric in ('Total_Cases', 'Active_Cases', 'Cured', 'Deaths'):
    df[metric] = pd.to_numeric(df[metric], errors='coerce').fillna(0)

# Remove the 'Total' and 'State Unassigned' rows so that only the remaining
# State entries feed the rankings and sums computed further down.
df = df[~df['State'].isin(['Total', 'State Unassigned'])]

# Zero-fill whatever missing values are left, in every column of the frame.
df = df.fillna(0)


In [None]:
# The fifteen rows with the largest Total_Cases, biggest first.
df_sorted = df.sort_values('Total_Cases', ascending=False).iloc[:15]

# Horizontal bars, one per state, shaded and labelled by the case count.
fig1 = px.bar(
    df_sorted,
    y='State',
    x='Total_Cases',
    text='Total_Cases',
    color='Total_Cases',
    color_continuous_scale='Blues',
    orientation='h',
    title='Top 15 States by Total Cases',
)

# Labels sit outside the bars with thousands separators; the state axis is
# ordered by bar total rather than by row order.
(
    fig1
    .update_traces(textposition='outside', texttemplate='%{text:,.0f}')
    .update_layout(yaxis=dict(categoryorder='total ascending'), height=600)
    .show()
)


In [None]:
# The fifteen rows with the largest Active_Cases, biggest first.
df_sorted2 = df.sort_values('Active_Cases', ascending=False).iloc[:15]

# Horizontal bars, one per state, shaded and labelled by the active count.
fig2 = px.bar(
    df_sorted2,
    y='State',
    x='Active_Cases',
    text='Active_Cases',
    color='Active_Cases',
    color_continuous_scale='Reds',
    orientation='h',
    title='Top 15 States by Active Cases',
)

# Labels sit outside the bars with thousands separators; the state axis is
# ordered by bar total rather than by row order.
(
    fig2
    .update_traces(textposition='outside', texttemplate='%{text:,.0f}')
    .update_layout(yaxis=dict(categoryorder='total ascending'), height=600)
    .show()
)


In [None]:
# The fifteen rows with the largest Cured count, biggest first.
df_sorted3 = df.sort_values('Cured', ascending=False).iloc[:15]

# Horizontal bars, one per state, shaded and labelled by the cured count.
fig3 = px.bar(
    df_sorted3,
    y='State',
    x='Cured',
    text='Cured',
    color='Cured',
    color_continuous_scale='Greens',
    orientation='h',
    title='Top 15 States by Cured Cases',
)

# Labels sit outside the bars with thousands separators; the state axis is
# ordered by bar total rather than by row order.
(
    fig3
    .update_traces(textposition='outside', texttemplate='%{text:,.0f}')
    .update_layout(yaxis=dict(categoryorder='total ascending'), height=600)
    .show()
)

In [None]:
# Bubble chart over every row of df: position compares total cases with
# deaths, bubble size follows total cases, and colour encodes the cured count.
fig = px.scatter(
    df,
    x='Total_Cases',
    y='Deaths',
    hover_name='State',
    size='Total_Cases',
    color='Cured',
    labels=dict(Cured='Cured Cases'),
    title='Total Cases vs. Deaths by State (Colored by Cured Cases)',
)

fig.update_layout(height=600).show()

In [None]:
# The fifteen rows with the largest Deaths count, biggest first.
df_sorted4 = df.sort_values('Deaths', ascending=False).iloc[:15]

# Horizontal bars, one per state, shaded and labelled by the death count.
fig4 = px.bar(
    df_sorted4,
    y='State',
    x='Deaths',
    text='Deaths',
    color='Deaths',
    color_continuous_scale='Oranges',
    orientation='h',
    title='Top 15 States by Deaths',
)

# Labels sit outside the bars with thousands separators; the state axis is
# ordered by bar total rather than by row order.
(
    fig4
    .update_traces(textposition='outside', texttemplate='%{text:,.0f}')
    .update_layout(yaxis=dict(categoryorder='total ascending'), height=600)
    .show()
)

In [None]:
# The ten rows with the largest Total_Cases, biggest first.
df_top10 = df.sort_values('Total_Cases', ascending=False).iloc[:10]

# One slice per state, sized by that state's total cases. Percentages are
# relative to these ten rows only, not to the whole frame.
fig = px.pie(
    df_top10,
    names='State',
    values='Total_Cases',
    color_discrete_sequence=px.colors.qualitative.Set3,
    title='Top 10 States - Share of Total COVID-19 Cases',
)

# Write each slice's percentage and state name inside the slice itself.
(
    fig
    .update_layout(height=600)
    .update_traces(textinfo='percent+label', textposition='inside')
    .show()
)



Pie chart created!


In [None]:
# The five rows with the largest Total_Cases, biggest first.
df_top5 = df.sort_values('Total_Cases', ascending=False).iloc[:5]

# Columns drawn side by side for each state (one bar per metric).
metric_columns = ['Total_Cases', 'Active_Cases', 'Cured', 'Deaths']

fig6 = px.bar(
    df_top5,
    x='State',
    y=metric_columns,
    barmode='group',
    labels=dict(value='Count', variable='Metric'),
    title='Top 5 States - All Metrics Comparison',
)

fig6.update_layout(height=500).show()


In [None]:
print("SUMMARY STATISTICS")
print("="*60)

# Totals are column sums over whatever rows are currently in df.
total_cases = df['Total_Cases'].sum()
total_active = df['Active_Cases'].sum()
total_cured = df['Cured'].sum()
total_deaths = df['Deaths'].sum()

print(f"\nTotal Cases in India: {total_cases:,.0f}")
print(f"Total Active Cases: {total_active:,.0f}")
print(f"Total Cured: {total_cured:,.0f}")
print(f"Total Deaths: {total_deaths:,.0f}")

# Both rates are percentages of total cases. If the frame is empty or every
# count is zero the ratio is undefined, so report that explicitly instead of
# dividing by zero (which would emit a NumPy warning and print "nan%").
if total_cases > 0:
    recovery_rate = (total_cured / total_cases * 100)
    fatality_rate = (total_deaths / total_cases * 100)

    print(f"\nRecovery Rate: {recovery_rate:.2f}%")
    print(f"Fatality Rate: {fatality_rate:.2f}%")
else:
    # Keep both names defined and numeric for any code that reads them later.
    recovery_rate = fatality_rate = float('nan')

    print("\nRecovery Rate: N/A (no cases in dataset)")
    print("Fatality Rate: N/A (no cases in dataset)")



SUMMARY STATISTICS

Total Cases in India: 29,827,268
Total Active Cases: 758,140
Total Cured: 28,672,170
Total Deaths: 385,220

Recovery Rate: 96.13%
Fatality Rate: 1.29%
