requirements.txt
pandas
numpy
plotly
streamlit

In [1]:
import pandas as pd


def load_data(filepath: D:\Data CSV Files\country_wise_latest) -> pd.DataFrame:
    """
    Load COVID-19 dataset and clean column names.
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.strip()
    return df


SyntaxError: invalid syntax (2035263908.py, line 4)

In [None]:
import pandas as pd


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Perform preprocessing and feature engineering.
    """
    df = df.copy()

    # Create Recovery Rate %
    df["Recovery Rate (%)"] = (
        df["Recovered"] / df["Confirmed"]
    ) * 100

    # Create Death Rate %
    df["Death Rate (%)"] = (
        df["Deaths"] / df["Confirmed"]
    ) * 100

    # Handle infinite values if any
    df.replace([float("inf"), -float("inf")], 0, inplace=True)

    return df


In [None]:
import pandas as pd


def top_countries_by_confirmed(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    return df.sort_values(by="Confirmed", ascending=False).head(n)


def top_countries_by_deaths(df: pd.DataFrame, n: int = 10) -> pd.DataFrame:
    return df.sort_values(by="Deaths", ascending=False).head(n)


def summary_statistics(df: pd.DataFrame) -> pd.DataFrame:
    return df.describe()


In [None]:
import plotly.express as px
import pandas as pd


def plot_top_confirmed(df: pd.DataFrame):
    top = df.sort_values(by="Confirmed", ascending=False).head(10)

    fig = px.bar(
        top,
        x="Country/Region",
        y="Confirmed",
        title="Top 10 Countries by Confirmed COVID-19 Cases",
    )

    return fig


def plot_top_deaths(df: pd.DataFrame):
    top = df.sort_values(by="Deaths", ascending=False).head(10)

    fig = px.bar(
        top,
        x="Country/Region",
        y="Deaths",
        title="Top 10 Countries by COVID-19 Deaths",
    )

    return fig


def plot_confirmed_vs_deaths(df: pd.DataFrame):
    fig = px.scatter(
        df,
        x="Confirmed",
        y="Deaths",
        hover_name="Country/Region",
        title="Confirmed vs Deaths (Country Comparison)",
    )

    return fig


def plot_active_distribution(df: pd.DataFrame):
    fig = px.histogram(
        df,
        x="Active",
        title="Distribution of Active Cases",
    )

    return fig


def plot_global_map(df: pd.DataFrame):
    fig = px.choropleth(
        df,
        locations="Country/Region",
        locationmode="country names",
        color="Confirmed",
        title="Global COVID-19 Confirmed Cases",
    )

    return fig


In [None]:
import streamlit as st
from src.data_loader import load_data
from src.preprocessing import preprocess_data
from src.analysis import summary_statistics
from src.visualization import (
    plot_top_confirmed,
    plot_top_deaths,
    plot_confirmed_vs_deaths,
    plot_active_distribution,
    plot_global_map,
)

st.set_page_config(page_title="COVID-19 Dashboard", layout="wide")

st.title("ðŸ¦  COVID-19 Interactive Data Dashboard")

# Load Data
df = load_data("data/country_wise_latest.csv")
df = preprocess_data(df)

# Summary Section
st.subheader("Dataset Overview")
st.write(df.head())

st.subheader("Statistical Summary")
st.dataframe(summary_statistics(df))

# Visualizations
st.subheader("Top 10 Countries by Confirmed Cases")
st.plotly_chart(plot_top_confirmed(df), use_container_width=True)

st.subheader("Top 10 Countries by Deaths")
st.plotly_chart(plot_top_deaths(df), use_container_width=True)

st.subheader("Confirmed vs Deaths Comparison")
st.plotly_chart(plot_confirmed_vs_deaths(df), use_container_width=True)

st.subheader("Active Cases Distribution")
st.plotly_chart(plot_active_distribution(df), use_container_width=True)

st.subheader("Global Spread Map")
st.plotly_chart(plot_global_map(df), use_container_width=True)
