In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = None   

def load_dataset():
    global df
    print("\n== Load Dataset ==")
    path = input("Enter the path of the dataset (CSV file): ")
    try:
        df = pd.read_csv(path)
        print("Dataset loaded successfully!")
    except Exception as e:
        print("Error loading dataset:", e)


def explore_data():
    global df
    if df is None:
        print("No dataset loaded!")
        return

    print("\n== Explore Data ==")
    print("1. Display the first 5 rows")
    print("2. Display the last 5 rows")
    print("3. Display column names")
    print("4. Display data types")
    print("5. Display basic info")

    choice = input("Enter your choice: ")

    if choice == "1":
        print(df.head())
    elif choice == "2":
        print(df.tail())
    elif choice == "3":
        print(df.columns.tolist())
    elif choice == "4":
        print(df.dtypes)
    elif choice == "5":
        print(df.info())
    else:
        print("Invalid choice!")


def dataframe_operations():
    global df
    if df is None:
        print("No dataset loaded!")
        return

    print("\n== DataFrame Operations ==")
    print("1. Sort values")
    print("2. Group by column and sum")
    print("3. Filter rows")

    choice = input("Enter your choice: ")

    if choice == "1":
        col = input("Enter column name to sort: ")
        if col in df.columns:
            print(df.sort_values(by=col))
        else:
            print("Invalid column name!")

    elif choice == "2":
        col = input("Enter column to group by: ")
        if col in df.columns:
            print(df.groupby(col).sum())
        else:
            print("Invalid column name!")

    elif choice == "3":
        col = input("Enter column name to filter: ")
        val = input("Enter value to match: ")
        if col in df.columns:
            print(df[df[col] == val])
        else:
            print("Invalid column name!")
    else:
        print("Invalid choice!")


def handle_missing_data():
    global df
    if df is None:
        print("No dataset loaded!")
        return

    print("\n== Handle Missing Data ==")
    print("1. Display rows with missing values")
    print("2. Fill missing values with mean")
    print("3. Drop rows with missing values")
    print("4. Replace missing values with a specific value")

    choice = input("Enter your choice: ")

    if choice == "1":
        missing = df[df.isnull().any(axis=1)]
        if missing.empty:
            print("No missing values found in the dataset!")
        else:
            print(missing)

    elif choice == "2":
        df.fillna(df.mean(numeric_only=True), inplace=True)
        print("Missing values filled with mean successfully!")

    elif choice == "3":
        df.dropna(inplace=True)
        print("Rows with missing values dropped successfully!")

    elif choice == "4":
        val = input("Enter value to replace missing data with: ")
        df.fillna(val, inplace=True)
        print("Missing values replaced successfully!")

    else:
        print("Invalid choice!")


def descriptive_statistics():
    global df
    if df is None:
        print("No dataset loaded!")
        return

    print("\n== Descriptive Statistics ==")
    print(df.describe())


def data_visualization():
    global df
    if df is None:
        print("No dataset loaded!")
        return

    sns.set(style="whitegrid")

    print("\n== Data Visualization ==")
    print("1. Line Plot (new cases over time)")
    print("2. Bar Chart (Top 10 countries total cases)")
    print("3. Histogram (Distribution of new cases)")
    print("4. Scatter Plot (New cases vs New deaths)")
    print("5. Heatmap (Correlation matrix)")
    print("6. Bar Plot (Any custom x,y columns)")

    choice = input("Enter your choice: ")

    # --------------------------
   
    # --------------------------
    if choice == "1":
        if "date" in df.columns and "new_cases" in df.columns:
            plt.figure(figsize=(12,5))
            plt.plot(df["date"], df["new_cases"])
            plt.title("Daily New COVID-19 Cases Over Time")
            plt.xticks(rotation=45)
            plt.show()
        else:
            print("Required columns not found!")

    # --------------------------
   
    # --------------------------
    elif choice == "2":
        if "location" in df.columns and "total_cases" in df.columns:
            top_countries = df.groupby("location")["total_cases"].max().sort_values(ascending=False).head(10)
            plt.figure(figsize=(12,6))
            sns.barplot(x=top_countries.index, y=top_countries.values)
            plt.xticks(rotation=45)
            plt.title("Top 10 Countries with Highest Total Cases")
            plt.show()
        else:
            print("Required columns not found!")

    # --------------------------
   
    # --------------------------
    elif choice == "3":
        if "new_cases" in df.columns:
            plt.figure(figsize=(10,5))
            sns.histplot(df["new_cases"], kde=True, bins=40)
            plt.title("Distribution of New Daily COVID-19 Cases")
            plt.show()
        else:
            print("Column 'new_cases' not found!")

    # --------------------------
  
    # --------------------------
    elif choice == "4":
        if "new_cases" in df.columns and "new_deaths" in df.columns:
            plt.figure(figsize=(10,5))
            sns.scatterplot(x=df["new_cases"], y=df["new_deaths"])
            plt.title("New Cases vs New Deaths")
            plt.show()
        else:
            print("Required columns not found!")

    # --------------------------

    # --------------------------
    elif choice == "5":
        plt.figure(figsize=(10,6))
        sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm")
        plt.title("Correlation Heatmap of COVID-19 Data")
        plt.show()

    # --------------------------

    elif choice == "6":
        x = input("Enter x-axis column: ")
        y = input("Enter y-axis column: ")
        if x in df.columns and y in df.columns:
            plt.figure(figsize=(12,6))
            sns.barplot(x=df[x], y=df[y])
            plt.xticks(rotation=45)
            plt.title(f"Bar Plot of {y} vs {x}")
            plt.show()
        else:
            print("Invalid column names!")

    else:
        print("Invalid choice!")


def save_visualization():
    filename = input("Enter file name to save the plot (e.g., plot.png): ")
    try:
        plt.savefig(filename)
        print(f"Visualization saved as {filename} successfully!")
    except Exception as e:
        print("Error saving visualization:", e)


def main():
    while True:
        print("\n========== Data Analysis & Visualization Program ==========")
        print("1. Load Dataset")
        print("2. Explore Data")
        print("3. Perform DataFrame Operations")
        print("4. Handle Missing Data")
        print("5. Generate Descriptive Statistics")
        print("6. Data Visualization")
        print("7. Save Visualization")
        print("8. Exit")
        print("===========================================================")

        choice = input("Enter your choice: ")

        if choice == "1":
            load_dataset()
        elif choice == "2":
            explore_data()
        elif choice == "3":
            dataframe_operations()
        elif choice == "4":
            handle_missing_data()
        elif choice == "5":
            descriptive_statistics()
        elif choice == "6":
            data_visualization()
        elif choice == "7":
            save_visualization()
        elif choice == "8":
            print("Exiting the program. Goodbye!")
            break
        else:
            print("Invalid choice! Please try again.")



main()



1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  1



== Load Dataset ==


Enter the path of the dataset (CSV file):  C:\Users\kajal\Downloads\covid19_full_dataset_2020.csv


Dataset loaded successfully!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  2



== Explore Data ==
1. Display the first 5 rows
2. Display the last 5 rows
3. Display column names
4. Display data types
5. Display basic info


Enter your choice:  1


         Date Country  Confirmed  Recovered  Deaths
0  2020-01-01   India        152        190      11
1  2020-01-02   India        637        287      23
2  2020-01-03   India       1035        605      35
3  2020-01-04   India       1355        911      49
4  2020-01-05   India       1511       1244      64

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  3



== DataFrame Operations ==
1. Sort values
2. Group by column and sum
3. Filter rows


Enter your choice:  1
Enter column name to sort:  date


Invalid column name!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  5



== Descriptive Statistics ==
           Confirmed     Recovered       Deaths
count     366.000000    366.000000   366.000000
mean    50804.046448  39319.510929  1722.811475
std     29322.216301  22720.073665   978.010009
min       152.000000    190.000000    11.000000
25%     25984.000000  19565.250000   852.500000
50%     50590.500000  39164.000000  1738.500000
75%     75523.000000  59522.000000  2609.000000
max    102074.000000  78478.000000  3365.000000

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  6



== Data Visualization ==
1. Line Plot (new cases over time)
2. Bar Chart (Top 10 countries total cases)
3. Histogram (Distribution of new cases)
4. Scatter Plot (New cases vs New deaths)
5. Heatmap (Correlation matrix)
6. Bar Plot (Any custom x,y columns)


Enter your choice:  3


Column 'new_cases' not found!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  7
Enter file name to save the plot (e.g., plot.png):  covid 19


Visualization saved as covid 19 successfully!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  8


Exiting the program. Goodbye!


<Figure size 640x480 with 0 Axes>