In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
df = pd.read_csv('covid_19_india.csv')

# Clean and preprocess.
# Coerce the count columns to numeric FIRST: errors='coerce' replaces any
# unparseable entry with NaN, so the missing-value filter must run after the
# conversion. Dropping before coercing would let those new NaNs leak into X
# (most scikit-learn estimators reject NaN) and be silently labelled 0 in y,
# because `NaN > threshold` evaluates to False.
count_cols = ["Confirmed", "Cured", "Deaths"]
for col in count_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop rows where any count is missing or failed to parse. A new frame is
# assigned instead of mutating in place; the original row index is kept.
df = df.dropna(subset=count_cols)

# Create features and label
CONFIRMED_THRESHOLD = 10000  # rows above this confirmed count form the positive class
X = df[['Cured', 'Deaths']]
y = (df["Confirmed"] > CONFIRMED_THRESHOLD).astype(int)  # 1 if Confirmed > 10000, else 0

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Import models and metrics
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize models.
# The tree-based estimators are randomized; seeding them (same seed as the
# train/test split) makes the reported metrics reproducible across runs.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
model_names = ["Logistic Regression", "SVM", "Decision Tree", "Random Forest", "KNN"]

# One record (dict) per model is appended below, so the metrics can be
# tabulated or plotted afterwards, e.g. pd.DataFrame(model_scores).
model_scores = []

# Train and evaluate
for model, name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # zero_division=0 reports 0 instead of warning when a model predicts
    # no positives (precision) or the test split has none (recall).
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    # Keep the numbers, not just the printed text.
    model_scores.append({
        "Model": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
    })

    print(f"🔍 {name}")
    print(f"   Accuracy : {accuracy:.2f}")
    print(f"   Precision: {precision:.2f}")
    print(f"   Recall   : {recall:.2f}")
    print(f"   F1 Score : {f1:.2f}")
    print("-" * 30)


🔍 Logistic Regression
   Accuracy : 0.98
   Precision: 0.99
   Recall   : 0.99
   F1 Score : 0.99
------------------------------
🔍 SVM
   Accuracy : 0.95
   Precision: 1.00
   Recall   : 0.92
   F1 Score : 0.96
------------------------------
🔍 Decision Tree
   Accuracy : 0.99
   Precision: 0.99
   Recall   : 0.99
   F1 Score : 0.99
------------------------------
🔍 Random Forest
   Accuracy : 0.99
   Precision: 1.00
   Recall   : 0.99
   F1 Score : 0.99
------------------------------
🔍 KNN
   Accuracy : 0.99
   Precision: 0.99
   Recall   : 0.98
   F1 Score : 0.99
------------------------------


In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the CSV from disk again (this rebinds `df`, replacing whatever frame
# the name held before) and preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='covid_19_india.csv')
df.head(n=5)

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1,2020-01-30,6:00 PM,Kerala,1,0,0,0,1
1,2,2020-01-31,6:00 PM,Kerala,1,0,0,0,1
2,3,2020-02-01,6:00 PM,Kerala,2,0,0,0,2
3,4,2020-02-02,6:00 PM,Kerala,3,0,0,0,3
4,5,2020-02-03,6:00 PM,Kerala,3,0,0,0,3


In [16]:
# Tally the missing (NaN/None) entries in each column
df.isna().sum()

Sno                         0
Date                        0
Time                        0
State/UnionTerritory        0
ConfirmedIndianNational     0
ConfirmedForeignNational    0
Cured                       0
Deaths                      0
Confirmed                   0
dtype: int64