In [64]:
#NAME: Swetha Kanduri
#Week 10

In [65]:
# Import necessary libraries
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

In [66]:
# Load the CSV data
# Reads "fourth.csv" (path relative to the notebook's working directory) into a
# DataFrame. The helper functions in this notebook only read the 'x', 'y', and
# 'result' columns from it.
# NOTE(review): the 'Unnamed: 0.1' / 'Unnamed: 0' columns in the rendered output
# look like index columns written by earlier saves of this file; confirm, and
# consider dropping them or saving with index=False upstream.
fourth_df = pd.read_csv("fourth.csv")
# Bare last expression: renders the frame as the cell's output.
fourth_df

Unnamed: 0.1,Unnamed: 0,key,x,y,result
0,0,1,-0.167995,-0.715184,False
1,1,1,0.650967,0.807528,True
2,2,1,0.046858,-0.264659,False
3,3,1,0.882458,-0.205609,False
4,4,1,-0.338443,-0.885061,True
...,...,...,...,...,...
995,995,1,0.902761,-0.487468,False
996,996,1,0.350615,0.028936,False
997,997,1,-0.955657,0.381122,False
998,998,1,-0.275499,-0.104344,False


In [67]:
# Helper Function to Extract Features and Labels
def extract_X_Y(df):
    """Split a dataframe into a feature array and a label array.

    Parameters:
        df (DataFrame): Must contain the columns 'x', 'y', and 'result'.

    Returns:
        tuple: (X, y) where X holds the 'x' and 'y' columns (one row per
        sample, in that column order) and y holds the 'result' column,
        both as the arrays returned by ``.values``.
    """
    feature_columns = ["x", "y"]
    target_column = "result"
    return df[feature_columns].values, df[target_column].values

# Function to Evaluate the Model
def model_evaluation(y_true, y_pred):
    """Score predictions against the ground-truth labels.

    Parameters:
        y_true: True labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        tuple: (accuracy, precision, recall), in that order.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # Both scorers get zero_division=0, so an undefined ratio (no predicted
    # positives for precision, no actual positives for recall) is reported as 0.
    precision, recall = (
        scorer(y_true, y_pred, zero_division=0)
        for scorer in (precision_score, recall_score)
    )
    return accuracy, precision, recall

# Retraining Function
def retrain_model(df, model_type="KNN", k=15, test_size=0.4, random_state=None):
    """
    Fit a fresh classifier on a train/test split of ``df`` and score it.

    Parameters:
        df (DataFrame): Dataset containing the 'x', 'y', and 'result' columns.
        model_type (str): Which classifier to build: 'KNN' or 'Dummy'.
        k (int): n_neighbors for the KNN classifier; ignored for 'Dummy'.
        test_size (float): Forwarded to train_test_split as the test fraction.
        random_state (int): Seed forwarded to train_test_split and, for
            'Dummy', to the DummyClassifier. None leaves both unseeded.

    Returns:
        tuple: (model, metrics) where model is the fitted classifier and
        metrics is a dict with the keys 'accuracy', 'precision', and 'recall',
        all computed on the held-out test split.

    Raises:
        ValueError: If model_type is neither 'KNN' nor 'Dummy'.
    """
    features, labels = extract_X_Y(df)
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Reject unsupported model types before building anything.
    if model_type not in ("KNN", "Dummy"):
        raise ValueError("Invalid model_type. Choose 'KNN' or 'Dummy'.")

    classifier = (
        KNeighborsClassifier(n_neighbors=k)
        if model_type == "KNN"
        else DummyClassifier(strategy="stratified", random_state=random_state)
    )

    # fit() returns the estimator itself, so training and prediction chain.
    predictions = classifier.fit(features_train, labels_train).predict(features_test)

    # model_evaluation returns (accuracy, precision, recall) in this order.
    metric_names = ("accuracy", "precision", "recall")
    metrics = dict(zip(metric_names, model_evaluation(labels_test, predictions)))

    return classifier, metrics

In [68]:
# Retrain using KNN classifier
# random_state is pinned so the train/test split, and therefore the printed
# metrics, are identical on every Restart & Run All. Any model whose metrics are
# compared against these should be trained with the same seed so that both are
# scored on the same held-out rows.
knn_model, knn_metrics = retrain_model(fourth_df, model_type="KNN", k=5, random_state=42)
print("Retrained using KNN:\n", knn_metrics)

Retrained using KNN:
 {'accuracy': 0.9625, 'precision': 0.9393939393939394, 'recall': 0.9117647058823529}


In [75]:
# Retrain using Dummy classifier
# random_state is pinned for two reasons: it fixes the train/test split, and
# retrain_model forwards it to DummyClassifier(strategy="stratified"), whose
# predictions are otherwise drawn randomly and change on every run. With a fixed
# seed this baseline is reproducible and is scored on the same held-out rows as
# any other model trained with random_state=42.
dummy_model, dummy_metrics = retrain_model(fourth_df, model_type="Dummy", random_state=42)
print("\nRetrained using Dummy Classifier:\n", dummy_metrics)


Retrained using Dummy Classifier:
 {'accuracy': 0.6375, 'precision': 0.28888888888888886, 'recall': 0.24299065420560748}
