In [13]:
import pandas as pd


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report

def retrain_mod(dataset_file, model_type='knn'):
    
    print(f"\nStarting retraining using '{model_type.upper()}' on file: {dataset_file}")
    
    # Load the dataset.
    df = pd.read_csv(dataset_file)

    # Drop unnecessary columns if present
    df = df.drop(columns=['Unnamed: 0', 'key'], errors='ignore')

    #  features and target
    X = df[['x', 'y']]
    y = df['result']

    # Train AND test split with stratification.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )

    # Initialise the chosen Model
    if model_type == 'knn':
        model = KNeighborsClassifier(n_neighbors=3)
    elif model_type == 'dummy':
        model = DummyClassifier(strategy='most_frequent')
    else:
        raise ValueError("Invalid model_type. Choose 'knn' or 'dummy'")

    # Training  the MODEL
    model.fit(X_train, y_train)

    # Prediction  and evaluation
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

    return model

In [15]:
#  Run on all four datasets with both KNN and Dummy
datasets = ['first.csv', 'second.csv', 'third.csv', 'fourth.csv']
model_types = ['knn', 'dummy']

for file in datasets:
    for model in model_types:
        retrain_model(file, model_type=model)


Starting retraining using 'KNN' on file: first.csv
              precision    recall  f1-score   support

       False       0.86      0.89      0.88        99
        True       0.89      0.86      0.87       101

    accuracy                           0.88       200
   macro avg       0.88      0.88      0.87       200
weighted avg       0.88      0.88      0.87       200


Starting retraining using 'DUMMY' on file: first.csv
              precision    recall  f1-score   support

       False       0.00      0.00      0.00        99
        True       0.51      1.00      0.67       101

    accuracy                           0.51       200
   macro avg       0.25      0.50      0.34       200
weighted avg       0.26      0.51      0.34       200


Starting retraining using 'KNN' on file: second.csv
              precision    recall  f1-score   support

       False       0.90      0.89      0.90       101
        True       0.89      0.90      0.89        99

    accuracy           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.96      0.98      0.97       160
        True       0.92      0.85      0.88        40

    accuracy                           0.95       200
   macro avg       0.94      0.92      0.93       200
weighted avg       0.95      0.95      0.95       200


Starting retraining using 'DUMMY' on file: third.csv
              precision    recall  f1-score   support

       False       0.80      1.00      0.89       160
        True       0.00      0.00      0.00        40

    accuracy                           0.80       200
   macro avg       0.40      0.50      0.44       200
weighted avg       0.64      0.80      0.71       200


Starting retraining using 'KNN' on file: fourth.csv
              precision    recall  f1-score   support

       False       0.97      0.97      0.97       151
        True       0.92      0.92      0.92        49

    accuracy                           0.96       200
   macro avg       0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


This retraining tool enables you to simply transition between KNN and Dummy classifiers using stratified cross-validation. The KNN classifier is projected to produce more consistent and balanced results because it outperforms the Dummy classifier across datasets. The Dummy classifier predicts the majority class, resulting in lower performance, particularly on skewed datasets. As a result, utilizing the KNN classifier is probably the best option for the majority of instances.