In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load dataset
data = pd.read_csv("matches.csv")  # Update path if needed

# Select relevant features
X = data[["season", "city", "toss_winner", "toss_decision", "team1", "team2"]].copy()
y = data["winner"]

# Encode categorical columns in X using separate encoders
encoders_X = {}
for col in X.columns:
    if X[col].dtype == "object":
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col].astype(str))
        encoders_X[col] = le  # Save encoder if you want to inverse-transform later

# Encode target column y separately
le_y = LabelEncoder()
y = le_y.fit_transform(y.astype(str))

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build KNN model
k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

# Predict on test set
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN Test Accuracy: {accuracy*100:.2f}%")

# Predict a new sample match
# Example values (season, city, toss_winner, toss_decision, team1, team2)
# Make sure to encode them using the same LabelEncoders
sample = [[2017, 5, 8, 2, 11, 12]]  # numbers are pre-encoded
prediction_encoded = knn.predict(sample)[0]

# Convert prediction back to original team name
prediction_team = le_y.inverse_transform([prediction_encoded])[0]
print("Predicted Winner for sample match:", prediction_team)


KNN Test Accuracy: 35.16%
Predicted Winner for sample match: Royal Challengers Bangalore
