In [13]:
import pandas as pd
import numpy as np

# Read the IPL match records from the CSV file (path is relative to the
# notebook's working directory).
MATCHES_CSV = "ipl_2025_matches.csv"
df = pd.read_csv(MATCHES_CSV)

# Display the first five rows as the cell's output.
df.head(5)


Unnamed: 0,team1,team2,venue,toss_winner,toss_decision,winner
0,Sunrisers Hyderabad,Kolkata Knight Riders,M. Chinnaswamy Stadium,Sunrisers Hyderabad,field,Sunrisers Hyderabad
1,Chennai Super Kings,Lucknow Super Giants,M. A. Chidambaram Stadium,Chennai Super Kings,bat,Chennai Super Kings
2,Chennai Super Kings,Mumbai Indians,Ekana Cricket Stadium,Mumbai Indians,field,Mumbai Indians
3,Chennai Super Kings,Lucknow Super Giants,M. Chinnaswamy Stadium,Chennai Super Kings,field,Chennai Super Kings
4,Royal Challengers Bangalore,Delhi Capitals,Ekana Cricket Stadium,Delhi Capitals,bat,Delhi Capitals


In [14]:
from sklearn.preprocessing import LabelEncoder

# Preprocessing: keep the modelling columns, drop incomplete rows, and
# label-encode every categorical column to integers.
#
# NOTE: this cell overwrites `df` with the encoded frame, so it is not
# idempotent -- re-run the cell that loads the CSV before re-running this one,
# otherwise the encoders would be fitted on already-encoded integers.

# Columns used for modelling (five features plus the `winner` label).
model_columns = ['team1', 'team2', 'venue', 'toss_winner', 'toss_decision', 'winner']

# Columns that hold team names and therefore must share a single encoder.
team_columns = ['team1', 'team2', 'toss_winner', 'winner']

# Select the relevant columns and drop rows with missing values.
# `.copy()` yields an independent frame, so the column assignments below do
# not operate on a selection of the loaded frame (which is what triggers
# pandas' SettingWithCopyWarning); it also replaces the in-place dropna.
df = df[model_columns].dropna().copy()

# Create a unified set of team names so that the same team always maps to the
# same integer, whichever column it appears in.
all_teams = pd.concat([df[col] for col in team_columns]).unique()

# Encode team-related columns with the shared encoder.
team_encoder = LabelEncoder()
team_encoder.fit(all_teams)
for col in team_columns:
    df[col] = team_encoder.transform(df[col])

# Venue and toss decision each get their own encoder.
venue_encoder = LabelEncoder()
df['venue'] = venue_encoder.fit_transform(df['venue'])

toss_decision_encoder = LabelEncoder()
df['toss_decision'] = toss_decision_encoder.fit_transform(df['toss_decision'])

# Preview
df.head()


Unnamed: 0,team1,team2,venue,toss_winner,toss_decision,winner
0,9,3,5,9,1,9
1,0,4,4,0,0,0
2,0,5,2,5,1,5
3,0,4,5,0,1,0
4,8,1,2,1,0,1


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Features and label
X = df.drop('winner', axis=1)
y = df['winner']

# Train-test split: hold out 20% of the matches; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit both candidate classifiers under their own names. Previously the random
# forest was assigned to `model` and immediately overwritten by the KNN, so it
# was trained but never evaluated.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# KNN with k=1 for exact match behavior: each prediction is the label of the
# single closest training row.
# NOTE(review): the features are LabelEncoder integer codes, so the distances
# KNN computes depend on how names happened to be numbered -- consider one-hot
# encoding if KNN is kept.
knn_model = KNeighborsClassifier(n_neighbors=1)
knn_model.fit(X_train, y_train)

# Evaluate both models on the held-out matches.
rf_pred = rf_model.predict(X_test)
knn_pred = knn_model.predict(X_test)
print("RandomForest accuracy:", accuracy_score(y_test, rf_pred))
print("KNN (k=1) accuracy:", accuracy_score(y_test, knn_pred))

# `model` / `y_pred` keep referring to the KNN, exactly as before, because the
# following cells pickle and query `model`.
model = knn_model
y_pred = knn_pred
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.44


In [16]:
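# How RandomForestClassifier works
# (Note: the `model` that is pickled and used for prediction below is the KNeighborsClassifier, not a random forest.)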
# RandomForestClassifier is a machine learning algorithm that builds many decision trees and combines their results.
# Each tree makes a prediction, and the most common prediction among all trees is chosen as the final output.
# This approach helps improve accuracy and reduces the risk of overfitting compared to using a single decision tree.

# Simple example:
# Suppose you want to predict if a fruit is an apple or an orange based on its color and weight.
# RandomForestClassifier will create multiple decision trees using different samples and features.
# Each tree gives its own prediction (apple or orange), and the final prediction is the one most trees agree on.

In [17]:
import pickle

# Persist the trained model and the three fitted encoders, one pickle file
# each, in the current working directory.
artifacts = {
    "model.pkl": model,
    "team_encoder.pkl": team_encoder,
    "venue_encoder.pkl": venue_encoder,
    "toss_decision_encoder.pkl": toss_decision_encoder,
}

for filename, artifact in artifacts.items():
    with open(filename, "wb") as handle:
        pickle.dump(artifact, handle)


In [18]:
# -----------------------------
# 📊 Manual Prediction in Jupyter Notebook
# -----------------------------
# Encodes one hand-written fixture with the fitted encoders, asks `model` for
# the winner, and prints the predicted team together with its probability.

# Sample input values
team1_name = "Sunrisers Hyderabad"
team2_name = "Kolkata Knight Riders"
venue_name = "M. Chinnaswamy Stadium"
toss_winner_name = "Sunrisers Hyderabad"
toss_decision_name = "field"

# Encode using the same encoders used in training. A name the encoders never
# saw during fitting makes `transform` raise ValueError.
encoded_input = {
    'team1': team_encoder.transform([team1_name])[0],
    'team2': team_encoder.transform([team2_name])[0],
    'venue': venue_encoder.transform([venue_name])[0],
    'toss_winner': team_encoder.transform([toss_winner_name])[0],
    'toss_decision': toss_decision_encoder.transform([toss_decision_name])[0]
}

# One-row DataFrame whose column names and order match the training features.
# The model was fitted on a DataFrame, so passing a bare ndarray makes
# scikit-learn warn that the input has no valid feature names.
input_frame = pd.DataFrame([encoded_input])
input_array = input_frame.to_numpy()  # same values as a (1, 5) ndarray

# Make prediction
predicted_proba = model.predict_proba(input_frame)[0]
predicted_class = model.predict(input_frame)[0]
predicted_team = team_encoder.inverse_transform([predicted_class])[0]

# predict_proba's columns follow `model.classes_`, which contains only the
# labels present in the training target. Indexing the probabilities directly
# with the encoded label is therefore wrong (or out of bounds) whenever some
# team never appears as a winner in the training split; look up the column
# position instead.
class_position = list(model.classes_).index(predicted_class)
confidence = predicted_proba[class_position]

# NOTE(review): if `model` is the 1-nearest-neighbour classifier, this
# probability is the label share among a single neighbour, i.e. always 100%.
print(f"🏏 Predicted Winner: {predicted_team}")
print(f"📈 Confidence: {confidence*100:.2f}%")



🏏 Predicted Winner: Sunrisers Hyderabad
📈 Confidence: 100.00%


