In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import pickle


# Load the match records. The columns used below are team1, team2, venue,
# toss_winner, toss_decision (features) and winner (label).
df = pd.read_csv("ipl_2025_matches.csv")

# Encode categorical columns
team_encoder = LabelEncoder()
venue_encoder = LabelEncoder()
toss_decision_encoder = LabelEncoder()
winner_encoder = LabelEncoder()

# Fit the shared team encoder exactly ONCE, on every team name that appears in
# any team-valued column. Calling fit_transform per column refits the encoder
# each time, so the same team could receive different integer codes in
# different columns, and the pickled encoder would only know the labels of the
# last column it was fitted on.
team_columns = ['team1', 'team2', 'toss_winner']
team_encoder.fit(pd.concat([df[col] for col in team_columns], ignore_index=True))
for col in team_columns:
    df[col] = team_encoder.transform(df[col])

df['venue'] = venue_encoder.fit_transform(df['venue'])
df['toss_decision'] = toss_decision_encoder.fit_transform(df['toss_decision'])
# The label keeps its own encoder, so its codes are independent of team codes.
df['winner'] = winner_encoder.fit_transform(df['winner'])

# Features and label. The feature order is pinned explicitly so that it does
# not depend on the column order of the CSV and always matches the order used
# when encoding a single match for prediction.
feature_columns = ['team1', 'team2', 'venue', 'toss_winner', 'toss_decision']
X = df[feature_columns]
y = df['winner']

# Split (the 20% hold-out is produced here; the model is fitted on the rest)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train final model (RandomForest is best)
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Save everything. Each file is written inside a `with` block so the handle is
# flushed and closed even if pickling raises.
artifacts = {
    "ipl_model.pkl": model,
    "team_encoder.pkl": team_encoder,
    "venue_encoder.pkl": venue_encoder,
    "toss_decision_encoder.pkl": toss_decision_encoder,
    "winner_encoder.pkl": winner_encoder,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)


In [4]:
# Manual prediction
import pickle

import pandas as pd


def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards.

    NOTE(security): pickle.load can execute arbitrary code while loading.
    Only use this on artifacts you produced yourself or otherwise trust.
    """
    with open(path, "rb") as artifact_file:
        return pickle.load(artifact_file)


# Load saved model + encoders
model = _load_pickle("ipl_model.pkl")
team_encoder = _load_pickle("team_encoder.pkl")
venue_encoder = _load_pickle("venue_encoder.pkl")
toss_decision_encoder = _load_pickle("toss_decision_encoder.pkl")
winner_encoder = _load_pickle("winner_encoder.pkl")

# Manual input
team1_name = "Mumbai Indians"
team2_name = "Chennai Super Kings"
venue_name = "Arun Jaitley Stadium"
toss_winner_name = "Mumbai Indians"
toss_decision_name = "bat"

# Encode. Each transform raises ValueError if the label was not seen when the
# corresponding encoder was fitted.
encoded_inputs = {
    "team1": team_encoder.transform([team1_name])[0],
    "team2": team_encoder.transform([team2_name])[0],
    "venue": venue_encoder.transform([venue_name])[0],
    "toss_winner": team_encoder.transform([toss_winner_name])[0],
    "toss_decision": toss_decision_encoder.transform([toss_decision_name])[0],
}

# Build a one-row DataFrame instead of a bare array: the model was fitted on a
# DataFrame, so named columns avoid scikit-learn's feature-name warning. When
# the model recorded its training column order (feature_names_in_), that order
# is used, so the values cannot be silently fed into the wrong features.
feature_order = list(getattr(model, "feature_names_in_", encoded_inputs))
input_array = pd.DataFrame([encoded_inputs])[feature_order]

# Predict. The forest is evaluated once; the predicted class is the one with
# the highest averaged probability, which is what model.predict returns.
predicted_proba = model.predict_proba(input_array)[0]
predicted_class = model.classes_[predicted_proba.argmax()]
predicted_team = winner_encoder.inverse_transform([predicted_class])[0]
confidence = predicted_proba.max() * 100

print(f"🏏 Predicted Winner: {predicted_team}")
print(f"📈 Confidence: {confidence:.2f}%")


🏏 Predicted Winner: Chennai Super Kings
📈 Confidence: 38.50%


