In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

# Load datasets
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')

# Merge to get winner and venue into deliveries
df = deliveries.merge(matches[['id', 'winner', 'venue']], left_on='match_id', right_on='id', how='left')

# Filter for 2nd innings only
df = df[df['inning'] == 2]

# Add cumulative runs and wickets
df['current_runs'] = df.groupby('match_id')['total_runs'].cumsum()
df['is_wicket'] = df['dismissal_kind'].notnull().astype(int)
df['wickets'] = df.groupby('match_id')['is_wicket'].cumsum()

# Calculate total balls bowled
df['total_balls'] = (df['over'] - 1) * 6 + df['ball']

# Drop rows where total_balls = 0 (just in case)
df = df[df['total_balls'] > 0]

# Encode winning team
df['win'] = (df['batting_team'] == df['winner']).astype(int)

# Encode teams and venue
team_encoder = LabelEncoder()
venue_encoder = LabelEncoder()

df['batting_team_enc'] = team_encoder.fit_transform(df['batting_team'])
df['bowling_team_enc'] = team_encoder.fit_transform(df['bowling_team'])
df['venue_enc'] = venue_encoder.fit_transform(df['venue'])

# Final features
X = df[['batting_team_enc', 'bowling_team_enc', 'venue_enc',
        'current_runs', 'wickets', 'over', 'ball']]
y = df['win']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Win predictor model accuracy: {accuracy * 100:.2f}%")

# Save model and encoders
joblib.dump(model, 'model/win_predictor.pkl')
joblib.dump(team_encoder, 'model/team_encoder.pkl')
joblib.dump(venue_encoder, 'model/venue_encoder.pkl')


Win predictor model accuracy: 97.15%


['model/venue_encoder.pkl']