In [9]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# 1. Load Data
matches = pd.read_csv(r"C:\Users\ms900\OneDrive\Desktop\IPL_Project\ipl_matches_2008_2022.csv")
balls = pd.read_csv(r"C:\Users\ms900\OneDrive\Desktop\IPL_Project\ipl_ball_by_ball_2008_2022.csv")
matches["season"]

0         2022
1         2022
2         2022
3         2022
4         2022
        ...   
945    2007/08
946    2007/08
947    2007/08
948    2007/08
949    2007/08
Name: season, Length: 950, dtype: object

In [20]:
balls = balls.merge(
    matches[['id', 'season']],
    how='left',
    on='id'
)

In [21]:
def predict_orange_cap(balls):
    season = balls['season'].max()  # Latest season
    season_balls = balls[balls['season'] == season]
    player_runs = season_balls.groupby('batter')['batsman_run'].sum().reset_index()
    top = player_runs.sort_values('batsman_run', ascending=False).iloc[0]
    return top['batter'], top['batsman_run']

In [23]:
# Purple Cap prediction (by latest season)
def predict_purple_cap(balls):
    season = balls['season'].max()
    season_balls = balls[balls['season'] == season]
    # Count only legitimate wickets (iswicket_delivery == 1, and player_out not null)
    wickets = season_balls[(season_balls['iswicket_delivery'] == 1) & (season_balls['player_out'].notnull())]
    player_wickets = wickets.groupby('bowler').size().reset_index(name='wickets')
    top = player_wickets.sort_values('wickets', ascending=False).iloc[0]
    return top['bowler'], top['wickets']

# Create a Model 

In [34]:
def train_match_winner_model(matches):
    features = ['city', 'team1', 'team2', 'venue', 'toss_winner', 'toss_decision']
    target = 'winning_team'
    matches = matches.dropna(subset=features + [target])
    encoders = {}
    for col in features + [target]:
        encoders[col] = LabelEncoder()
        matches[col] = encoders[col].fit_transform(matches[col].astype(str))
    X = matches[features]
    y = matches[target]
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X, y)
    return model, encoders

In [25]:
def predict_match_winner(model, encoders, city, team1, team2, venue, toss_winner, toss_decision):
    input_df = pd.DataFrame({
        'city': [city], 'team1': [team1], 'team2': [team2], 'venue': [venue], 
        'toss_winner': [toss_winner], 'toss_decision': [toss_decision]
    })
    for col in input_df.columns:
        input_df[col] = encoders[col].transform(input_df[col].astype(str))
    pred = model.predict(input_df)
    winner = encoders['winning_team'].inverse_transform(pred)[0]
    return winner

In [26]:
# 5. Cup Winner Prediction
def predict_cup_winner(matches, match_model, encoders, upcoming_matches):
    team_wins = {team: 0 for team in pd.concat([matches['team1'], matches['team2']]).unique()}
    for m in upcoming_matches:
        winner = predict_match_winner(match_model, encoders, *m)
        team_wins[winner] += 1
    past_wins = matches['winning_team'].value_counts()
    for team, wins in past_wins.items():
        team_wins[team] += wins
    likely_cup_winner = max(team_wins, key=team_wins.get)
    return likely_cup_winner, team_wins[likely_cup_winner]

In [27]:
# Orange Cap
orange_cap, orange_runs = predict_orange_cap(balls)
print(f"Likely Orange Cap: {orange_cap} ({orange_runs} runs)")

Likely Orange Cap: JC Buttler (863 runs)


In [30]:
# Orange Cap
orange_cap, orange_runs = predict_orange_cap(balls)
print(f"Likely Orange Cap: {orange_cap} ({orange_runs} runs)")

# Purple Cap
purple_cap, purple_wickets = predict_purple_cap(balls)
print(f"Likely Purple Cap: {purple_cap} ({purple_wickets} wickets)")



Likely Orange Cap: JC Buttler (863 runs)
Likely Purple Cap: YS Chahal (29 wickets)


In [33]:
# Match Winner Model Training
match_model, encoders = train_match_winner_model(matches)

# Predict a future match (example: update with your actual upcoming matches)
upcoming_match_details = [
    # city, team1, team2, venue, toss_winner, toss_decision
    ('Mumbai', 'Mumbai Indians', 'Chennai Super Kings', 'Wankhede Stadium', 'Mumbai Indians', 'bat'),
    # Add more upcoming matches as needed
]

for match in upcoming_match_details:
    predicted_winner = predict_match_winner(match_model, encoders, *match)
    print(f"Predicted winner for {match[1]} vs {match[2]}: {predicted_winner}")

# Predict likely Cup Winner
likely_cup_winner, total_wins = predict_cup_winner(matches, match_model, encoders, upcoming_match_details)
print(f"Likely IPL Cup Winner: {likely_cup_winner} (Total Projected Wins: {total_wins})")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches[col] = encoders[col].fit_transform(matches[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches[col] = encoders[col].fit_transform(matches[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches[col] = encoders[col].fit_transform(matches[col].astype(str)

Predicted winner for Mumbai Indians vs Chennai Super Kings: Mumbai Indians
Likely IPL Cup Winner: Mumbai Indians (Total Projected Wins: 132)
