In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
matches = pd.read_csv("cleaned_matches.csv")
matches.columns = matches.columns.str.strip().str.lower()

# Select useful columns
data = matches[["season", "team1", "team2", "toss_winner", "toss_decision", "venue", "winner"]].dropna()

# Encode categorical variables
le = LabelEncoder()
for col in ["team1", "team2", "toss_winner", "toss_decision", "venue", "winner"]:
    data[col] = le.fit_transform(data[col])

# Train-test split
X = data.drop("winner", axis=1)
y = data["winner"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


✅ Accuracy: 0.4794520547945205

Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.70      0.60        27
           1       0.52      0.55      0.54        20
           2       0.69      0.75      0.72        12
           3       0.56      0.50      0.53        28
           4       0.67      0.46      0.55        13
           5       0.35      0.45      0.39        20
           6       0.00      0.00      0.00         1
           7       0.33      0.15      0.21        26
           8       0.44      0.48      0.46        25
           9       0.52      0.50      0.51        30
          10       0.50      0.33      0.40         3
          11       0.28      0.36      0.31        14

    accuracy                           0.48       219
   macro avg       0.45      0.44      0.43       219
weighted avg       0.48      0.48      0.47       219



In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Load deliveries data
deliveries = pd.read_csv("cleaned_deliveries.csv")
deliveries.columns = deliveries.columns.str.strip().str.lower()

# Aggregate per innings
innings_summary = deliveries.groupby(["match_id", "inning", "batting_team", "bowling_team", "over"]).agg(
    runs=("total_runs", "sum"),
    wickets=("player_dismissed", lambda x: (x.notna()).sum())
).reset_index()

# Cumulative runs & wickets
innings_summary["cum_runs"] = innings_summary.groupby(["match_id", "inning"])["runs"].cumsum()
innings_summary["cum_wkts"] = innings_summary.groupby(["match_id", "inning"])["wickets"].cumsum()

# Final score of innings
final_scores = innings_summary.groupby(["match_id", "inning"])["cum_runs"].max().reset_index(name="final_score")

# Merge with current state
data = pd.merge(innings_summary, final_scores, on=["match_id", "inning"])

# Features: after 6 overs predict final score
train_data = data[data["over"] <= 6]

X = train_data[["cum_runs", "cum_wkts", "over"]]
y = train_data["final_score"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train regressor
reg = RandomForestRegressor(n_estimators=200, random_state=42)
reg.fit(X_train, y_train)

# Predictions
y_pred = reg.predict(X_test)

print("✅ MAE (Mean Absolute Error):", mean_absolute_error(y_test, y_pred))


✅ MAE (Mean Absolute Error): 24.777742201967534


In [17]:
import pandas as pd

# ✅ Match Outcome Predictions for All Matches
def predict_all_matches(matches_df):
    # Encode dataset the same way as training
    data = matches_df[["season", "team1", "team2", "toss_winner", "toss_decision", "venue"]].copy()
    
    # Encode using label encoders
    for col in ["team1", "team2", "toss_winner", "toss_decision", "venue"]:
        data[col] = data[col].apply(
            lambda x: le_dict[col].transform([x])[0] if x in le_dict[col].classes_ else 0
        )
    
    # Predict winners
    predicted_winners = outcome_model.predict(data)
    predicted_winners = le_dict["winner"].inverse_transform(predicted_winners)
    
    matches_df["predicted_winner"] = predicted_winners
    return matches_df


# Run predictions
predicted_matches = predict_all_matches(matches.copy())

print("✅ Predicted Winners for All Matches")
display(predicted_matches.head(10))


✅ Predicted Winners for All Matches


Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,...,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2,year,match_result,predicted_winner
0,335982,2008,Bangalore,18-04-2008,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,...,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen,2008,Completed,Kolkata Knight Riders
1,335983,2008,Chandigarh,19-04-2008,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Punjab Kings,Chennai Super Kings,Chennai Super Kings,...,33.0,241.0,20.0,N,,MR Benson,SL Shastri,2008,Completed,Chennai Super Kings
2,335984,2008,Delhi,19-04-2008,League,MF Maharoof,Feroz Shah Kotla,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,...,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar,2008,Completed,Rajasthan Royals
3,335985,2008,Mumbai,20-04-2008,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,...,5.0,166.0,20.0,N,,SJ Davis,DJ Harper,2008,Completed,Mumbai Indians
4,335986,2008,Kolkata,20-04-2008,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,...,5.0,111.0,20.0,N,,BF Bowden,K Hariharan,2008,Completed,Kolkata Knight Riders
5,335987,2008,Jaipur,21-04-2008,League,SR Watson,Sawai Mansingh Stadium,Rajasthan Royals,Punjab Kings,Punjab Kings,...,6.0,167.0,20.0,N,,Aleem Dar,RB Tiffin,2008,Completed,Rajasthan Royals
6,335988,2008,Hyderabad,22-04-2008,League,V Sehwag,"Rajiv Gandhi International Stadium, Uppal",Sunrisers Hyderabad,Delhi Capitals,Sunrisers Hyderabad,...,9.0,143.0,20.0,N,,IL Howell,AM Saheba,2008,Completed,Delhi Capitals
7,335989,2008,Chennai,23-04-2008,League,ML Hayden,"MA Chidambaram Stadium, Chepauk",Chennai Super Kings,Mumbai Indians,Mumbai Indians,...,6.0,209.0,20.0,N,,DJ Harper,GA Pratapkumar,2008,Completed,Chennai Super Kings
8,335990,2008,Hyderabad,24-04-2008,League,YK Pathan,"Rajiv Gandhi International Stadium, Uppal",Sunrisers Hyderabad,Rajasthan Royals,Rajasthan Royals,...,3.0,215.0,20.0,N,,Asad Rauf,MR Benson,2008,Completed,Rajasthan Royals
9,335991,2008,Chandigarh,25-04-2008,League,KC Sangakkara,"Punjab Cricket Association Stadium, Mohali",Punjab Kings,Mumbai Indians,Mumbai Indians,...,66.0,183.0,20.0,N,,Aleem Dar,AM Saheba,2008,Completed,Punjab Kings
