1: Import Libraries & Load Data

In [3]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [4]:
# === Load T20 player datasets ===
# Single root folder for every input CSV: edit this one line to point the
# notebook at a different machine or checkout instead of touching each read.
DATA_DIR = Path(r'G:\SLTC\8th sem\machine learning\mini project')

# Player master table (joined on PlayerID in the cleaning cells below).
players_df = pd.read_csv(DATA_DIR / 'players.csv')
# Per-player T20 batting and bowling statistics live in the T20 sub-folder.
batting_df = pd.read_csv(DATA_DIR / 'T20' / 'T20_Batting.csv')
# NOTE(review): "Bolling" spelling is kept exactly as in the original path —
# confirm it matches the file name on disk.
bowling_df = pd.read_csv(DATA_DIR / 'T20' / 'T20_Bolling.csv')


2: Clean & Prepare Batting Data

In [5]:
# Attach name and role to every batting row (inner join keeps only PlayerIDs
# present in both tables), then give the stat columns batting-specific names.
batting_df = (
    players_df[['PlayerID', 'Full Name', 'Playing Role']]
    .merge(batting_df, on='PlayerID', how='inner')
    .rename(columns={
        'Runs': 'Runs_bat',
        'Ave': 'Avg_bat',
        'SR': 'SR_bat',
        'Mat': 'Mat_bat',
    })
)

# Columns fed to the batter model.
batting_features = ['Runs_bat', 'Avg_bat', 'SR_bat', 'Mat_bat']

# Force the feature columns to numeric: entries that cannot be parsed become
# NaN (errors='coerce') and every NaN is then replaced by 0.
batting_df[batting_features] = (
    batting_df[batting_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)


3: Clean & Prepare Bowling Data

In [6]:
# Attach name and role to every bowling row (inner join keeps only PlayerIDs
# present in both tables), then give the stat columns bowling-specific names.
bowling_df = (
    players_df[['PlayerID', 'Full Name', 'Playing Role']]
    .merge(bowling_df, on='PlayerID', how='inner')
    .rename(columns={
        'Wkts': 'Wkts_bowl',
        'Econ': 'Econ_bowl',
        'Ave': 'Avg_bowl',
        'SR': 'SR_bowl',
    })
)

# Columns fed to the bowler model.
bowling_features = ['Wkts_bowl', 'Econ_bowl', 'Avg_bowl', 'SR_bowl']

# Force the feature columns to numeric: entries that cannot be parsed become
# NaN (errors='coerce') and every NaN is then replaced by 0.
bowling_df[bowling_features] = (
    bowling_df[bowling_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)


4: Add Labels and Train Models

In [None]:
# Create dummy labels for selection.
# These are placeholder targets, not real selection decisions: each label is a
# hard threshold on one stat column (Runs_bat / Wkts_bowl) that is also one of
# the model's input features, so the classifiers can learn it trivially.
MIN_RUNS_SELECTED = 250  # batter is labelled 1 when Runs_bat is strictly above this
MIN_WKTS_SELECTED = 8    # bowler is labelled 1 when Wkts_bowl is strictly above this

batting_df['Selected'] = (batting_df['Runs_bat'] > MIN_RUNS_SELECTED).astype(int)
bowling_df['Selected'] = (bowling_df['Wkts_bowl'] > MIN_WKTS_SELECTED).astype(int)

In [8]:
# Fit the batter classifier on the batting feature columns against the
# 'Selected' label, then persist the fitted model next to the notebook.
X_bat, y_bat = batting_df[batting_features], batting_df['Selected']
# fit() returns the estimator itself, so construction and training can be chained.
bat_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_bat, y_bat)
joblib.dump(bat_model, 'bat_model_t20.pkl')


['bat_model_t20.pkl']

In [None]:
# Fit the bowler classifier on the bowling feature columns against the
# 'Selected' label, then persist the fitted model next to the notebook.
X_bowl, y_bowl = bowling_df[bowling_features], bowling_df['Selected']
# fit() returns the estimator itself, so construction and training can be chained.
bowl_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_bowl, y_bowl)
joblib.dump(bowl_model, 'bowl_model_t20.pkl')

['bowl_model_t20.pkl']

5: Optional – Evaluate Accuracy

In [10]:
# These two figures are *training* accuracy: the models are scored on the very
# rows they were fitted on, so a value of 1.0 only shows the forests memorised
# their training data and says nothing about unseen players.
print("📈 Batter Model Accuracy:", bat_model.score(X_bat, y_bat))
print("📈 Bowler Model Accuracy:", bowl_model.score(X_bowl, y_bowl))

# Held-out estimate: 5-fold cross-validation refits a fresh clone of each model
# on 4/5 of the rows and scores it on the remaining 1/5 (the fitted
# bat_model / bowl_model objects themselves are left untouched).
# Caveat: 'Selected' is a threshold on Runs_bat / Wkts_bowl, which are also
# input features, so even this figure is expected to be close to 1.0.
bat_cv_scores = cross_val_score(bat_model, X_bat, y_bat, cv=5, scoring='accuracy')
bowl_cv_scores = cross_val_score(bowl_model, X_bowl, y_bowl, cv=5, scoring='accuracy')
print("📈 Batter Model 5-fold CV Accuracy:", bat_cv_scores.mean())
print("📈 Bowler Model 5-fold CV Accuracy:", bowl_cv_scores.mean())

📈 Batter Model Accuracy: 1.0
📈 Bowler Model Accuracy: 1.0
