1: Import Libraries & Load Data

In [1]:
# 1: Import Libraries & Load Data
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier

# Load ODI datasets
players_df = pd.read_csv(r'G:\SLTC\8th sem\machine learning\mini project\players.csv')
odi_batting_df = pd.read_csv(r'G:\SLTC\8th sem\machine learning\mini project\ODI\ODI_batting.csv')
odi_bowling_df = pd.read_csv(r'G:\SLTC\8th sem\machine learning\mini project\ODI\ODI_bawling.csv')


2: Clean & Prepare Batting Data

In [2]:
# 2: Clean & Prepare ODI Batting Data

# Merge player info with batting data
odi_batting_df = pd.merge(players_df[['PlayerID', 'Full Name', 'Playing Role']],
                          odi_batting_df, on='PlayerID', how='inner')

# Rename columns for consistency
odi_batting_df.rename(columns={
    'Runs': 'Runs_bat',
    'Ave': 'Avg_bat',
    'SR': 'SR_bat',
    'Mat': 'Mat_bat'
}, inplace=True)

# Select and convert numeric features
batting_features_odi = ['Runs_bat', 'Avg_bat', 'SR_bat', 'Mat_bat']
odi_batting_df[batting_features_odi] = odi_batting_df[batting_features_odi].apply(pd.to_numeric, errors='coerce').fillna(0)


3: Clean & Prepare Bowling Data

In [3]:
# 3: Clean & Prepare ODI Bowling Data

# Merge player info with bowling data
odi_bowling_df = pd.merge(players_df[['PlayerID', 'Full Name', 'Playing Role']],
                          odi_bowling_df, on='PlayerID', how='inner')

# Rename columns for consistency
odi_bowling_df.rename(columns={
    'Wkts': 'Wkts_bowl',
    'Econ': 'Econ_bowl',
    'Ave': 'Avg_bowl',
    'SR': 'SR_bowl'
}, inplace=True)

# Select and convert numeric features
bowling_features_odi = ['Wkts_bowl', 'Econ_bowl', 'Avg_bowl', 'SR_bowl']
odi_bowling_df[bowling_features_odi] = odi_bowling_df[bowling_features_odi].apply(pd.to_numeric, errors='coerce').fillna(0)


4: Add Labels and Train Models

In [4]:
# 4: Add Labels (Based on basic thresholds)

# You can adjust these thresholds for better performance
odi_batting_df['Selected'] = (odi_batting_df['Runs_bat'] > 500).astype(int)
odi_bowling_df['Selected'] = (odi_bowling_df['Wkts_bowl'] > 15).astype(int)


5: Train & Save Batting Model

In [5]:
# 5: Train & Save Batting Model for ODI

X_bat_odi = odi_batting_df[batting_features_odi]
y_bat_odi = odi_batting_df['Selected']

bat_model_odi = RandomForestClassifier(n_estimators=100, random_state=42)
bat_model_odi.fit(X_bat_odi, y_bat_odi)

joblib.dump(bat_model_odi, 'bat_model_odi.pkl')


['bat_model_odi.pkl']

6: Train & Save Bowling Model

In [6]:
# 6: Train & Save Bowling Model for ODI

X_bowl_odi = odi_bowling_df[bowling_features_odi]
y_bowl_odi = odi_bowling_df['Selected']

bowl_model_odi = RandomForestClassifier(n_estimators=100, random_state=42)
bowl_model_odi.fit(X_bowl_odi, y_bowl_odi)

joblib.dump(bowl_model_odi, 'bowl_model_odi.pkl')


['bowl_model_odi.pkl']

7: Evaluate Accuracy (Optional)

In [7]:
# 7: Optional - Evaluate Accuracy

print("📈 ODI Batter Model Accuracy:", bat_model_odi.score(X_bat_odi, y_bat_odi))
print("📈 ODI Bowler Model Accuracy:", bowl_model_odi.score(X_bowl_odi, y_bowl_odi))


📈 ODI Batter Model Accuracy: 1.0
📈 ODI Bowler Model Accuracy: 1.0
