In [None]:
import pandas as pd
from pycaret.regression import *

# Load data
df = pd.read_csv('../data/T20_match_score.csv')

# Drop unnecessary columns
df.drop(['match_id'], axis=1, inplace=True)

# Setup PyCaret AutoML
reg = setup(data=df, target='final_score', session_id=123, 
            categorical_features=['batting_team', 'bowling_team', 'venue'], 
            silent=True)

# Run AutoML
best_model = compare_models()
save_model(best_model, 'best_cricket_model')


In [10]:
import os
print("Current directory:", os.getcwd())
print("Files:", os.listdir())


Current directory: d:\CricketScorePredictor\notebooks
Files: ['automl_cricket.ipynb', 'logs.log']


In [11]:
# 📦 Step 1: Import Libraries
import pandas as pd
from pycaret.regression import *

# 📁 Step 2: Load the dataset from main folder
df = pd.read_csv('t20_cricket_match_score_prediction.csv')

# 🔍 Step 3: View basic info (optional)
df.head()


Unnamed: 0,Match ID,Overs Played,Wickets Lost,Run Rate,Home/Away,Opponent Strength,Pitch Condition,Weather,Predicted Score
0,1,7,1,11.04,Away,3,Bowling,Sunny,82
1,2,20,10,11.87,Home,5,Bowling,Sunny,204
2,3,15,7,6.14,Home,7,Balanced,Sunny,105
3,4,11,8,8.84,Home,9,Batting,Cloudy,121
4,5,8,0,9.56,Home,2,Balanced,Sunny,104


In [12]:
# 🧹 Step 4: Drop unnecessary column (if it exists)
if 'match_id' in df.columns:
    df.drop('match_id', axis=1, inplace=True)

df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Match ID           1500 non-null   int64  
 1   Overs Played       1500 non-null   int64  
 2   Wickets Lost       1500 non-null   int64  
 3   Run Rate           1500 non-null   float64
 4   Home/Away          1500 non-null   object 
 5   Opponent Strength  1500 non-null   int64  
 6   Pitch Condition    1500 non-null   object 
 7   Weather            1500 non-null   object 
 8   Predicted Score    1500 non-null   int64  
dtypes: float64(1), int64(5), object(3)
memory usage: 105.6+ KB


In [23]:
# 📦 Step 1: Import libraries
import pandas as pd
from pycaret.regression import *

# 📁 Step 2: Load dataset
df = pd.read_csv('t20_cricket_match_score_prediction.csv')

# 🧹 Step 3: Clean column names
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')
print("Cleaned Columns:", df.columns)

# ⚙️ Step 4: Setup PyCaret (correct target)
s = setup(
    data=df,
    target='predicted_score',  # ✅ use cleaned column name
    session_id=42
)


Cleaned Columns: Index(['match_id', 'overs_played', 'wickets_lost', 'run_rate', 'home/away',
       'opponent_strength', 'pitch_condition', 'weather', 'predicted_score'],
      dtype='object')


Unnamed: 0,Description,Value
0,Session id,42
1,Target,predicted_score
2,Target type,Regression
3,Original data shape,"(1500, 9)"
4,Transformed data shape,"(1500, 13)"
5,Transformed train set shape,"(1050, 13)"
6,Transformed test set shape,"(450, 13)"
7,Numeric features,5
8,Categorical features,3
9,Preprocess,True


In [24]:
# 🧠 Step 5: Train and compare models
best_model = compare_models()

# 💾 Step 6: Save best model
save_model(best_model, 'best_cricket_model')


Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['match_id', 'overs_played',
                                              'wickets_lost', 'run_rate',
                                              'opponent_strength'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['home/away', 'pitch_condition',
                                              'weather'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('ordin...
                                                                          'mapping': Away    0
 Home    1
 NaN    -1
 dtype: int64}]))),
                 ('onehot_encoding',
                  TransformerWrapper(include=['pitch_condition', 'weather'],
                                     transformer=OneHotEncoder(cols=['pitch_condition',
 