In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [3]:
# Read the match table from the CSV file in the working directory.
cricket_data = pd.read_csv(filepath_or_buffer='cricket.csv')

# Preview the first five rows as the cell output.
cricket_data.head(n=5)

Unnamed: 0,match_id,team,opponent,team_score_innings1,toss,ground,result
0,1,Pakistan,England,153,bowl,Gaddafi Stadium,1
1,2,India,Sri Lanka,160,bat,Dubai International Stadium,0
2,3,West Indies,India,182,bowl,Old Trafford,0
3,4,South Africa,New Zealand,119,bat,Lord's,0
4,5,Sri Lanka,India,196,bat,Pallekele International Cricket Stadium,1


In [4]:
# Target: the `result` column.
y = cricket_data['result']

# Features: every remaining column except the match identifier and the target.
x = cricket_data.drop(['match_id', 'result'], axis='columns')

In [5]:
# Preview the first five rows of the feature frame.
x.head(n=5)

Unnamed: 0,team,opponent,team_score_innings1,toss,ground
0,Pakistan,England,153,bowl,Gaddafi Stadium
1,India,Sri Lanka,160,bat,Dubai International Stadium
2,West Indies,India,182,bowl,Old Trafford
3,South Africa,New Zealand,119,bat,Lord's
4,Sri Lanka,India,196,bat,Pallekele International Cricket Stadium


In [6]:
# Preview the first five target labels.
y.head(n=5)

0    1
1    0
2    0
3    0
4    1
Name: result, dtype: int64

In [7]:
# Feature groups, named so the preprocessing step can treat each differently.

# Numeric feature(s).
numerical_cols = ['team_score_innings1']

# String-valued features.
categorical_cols = [
    'team',
    'opponent',
    'toss',
    'ground',
]

In [8]:
# Route each feature group to its own treatment:
#   'cat' -> one-hot encode the categorical columns (handle_unknown='ignore')
#   'num' -> pass the numerical columns through unchanged
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', 'passthrough', numerical_cols),
])

In [9]:
# Chain preprocessing and the classifier into one estimator so the same
# transformations are applied at fit time and at predict time.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000)),
])

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Fit the whole pipeline (preprocessing + classifier) on the training split.
model.fit(X=x_train, y=y_train)

In [12]:
# Predicted labels for the held-out test rows.
y_pred = model.predict(X=x_test)

In [13]:
# Show the predicted labels for the test split (long arrays print abbreviated with "...").
print(y_pred)

[1 1 0 ... 0 0 1]


In [14]:
# Evaluate the test-set predictions: print each metric under its own heading,
# in the order accuracy -> confusion matrix -> classification report.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))

Accuracy: 0.519
Confusion Matrix:
 [[606 424]
 [538 432]]
Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.59      0.56      1030
           1       0.50      0.45      0.47       970

    accuracy                           0.52      2000
   macro avg       0.52      0.52      0.52      2000
weighted avg       0.52      0.52      0.52      2000



In [15]:
# Describe one hypothetical match as a single-row DataFrame, using the same
# feature columns the pipeline was fitted on.
sample_input = pd.DataFrame([{
    'team': 'Sri Lanka',
    'opponent': 'England',
    'team_score_innings1': 143,
    'toss': 'bat',
    'ground': 'Gaddafi Stadium'
}])

# Keep the single-row prediction in its own variable. Reusing `y_pred` would
# overwrite the test-set predictions, so re-running the evaluation cell
# afterwards would compare `y_test` against a one-element array.
sample_pred = model.predict(sample_input)

# Label 1 is reported as "Win"; any other label is reported as "Loss".
print("Predicted Result:", "Win" if sample_pred[0] == 1 else "Loss")


Predicted Result: Loss


In [16]:
import joblib

# Persist the fitted pipeline to disk so it can be reloaded later without retraining.
# NOTE: the file is pickle-based; only load it back from a trusted source.
joblib.dump(value=model, filename='cricket_win_predictor.pkl')

['cricket_win_predictor.pkl']