In [9]:
# Step 1: Import libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np


In [10]:
# Step 2: Load cleaned dataset
df = pd.read_csv("cleaned_ipl_dataset.csv")
df.head()


Unnamed: 0,season,city,match_type,venue,team1,team2,toss_winner,toss_decision,winner,target_runs,target_overs,super_over
0,2007/08,Bangalore,League,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,223.0,20.0,N
1,2007/08,Chandigarh,League,"Punjab Cricket Association Stadium, Mohali",Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,241.0,20.0,N
2,2007/08,Delhi,League,Feroz Shah Kotla,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,130.0,20.0,N
3,2007/08,Mumbai,League,Wankhede Stadium,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,166.0,20.0,N
4,2007/08,Kolkata,League,Eden Gardens,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knight Riders,111.0,20.0,N


In [11]:
# Step 3: Encode categorical columns
le = LabelEncoder()
df_encoded = df.apply(le.fit_transform)

# Features and label
X = df_encoded.drop('winner', axis=1)
y = df_encoded['winner']

In [12]:
# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
from sklearn.tree import DecisionTreeClassifier

cart_model = DecisionTreeClassifier(criterion="gini", random_state=42)
cart_model.fit(X_train, y_train)
y_pred_cart = cart_model.predict(X_test)

print("CART Results (Gini Index):")
print("Accuracy:", accuracy_score(y_test, y_pred_cart))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_cart))
print(classification_report(y_test, y_pred_cart))


CART Results (Gini Index):
Accuracy: 0.4012738853503185
Confusion Matrix:
 [[13  0  0  2  3  3  2  2  0  2]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 1  1  1  0  0  1  0  1  1  0]
 [ 1  1  1  1  1  1  1  2  0  1]
 [ 1  1  0  3  5  3  2  1  2  0]
 [ 1  1  0  0  2 12  0  1  1  2]
 [ 2  1  0  0  2  4 10  2  2  0]
 [ 0  0  2  1  1  2  2  6  4  1]
 [ 0  2  1  0  1  2  3  2  9  2]
 [ 1  0  0  0  1  1  0  1  2  6]]
              precision    recall  f1-score   support

           0       0.65      0.48      0.55        27
           1       0.00      0.00      0.00         0
           2       0.20      0.17      0.18         6
           3       0.14      0.10      0.12        10
           4       0.31      0.28      0.29        18
           5       0.41      0.60      0.49        20
           6       0.50      0.43      0.47        23
           7       0.33      0.32      0.32        19
           8       0.43      0.41      0.42        22
           9       0.43      0.50      0.46        12

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
id3_model = DecisionTreeClassifier(criterion="entropy", random_state=42)
id3_model.fit(X_train, y_train)
y_pred_id3 = id3_model.predict(X_test)

print("ID3 Results (Information Gain):")
print("Accuracy:", accuracy_score(y_test, y_pred_id3))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_id3))
print(classification_report(y_test, y_pred_id3))


ID3 Results (Information Gain):
Accuracy: 0.4140127388535032
Confusion Matrix:
 [[18  0  0  3  2  2  0  0  1  1]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 1  0  2  0  0  0  0  2  1  0]
 [ 1  1  0  3  2  1  1  1  0  0]
 [ 1  1  0  2  3  6  1  2  2  0]
 [ 1  2  0  1  2 11  1  0  0  2]
 [ 2  0  1  0  1  2 11  2  2  2]
 [ 0  0  2  1  0  2  4  6  3  1]
 [ 1  1  1  3  3  2  3  1  5  2]
 [ 1  0  1  0  2  0  0  0  2  6]]
              precision    recall  f1-score   support

           0       0.69      0.67      0.68        27
           1       0.00      0.00      0.00         0
           2       0.29      0.33      0.31         6
           3       0.23      0.30      0.26        10
           4       0.20      0.17      0.18        18
           5       0.42      0.55      0.48        20
           6       0.52      0.48      0.50        23
           7       0.43      0.32      0.36        19
           8       0.31      0.23      0.26        22
           9       0.43      0.50      0.46     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [15]:
c45_model = DecisionTreeClassifier(criterion="entropy", max_depth=5, min_samples_split=10, random_state=42)
c45_model.fit(X_train, y_train)
y_pred_c45 = c45_model.predict(X_test)

print("C4.5 Results (Gain Ratio Approx.):")
print("Accuracy:", accuracy_score(y_test, y_pred_c45))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_c45))
print(classification_report(y_test, y_pred_c45))


C4.5 Results (Gain Ratio Approx.):
Accuracy: 0.43312101910828027
Confusion Matrix:
 [[17  0  0  2  3  0  1  2  1  1]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 0  0  3  0  0  0  0  2  1  0]
 [ 0  1  0  0  4  0  2  2  0  1]
 [ 2  0  0  0  5  2  3  2  4  0]
 [ 0  1  0  0  5  6  0  3  2  3]
 [ 2  1  0  1  0  4  9  1  4  1]
 [ 0  0  2  1  0  1  2  9  3  1]
 [ 1  1  2  1  0  1  0  0 14  2]
 [ 1  0  1  0  0  0  2  1  2  5]]
              precision    recall  f1-score   support

           0       0.74      0.63      0.68        27
           1       0.00      0.00      0.00         0
           2       0.38      0.50      0.43         6
           3       0.00      0.00      0.00        10
           4       0.29      0.28      0.29        18
           5       0.43      0.30      0.35        20
           6       0.47      0.39      0.43        23
           7       0.41      0.47      0.44        19
           8       0.45      0.64      0.53        22
           9       0.36      0.42      0.38 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted')
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    cm = confusion_matrix(y_test, y_pred)

    results.append({"Model": name, "Accuracy": acc, "Precision": prec, "Recall": rec, "F1 Score": f1})

    print(f"\nModel: {name}")
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1:.2f}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Model: CART (gini)
Accuracy: 0.40
Precision: 0.41
Recall: 0.40
F1 Score: 0.39

Model: ID3 (entropy)
Accuracy: 0.48
Precision: 0.50
Recall: 0.48
F1 Score: 0.48

Model: C4.5 Approx (entropy + min_samples_split=10)
Accuracy: 0.49
Precision: 0.51
Recall: 0.49
F1 Score: 0.48

Model: Random Forest
Accuracy: 0.49
Precision: 0.50
Recall: 0.49
F1 Score: 0.48


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
print("CHAID implementation requires external libraries like 'CHAID' or manual Chi-square split logic.")


CHAID implementation requires external libraries like 'CHAID' or manual Chi-square split logic.


In [17]:
print("Accuracy Scores:")
print("CART :", accuracy_score(y_test, y_pred_cart))
print("ID3  :", accuracy_score(y_test, y_pred_id3))
print("C4.5 :", accuracy_score(y_test, y_pred_c45))


Accuracy Scores:
CART : 0.4012738853503185
ID3  : 0.4140127388535032
C4.5 : 0.43312101910828027
