In [126]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd

In [35]:
df = pd.read_csv("./mma.csv")
df

Unnamed: 0,FighterId,Winner,WeightClass,FighterName,Gender,Age,Reach,StrikingSkill,GrapplingSkill,SubmissionSkill,Experience,LastFightResult,FightingStyle
0,1,YES,Welterweight,Kamaru Usman,male,35,76,85,70,60,20,Win,All-Rounder
1,2,NO,Lightweight,Justin Gaethje,male,34,70,95,55,60,25,Loss,Striker
2,3,YES,Featherweight,Alexander Volkanovski,male,34,71,80,75,65,25,Win,Striker
3,4,NO,Middleweight,Robert Whittaker,male,32,73,85,70,60,25,Loss,All-Rounder
4,5,YES,Light Heavyweight,Jamahal Hill,male,31,76,90,70,60,10,Win,All-Rounder
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,66,YES,Flyweight,Andrea Lee,female,34,64,80,75,70,10,Win,All-Rounder
66,67,NO,Bantamweight,Kaitlyn Chookagian,female,34,66,85,80,75,20,Loss,All-Rounder
67,68,YES,Featherweight,Norma Dumont,female,32,68,80,75,70,10,Win,All-Rounder
68,69,NO,Strawweight,Lina Lansberg,female,40,66,80,75,70,15,Loss,Striker


In [99]:
selected_cols = [
    "WeightClass",
    "Gender",
    "Age",
    "Reach",
    "StrikingSkill",
    "GrapplingSkill",
    "SubmissionSkill",
    "Experience",
    "LastFightResult",
    "FightingStyle",
]

In [111]:
cat_cols = df.select_dtypes("object").drop(columns=["Winner", "FighterName"]).columns
num_cols = df.select_dtypes("int64").drop(columns=["FighterId"]).columns

df[cat_cols]

Unnamed: 0,WeightClass,Gender,LastFightResult,FightingStyle
0,Welterweight,male,Win,All-Rounder
1,Lightweight,male,Loss,Striker
2,Featherweight,male,Win,Striker
3,Middleweight,male,Loss,All-Rounder
4,Light Heavyweight,male,Win,All-Rounder
...,...,...,...,...
65,Flyweight,female,Win,All-Rounder
66,Bantamweight,female,Loss,All-Rounder
67,Featherweight,female,Win,All-Rounder
68,Strawweight,female,Loss,Striker


In [149]:
onehot_encoder = OneHotEncoder(sparse_output=False)
encoded_train_dataset = onehot_encoder.fit_transform(df[cat_cols])

encoded_train_df = pd.DataFrame(
    encoded_train_dataset, columns=onehot_encoder.get_feature_names_out()
)

encoded_train_df = pd.concat(
    [pd.concat([df["Winner"], df[num_cols]], axis=1), encoded_train_df], axis=1
)

X_train, X_test, Y_train, Y_test = train_test_split(
    encoded_train_df.drop(columns=["Winner"]), encoded_train_df[["Winner"]]
)

In [150]:
model = DecisionTreeClassifier()
model.fit(X_train, Y_train)

In [151]:
test_in = pd.DataFrame(
    [["Heavyweight", "male", 24, 76, 50, 80, 90, 50, "Win", "Grappler"]],
    columns=selected_cols,
)
test_in[cat_cols]

Unnamed: 0,WeightClass,Gender,LastFightResult,FightingStyle
0,Heavyweight,male,Win,Grappler


In [152]:
input_df = pd.DataFrame(
    onehot_encoder.transform(test_in[cat_cols]),
    columns=onehot_encoder.get_feature_names_out(),
)
input_df = pd.concat([test_in[num_cols], input_df], axis=1)
input_df

Unnamed: 0,Age,Reach,StrikingSkill,GrapplingSkill,SubmissionSkill,Experience,WeightClass_Bantamweight,WeightClass_Featherweight,WeightClass_Flyweight,WeightClass_Heavyweight,...,Gender_female,Gender_male,LastFightResult_Loss,LastFightResult_Win,FightingStyle_ All-Rounder,FightingStyle_All-Rounder,FightingStyle_Grappler,FightingStyle_Striker,FightingStyle_Striker ```plaintext,FightingStyle_Wrestler
0,24,76,50,80,90,50,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


In [158]:
print(f"Accuracy: {model.score(X_test, Y_test)}")

Accuracy: 1.0


In [157]:
if model.predict(input_df)[0] == "YES":
    print("Win")
else:
    print("Lose")

Win
