In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier


In [2]:
data = pd.read_csv("differentials.csv")
X = data[["Reach (inches) Differential", "Height (inches) Differential", "Significant Strikes Landed per Minute Differential", "Significant Striking Accuracy Differential", 
         "Significant Strikes Absorbed per Minute Differential", "Significant Strike Defence (the percentage of opponents strikes that did not land) Differential",
         "Average Takedowns Landed per 15 minutes Differential", "Takedown Accuracy Differential", "Takedown Defense (the percentage of opponents TD attempts that did not land) Differential",
         "Average Submissions Attempted per 15 minutes"]]
Y = data["Winner"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

In [8]:
model = LogisticRegression()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
print(model.score(X_test, Y_test))

features = X_train.columns
coefficients = model.coef_[0]
importance = list(zip(features, coefficients))
importance = sorted(importance, key=lambda x: abs(x[1]), reverse=True)
print("Feature importance:")
for feature, coef in importance:
    print(f"{feature}: {coef}")


0.6331403762662807
Feature importance:
Significant Striking Accuracy Differential: 1.2341019539942693
Takedown Defense (the percentage of opponents TD attempts that did not land) Differential: 0.9673895121611371
Significant Strike Defence (the percentage of opponents strikes that did not land) Differential: 0.46930240509747234
Significant Strikes Landed per Minute Differential: 0.3337993602118872
Significant Strikes Absorbed per Minute Differential: -0.27171456238558195
Takedown Accuracy Differential: -0.15529977190785485
Average Submissions Attempted per 15 minutes: 0.11624746326257802
Average Takedowns Landed per 15 minutes Differential: 0.1115143563487816
Reach (inches) Differential: 0.0278147729126593
Height (inches) Differential: 0.006674002480189565


In [10]:
model = RandomForestClassifier()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
model.score(X_test, Y_test)


0.6215629522431259

In [69]:
model = SVC()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
model.score(X_test, Y_test)

0.6642547033285094

In [70]:
model = GradientBoostingClassifier()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
model.score(X_test, Y_test)

0.6548480463096961

In [9]:
fightsDF = pd.read_csv("fightStats.csv")
fightersDF = pd.read_csv("fighterStats.csv")
columns = ["Reach (inches) Differential",
           "Height (inches) Differential",
           "Significant Strikes Landed per Minute Differential",
           "Significant Striking Accuracy Differential",
           "Significant Strikes Absorbed per Minute Differential",
           "Significant Strike Defence (the percentage of opponents strikes that did not land) Differential",
           "Average Takedowns Landed per 15 minutes Differential",
           "Takedown Accuracy Differential",
           "Takedown Defense (the percentage of opponents TD attempts that did not land) Differential",
           "Average Submissions Attempted per 15 minutes"
           ]
df = pd.DataFrame(columns=columns)
def predictor(fighter1, fighter2):

    redReach = fightersDF.loc[fightersDF["Name"] == fighter1, "Reach (inches)"].values[0]
    blueReach = fightersDF.loc[fightersDF["Name"] == fighter2, "Reach (inches)"].values[0]
    reachDifference = redReach - blueReach

    redHeight = fightersDF.loc[fightersDF["Name"] == fighter1, "Height (inches)"].values[0]
    blueHeight = fightersDF.loc[fightersDF["Name"] == fighter2, "Height (inches)"].values[0]
    heightDifference = redHeight - blueHeight

    redSSPerMin = fightersDF.loc[fightersDF["Name"] == fighter1, "Significant Strikes Landed per Minute"].values[0]
    blueSSPerMin = fightersDF.loc[fightersDF["Name"] == fighter2, "Significant Strikes Landed per Minute"].values[0]
    ssPerMinDifference = redSSPerMin - blueSSPerMin

    redSSAcc = fightersDF.loc[fightersDF["Name"] == fighter1, "Significant Striking Accuracy"].values[0]
    blueSSAcc = fightersDF.loc[fightersDF["Name"] == fighter2, "Significant Striking Accuracy"].values[0]
    ssAccDifference = redSSAcc - blueSSAcc

    redSSAbs = fightersDF.loc[fightersDF["Name"] == fighter1, "Significant Strikes Absorbed per Minute"].values[0]
    blueSSAbs = fightersDF.loc[fightersDF["Name"] == fighter2, "Significant Strikes Absorbed per Minute"].values[0]
    ssAbsDifference = redSSAbs - blueSSAbs

    redSSDef = fightersDF.loc[fightersDF["Name"] == fighter1, "Significant Strike Defence (the percentage of opponents strikes that did not land)"].values[0]
    blueSSDef = fightersDF.loc[fightersDF["Name"] == fighter2, "Significant Strike Defence (the percentage of opponents strikes that did not land)"].values[0]
    ssDefDifference = redSSDef - blueSSDef

    redAvgTD = fightersDF.loc[fightersDF["Name"] == fighter1, "Average Takedowns Landed per 15 minutes"].values[0]
    blueAvgTD = fightersDF.loc[fightersDF["Name"] == fighter2, "Average Takedowns Landed per 15 minutes"].values[0]
    avgTDDifference = redAvgTD - blueAvgTD

    redTDAcc = fightersDF.loc[fightersDF["Name"] == fighter1, "Takedown Accuracy"].values[0]
    blueTDAcc = fightersDF.loc[fightersDF["Name"] == fighter2, "Takedown Accuracy"].values[0]
    tdAccDifference = redTDAcc - blueTDAcc

    redTDDef = fightersDF.loc[fightersDF["Name"] == fighter1, "Takedown Defense (the percentage of opponents TD attempts that did not land)"].values[0]
    blueTDDef = fightersDF.loc[fightersDF["Name"] == fighter2, "Takedown Defense (the percentage of opponents TD attempts that did not land)"].values[0]
    tdDefDifference = redTDDef - blueTDDef

    redAvgSub = fightersDF.loc[fightersDF["Name"] == fighter1, "Average Submissions Attempted per 15 minutes"].values[0]
    blueAvgSub = fightersDF.loc[fightersDF["Name"] == fighter2, "Average Submissions Attempted per 15 minutes"].values[0]
    avgSubDifference = redAvgSub - blueAvgSub

    row = [reachDifference, heightDifference, ssPerMinDifference, ssAccDifference, ssAbsDifference,
           ssDefDifference, avgTDDifference, tdAccDifference, tdDefDifference, avgSubDifference]
    
    rowDF = pd.DataFrame([row], columns=columns)
    probWin = model.predict_proba(rowDF)[0][1]*100
    probLoss = model.predict_proba(rowDF)[0][0]*100
    prediction = model.predict(rowDF)
    if prediction == 0:
        print("Predicted winner: " + fighter2 + ". " + str(probLoss) + "%% chance of winning")
    else:
        print("Predicted winner: " + fighter1 + ". " + str(probWin) + "%% chance of winning")

    
    

predictor("Ilia Topuria", "Islam Makhachev")
predictor("Jon Jones", "Tom Aspinall")

Predicted winner: Ilia Topuria. 54.22526146454033%% chance of winning
Predicted winner: Tom Aspinall. 72.72213636874444%% chance of winning
