In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from fractions import Fraction
import pickle
import random

In [2]:
# Read the processed winner and loser tables. Rows without an expected_dmg
# value are discarded and the index is renumbered from zero afterwards.
# NOTE(review): each table is filtered on its own while later cells pair the
# two tables row-by-row -- confirm both tables lose the same rows.
winnersCsv = "./DataFrames/WinnersProcessed.csv"
losersCsv = "./DataFrames/LosersProcessed.csv"

dfWin = pd.read_csv(winnersCsv, index_col=0)
dfWin = dfWin.dropna(subset=["expected_dmg"]).reset_index(drop=True)

dfLose = pd.read_csv(losersCsv, index_col=0)
dfLose = dfLose.dropna(subset=["expected_dmg"]).reset_index(drop=True)

dfWin.head()

Unnamed: 0,name,desc,cr,immunities,resists,conditions,wins,actions,hp,str,dex,con,int,wis,cha,ac,spd,winlen,expected_dmg
0,Wraith of Ogre Power,Medium Undead Neutral Evil,5.0,"Necrotic,Poison,Charmed,Exhaustion,Grappled,Pa...","Acid,Cold,Fire,Lightning,Thunder,Bludgeoning,P...",Gauntlets of Ogre Power,"Werebear, Air Elemental, Troll, Roper, CR 5","Life Drain: +11, 4d8+9 Necrotic & Life Drain",39,19,16,16,12,14,15,13,60,5,18.396
1,Oni,Large Giant Lawful Evil,7.0,,,"Death Defying,Regeneration",,"Glaive: +6, 10ft, 2d10+4 Slashing\tMultiattack...",55,19,11,16,14,12,15,16,30,0,7.961
2,Giant Ape,Huge Beast Unaligned,7.0,,,Shielded,,"Fist: +8, 10ft, 3d10+6 Bludgeoning\tRock: +8, ...",69,23,14,18,7,12,7,12,40,0,12.907
3,Young Brass Dragon,Large Dragon Chaotic Good,6.0,Fire,,Reliable Damage,,"Multiattack: 10ft, Bite, 2 Claw\tBite: +6, 10f...",61,19,10,17,12,11,15,17,80,0,35.0
4,Fire Elemental,Large Elemental Neutral,5.0,"Fire,Poison,Exhaustion,Grappled,Paralyzed,Petr...","Bludgeoning,Piercing,Slashing","+2 CHA,Heated Body",,"Touch: +5, 2d6+3 Fire, On Fire\tMultiattack: F...",46,10,17,16,6,10,9,13,50,0,14.116


I forgot to record which side the winner and the loser were on, so I need to randomize which side the winning creature and the losing creature are placed on.
This prevents the model from always predicting the creature on one particular side.

In [41]:
import random
def getStatsValues(df: pd.DataFrame):
    """Return the stat columns of ``df`` as a 2-D array.

    The columns are always emitted in this order: cr, hp, str, dex, con, int,
    wis, cha, ac, spd, expected_dmg. The result has one row per row of ``df``.
    """
    statColumns = [
        "cr", "hp", "str", "dex", "con", "int",
        "wis", "cha", "ac", "spd", "expected_dmg",
    ]
    statFrame = df.loc[:, statColumns]
    return statFrame.values

def convertToFloat(val):
    """Convert ``val`` to a ``float``, accepting fraction text.

    Strings are parsed with ``fractions.Fraction``, so ``"1/4"`` becomes
    ``0.25`` and plain numeric text such as ``"5"`` becomes ``5.0``. Every
    other value is handed directly to ``float``.

    Raises:
        ValueError: if a string cannot be parsed by ``Fraction``.
    """
    # isinstance instead of an exact type comparison, so that str subclasses
    # also take the Fraction path rather than failing inside float().
    if isinstance(val, str):
        return float(Fraction(val))
    return float(val)

# Turn the "cr" column of both tables into floats (fraction text included).
dfWin["cr"] = dfWin["cr"].map(convertToFloat)
dfLose["cr"] = dfLose["cr"].map(convertToFloat)

# Stat matrices, one row per table row.
dfWinStats = getStatsValues(dfWin)
dfLoseStats = getStatsValues(dfLose)
# Winner stats followed by loser stats on every row.
dfStats = np.concatenate([dfWinStats, dfLoseStats], axis=1)

# One label per row: 1 places the winner's stats on the right-hand side,
# 0 places them on the left. The zeros absorb the extra row when the row count
# is odd, so there is always exactly one label for every row.
rowCount = dfStats.shape[0]
rightSide = np.ones(rowCount // 2)
leftSide = np.zeros(rowCount - rowCount // 2)
targets = np.append(leftSide, rightSide)
# Shuffle the labels in place so the winning side is random.
random.shuffle(targets)

# Assemble each row in the order its label dictates. Every row is visited,
# including the last one, so data and targets stay the same length.
data = [
    np.concatenate([dfLoseStats[i], dfWinStats[i]]) if targets[i]
    else np.concatenate([dfWinStats[i], dfLoseStats[i]])
    for i in range(rowCount)
]

In [3]:
def convertToFloat(val):
    """Convert ``val`` to a ``float``, accepting fraction text.

    Text input goes through ``fractions.Fraction`` (``"1/8"`` -> ``0.125``,
    ``"3"`` -> ``3.0``); anything else is passed straight to ``float``.

    Raises:
        ValueError: if a string cannot be parsed by ``Fraction``.
    """
    # isinstance also covers str subclasses, which an exact type comparison
    # would send to float() where fraction text cannot be parsed.
    if isinstance(val, str):
        return float(Fraction(val))
    return float(val)

def getDifData(df1: pd.DataFrame, df2: pd.DataFrame, targets: list, stats=['ac', 'cr', 'spd', 'hp', 'str', 'dex', 'con', 'int', 'wis', 'cha', 'expected_dmg'], threshold=900):
    """Build per-row stat differences between two row-aligned tables.

    Rows are paired by position (the index labels of ``df1`` and ``df2`` are
    irrelevant). For row ``i`` and each stat the result holds
    ``df1 - df2`` when ``targets[i]`` is truthy and ``df2 - df1`` otherwise.

    A stat at or above ``threshold`` is treated as not believable and replaced
    by the opponent's value before subtracting, which makes that difference
    zero. Neither input frame is modified.

    Args:
        df1: first table; must contain every column named in ``stats``.
        df2: second table with the same number of rows as ``df1``.
        targets: one flag per row selecting the direction of the subtraction.
        stats: column names to compare, in output-column order.
        threshold: values greater than or equal to this are replaced by the
            opponent's value.

    Returns:
        Float array of shape ``(len(df1), len(stats))``.

    Raises:
        ValueError: if ``df2`` has a different row count than ``df1`` or
            ``targets`` holds fewer entries than there are rows.
    """
    rowCount = df1.shape[0]
    if df2.shape[0] != rowCount:
        raise ValueError(
            f"df1 and df2 must have the same number of rows, got {rowCount} and {df2.shape[0]}"
        )

    flags = np.asarray(targets)[:rowCount].astype(bool)
    if flags.shape[0] != rowCount:
        raise ValueError(
            f"targets must provide one entry per row, got {flags.shape[0]} for {rowCount} rows"
        )

    columns = list(stats)
    first = df1[columns].to_numpy(dtype=float)
    second = df2[columns].to_numpy(dtype=float)

    # Replace unbelievable values with the opponent's value. The second
    # substitution deliberately reads the already-corrected first matrix.
    first = np.where(first >= threshold, second, first)
    second = np.where(second >= threshold, first, second)

    return np.where(flags[:, None], first - second, second - first)


# One binary label per row: half ones, and zeros for the rest (the zeros take
# the extra row when the row count is odd).
rightSize = dfWin.shape[0]//2
leftSize = dfWin.shape[0] - rightSize
rightSide = np.ones(rightSize)
leftSide = np.zeros(leftSize)
targets = np.append(leftSide, rightSide)
# Shuffle with a dedicated, seeded generator: the labels are then identical on
# every run and the global `random` state is left untouched. 47 matches the
# random_state used for the train/test split.
random.Random(47).shuffle(targets)

# Turn the "cr" column of both tables into floats (fraction text included).
dfWin["cr"] = dfWin["cr"].map(convertToFloat)
dfLose["cr"] = dfLose["cr"].map(convertToFloat)

# Label 1 -> winner minus loser, label 0 -> loser minus winner.
data = getDifData(dfWin, dfLose, targets)

# Scale every feature column.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so the test rows' min/max influence the scaling -- consider fitting on the
# training rows only.
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
data

array([[0.53571429, 0.5625    , 0.35      , ..., 0.44642857, 0.36363636,
        0.41665311],
       [0.5       , 0.5625    , 0.5       , ..., 0.44642857, 0.49090909,
        0.48753231],
       [0.64285714, 0.5       , 0.45      , ..., 0.42857143, 0.41818182,
        0.47037188],
       ...,
       [0.60714286, 0.4921875 , 0.5       , ..., 0.57142857, 0.65454545,
        0.49893749],
       [0.46428571, 0.5       , 0.45      , ..., 0.375     , 0.50909091,
        0.49093041],
       [0.5       , 0.5       , 0.35      , ..., 0.41071429, 0.45454545,
        0.48903992]])

In [43]:
# TODO: find a better way to normalize the data; the approach below does not currently work.

# def normalize(point):
#     if not point:
#         return 0
#     else: return 1/point

# for i in range(data.shape[0]):
#     for j in range(data.shape[1]):
#         data[i,j] = normalize(data[i,j])
# data

In [44]:
# Tally how many labels fall into each class

one = sum(1 for label in targets if label)
zero = len(targets) - one

print(f"One: {one}")
print(f"Zero: {zero}")

One: 5454
Zero: 5454


In [4]:
# Share of the rows used for training; the remainder is held out for testing
splitSize = 0.80

X_train, X_test, y_train, y_test = train_test_split(
    data,
    targets,
    test_size=(1 - splitSize),
    random_state=47,
)


In [46]:
# Sanity check: number of entries in a single test sample
firstTestRow = X_test[0]
print(len(firstTestRow))

11


In [5]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

# Earlier hyper-parameter experiments, kept for reference only.
# params = { 'hidden_layer_sizes': [(200, 100, 90), (2000, 500, 80), (2000, 1500, 80), (3000, 1000, 80)]
#             'solver': ['lbfgs']
# }

lrs = [1e-4, 0.5e-4, 1e-5, 1.5e-5, 2e-5, 1e-6, 1.5e-6, 2e-6 ]
hiddenLayers = [(2500, 2000, 500), (2000, 1000, 500), ()]
history = []
# for lr in lrs:
#     # TODO: Implement RandomSearch framework
#     clf = MLPClassifier(hidden_layer_sizes=(5000), max_iter=10000, solver='lbfgs', activation='tanh', learning_rate='adaptive', learning_rate_init=lr, early_stopping=True, verbose=True)
#     clf.fit(X_train, y_train)
#     predictions = clf.predict(X_test)
#     history.append(accuracy_score(y_test, predictions))

# scikit-learn only honours learning_rate, learning_rate_init and
# early_stopping for the 'sgd'/'adam' solvers; with 'lbfgs' they are ignored,
# so they are not passed here. random_state pins the weight initialisation so
# the fit is repeatable (47 matches the train/test split).
clf = MLPClassifier(
    hidden_layer_sizes=(10000, 2000),
    max_iter=500,
    solver='lbfgs',
    activation='tanh',
    random_state=47,
    verbose=True,
)
clf.fit(X_train, y_train)
#predictions = clf.predict(X_test)
# gs = GridSearchCV(clf)
# print(history)

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =     20124001     M =           10

At X0         0 variables are exactly at the bounds


 This problem is unconstrained.



At iterate    0    f=  6.93317D-01    |proj g|=  9.59157D-03

At iterate    1    f=  6.93116D-01    |proj g|=  8.45121D-04

At iterate    2    f=  6.80713D-01    |proj g|=  9.39870D-03

At iterate    3    f=  6.73347D-01    |proj g|=  2.13319D-02

At iterate    4    f=  6.70494D-01    |proj g|=  1.15136D-02

At iterate    5    f=  6.69562D-01    |proj g|=  5.75000D-03

At iterate    6    f=  6.68396D-01    |proj g|=  2.07418D-03

At iterate    7    f=  6.65161D-01    |proj g|=  3.19852D-03

At iterate    8    f=  6.60236D-01    |proj g|=  3.65084D-03

At iterate    9    f=  6.57943D-01    |proj g|=  2.51256D-02

At iterate   10    f=  6.54792D-01    |proj g|=  1.20696D-03

At iterate   11    f=  6.53723D-01    |proj g|=  3.39864D-03

At iterate   12    f=  6.52760D-01    |proj g|=  1.87454D-03

At iterate   13    f=  6.50926D-01    |proj g|=  2.23464D-02

At iterate   14    f=  6.49603D-01    |proj g|=  5.53379D-03

At iterate   15    f=  6.49356D-01    |proj g|=  1.16024D-03

At iter

In [6]:
# Score the fitted classifier on the held-out test rows
predictions = clf.predict(X_test)
testReport = classification_report(y_test, predictions)
print(testReport)

              precision    recall  f1-score   support

         0.0       0.62      0.62      0.62      1088
         1.0       0.62      0.62      0.62      1094

    accuracy                           0.62      2182
   macro avg       0.62      0.62      0.62      2182
weighted avg       0.62      0.62      0.62      2182



In [24]:
# Confusion matrix of the test labels against the latest predictions
confusion_matrix(y_true=y_test, y_pred=predictions)

array([[672, 438],
       [385, 687]])

In [50]:


# Serialise the trained classifier to disk so it can be reloaded later
modelPath = "./Models/classifier_64perc.pkl"
with open(modelPath, "wb") as modelFile:
    pickle.dump(clf, modelFile)

In [51]:
# Restore the classifier written to disk and re-score it on the test rows.
# NOTE: pickle.load can execute arbitrary code contained in the file -- only
# load model files you created or otherwise trust.
modelPath = "./Models/classifier_64perc.pkl"
with open(modelPath, "rb") as modelFile:
    clf = pickle.load(modelFile)

predictions = clf.predict(X_test)
loadedReport = classification_report(y_test, predictions)
print(loadedReport)
clf

              precision    recall  f1-score   support

         0.0       0.60      0.59      0.59      1051
         1.0       0.63      0.64      0.63      1131

    accuracy                           0.61      2182
   macro avg       0.61      0.61      0.61      2182
weighted avg       0.61      0.61      0.61      2182



In [52]:
# Score the classifier on the rows it was trained on, for comparison with the
# test report
preds = clf.predict(X_train)
trainReport = classification_report(y_train, preds)
print(trainReport)

              precision    recall  f1-score   support

         0.0       0.65      0.65      0.65      4403
         1.0       0.64      0.65      0.65      4323

    accuracy                           0.65      8726
   macro avg       0.65      0.65      0.65      8726
weighted avg       0.65      0.65      0.65      8726



In [53]:
# Show the classifier's repr as this cell's output
clf