In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neural_network import MLPClassifier
from fractions import Fraction
import pickle
import random

In [99]:
# Load the winner and loser tables; the first CSV column becomes the row index.
# NOTE(review): row i of both frames presumably describes the same fight - confirm.
dfWin, dfLose = (
    pd.read_csv(csvPath, index_col=0)
    for csvPath in ("./DataFrames/Winners.csv", "./DataFrames/Losers.csv")
)
# Preview the first rows of the winners table.
dfWin.head()

Unnamed: 0,name,desc,cr,immunities,resists,conditions,wins,actions,hp,str,dex,con,int,wis,cha,ac,spd
0,Wraith of Ogre Power,Medium Undead Neutral Evil,5,"Necrotic,Poison,Charmed,Exhaustion,Grappled,Pa...","Acid,Cold,Fire,Lightning,Thunder,Bludgeoning,P...",Gauntlets of Ogre Power,"Werebear, Air Elemental, Troll, Roper, CR 5","Life Drain: +11, 4d8+9 Necrotic & Life Drain",39,19,16,16,12,14,15,13,60
1,Oni,Large Giant Lawful Evil,7,,,"Death Defying,Regeneration",,"Glaive: +6, 10ft, 2d10+4 Slashing\tMultiattack...",55,19,11,16,14,12,15,16,30
2,Giant Ape,Huge Beast Unaligned,7,,,Shielded,,"Fist: +8, 10ft, 3d10+6 Bludgeoning\tRock: +8, ...",69,23,14,18,7,12,7,12,40
3,Young Brass Dragon,Large Dragon Chaotic Good,6,Fire,,Reliable Damage,,"Multiattack: 10ft, Bite, 2 Claw\tBite: +6, 10f...",61,19,10,17,12,11,15,17,80
4,Fire Elemental,Large Elemental Neutral,5,"Fire,Poison,Exhaustion,Grappled,Paralyzed,Petr...","Bludgeoning,Piercing,Slashing","+2 CHA,Heated Body",,"Touch: +5, 2d6+3 Fire, On Fire\tMultiattack: F...",46,10,17,16,6,10,9,13,50


I forgot to record which side the winner and the loser were on, so I need to randomize on which side the winning creature and the losing creature are placed.
This prevents the model from always predicting the creature on one specific side.

In [51]:
import random
# Returns all stat values of a dataframe as a 2d array
def getStatsValues(df: pd.DataFrame):
    """Extract the stat columns of ``df`` as a 2-D array.

    The columns are always returned in this fixed order, regardless of
    their order in ``df``: cr, hp, str, dex, con, int, wis, cha, ac, spd.

    Args:
        df: Frame that contains at least the ten stat columns listed above.

    Returns:
        Array with one row per row of ``df`` and one column per stat.

    Raises:
        KeyError: if any of the stat columns is missing from ``df``.
    """
    statColumns = ["cr", "hp", "str", "dex", "con", "int", "wis", "cha", "ac", "spd"]
    statsFrame = df[statColumns]
    return statsFrame.values

def convertToFloat(val):
    """Convert ``val`` to ``float``, accepting fraction strings.

    String inputs are parsed with ``fractions.Fraction`` so that values
    written as fractions (for example "1/4") become 0.25; every other
    input is passed straight to ``float()``.

    Args:
        val: A string (plain number or "numerator/denominator") or any
            value accepted by ``float()``.

    Returns:
        The value as a ``float``.

    Raises:
        ValueError: if a string cannot be parsed as a number or fraction.
        ZeroDivisionError: if a fraction string has a zero denominator.
    """
    # isinstance (rather than an exact type comparison) also routes str
    # subclasses, e.g. numpy.str_, through the Fraction parser; with the
    # exact comparison they fell through to float() and failed on "1/2".
    if isinstance(val, str):
        return float(Fraction(val))
    return float(val)

# Convert the "cr" column to floats; string entries (e.g. fractions) are parsed by convertToFloat.
# Mapping over the single column avoids building a full row object per record.
dfWin["cr"] = dfWin["cr"].map(convertToFloat)
dfLose["cr"] = dfLose["cr"].map(convertToFloat)
# Get Input values
dfWinStats = getStatsValues(dfWin)
dfLoseStats = getStatsValues(dfLose)
# Append input values together (winner stats followed by loser stats on each row)
dfStats = np.concatenate([dfWinStats, dfLoseStats], axis=1)
# Generate random list representing which side won and which side lost
numFights = dfStats.shape[0]

# Make the right side outputs equal 1
rightSide = np.ones(numFights // 2)
# Make left side be zero; it takes the remaining rows so that an odd number
# of fights still yields exactly one label per row (previously the last row
# had no label and indexing targets raised IndexError).
leftSide = np.zeros(numFights - numFights // 2)
# Append all values to targets
targets = np.append(leftSide, rightSide)
# Shuffle results with a dedicated, seeded generator so the labels are
# reproducible across kernel restarts (same seed as train_test_split's
# random_state) without touching the global random state.
random.Random(47).shuffle(targets)


# One feature row per fight: when the label is 1 the winner's stats go on the
# right (loser first), otherwise the winner's stats go on the left.
data = [
    np.concatenate([loseStats, winStats]) if rightWon else np.concatenate([winStats, loseStats])
    for rightWon, winStats, loseStats in zip(targets, dfWinStats, dfLoseStats)
]

[0. 0. 1. ... 0. 1. 1.]


In [100]:
def convertToFloat(val):
    """Convert ``val`` to ``float``, accepting fraction strings.

    String inputs are parsed with ``fractions.Fraction`` so that values
    written as fractions (for example "1/4") become 0.25; every other
    input is passed straight to ``float()``.

    Args:
        val: A string (plain number or "numerator/denominator") or any
            value accepted by ``float()``.

    Returns:
        The value as a ``float``.

    Raises:
        ValueError: if a string cannot be parsed as a number or fraction.
        ZeroDivisionError: if a fraction string has a zero denominator.
    """
    # isinstance (rather than an exact type comparison) also routes str
    # subclasses, e.g. numpy.str_, through the Fraction parser; with the
    # exact comparison they fell through to float() and failed on "1/2".
    if isinstance(val, str):
        return float(Fraction(val))
    return float(val)

def getDifData(df1: pd.DataFrame, df2: pd.DataFrame, targets: list, stats=['ac', 'cr', 'spd', 'hp', 'str', 'dex', 'con', 'int', 'wis', 'cha'], outlierThreshold=900):
    """Build per-row stat differences between two row-aligned frames.

    Rows are paired by position (row k of ``df1`` with row k of ``df2``),
    independent of the frames' index labels. For every stat the result is
    ``df1 - df2`` where the matching entry of ``targets`` is truthy and
    ``df2 - df1`` otherwise.

    A stat value at or above ``outlierThreshold`` is replaced by the
    opponent's value before subtracting, so that stat contributes a
    difference of 0 instead of dominating the features.

    Args:
        df1: First frame; must contain every column named in ``stats``.
        df2: Second frame with the same number of rows as ``df1``.
        targets: One truthy/falsy flag per row selecting the sign of the
            difference (list or 1-D array).
        stats: Column names to difference, in output column order.
        outlierThreshold: Values ``>=`` this are treated as unbelievable.

    Returns:
        Float array of shape ``(len(df1), len(stats))``.

    Raises:
        ValueError: if the frames differ in row count or ``targets`` does
            not hold exactly one flag per row.
        KeyError: if a requested stat column is missing.
    """
    # A list is required here: a tuple passed to .loc would be read as a
    # single multi-level key rather than a collection of column names.
    statColumns = list(stats)
    first = df1.loc[:, statColumns].to_numpy(dtype=float)
    second = df2.loc[:, statColumns].to_numpy(dtype=float)
    firstMinusSecond = np.asarray(targets).astype(bool)

    if first.shape[0] != second.shape[0]:
        raise ValueError(
            f"df1 and df2 must have the same number of rows, got {first.shape[0]} and {second.shape[0]}"
        )
    if firstMinusSecond.ndim != 1 or firstMinusSecond.shape[0] != first.shape[0]:
        raise ValueError(
            f"targets must hold exactly one flag per row, expected {first.shape[0]}"
        )

    # If a stat is greater than a believable amount make the stat equal to its opponent
    # This allows for better generalization of the data
    # The order matters: the second replacement reads the already-corrected
    # first side, mirroring a sequential per-row update.
    first = np.where(first >= outlierThreshold, second, first)
    second = np.where(second >= outlierThreshold, first, second)

    return np.where(firstMinusSecond[:, np.newaxis], first - second, second - first)

numFights = dfWin.shape[0]

# Balanced labels, one per fight. getDifData subtracts dfLose from dfWin where
# the label is 1 and dfWin from dfLose where it is 0.
# Make the right side outputs equal 1
rightSide = np.ones(numFights // 2)
# Make left side be zero; it takes the remaining rows so that an odd number
# of fights still yields exactly one label per row.
leftSide = np.zeros(numFights - numFights // 2)
# Append all values to targets
targets = np.append(leftSide, rightSide)
# Shuffle results with a dedicated, seeded generator so the labels (and thus
# every downstream metric) are reproducible across kernel restarts; the seed
# matches train_test_split's random_state.
random.Random(47).shuffle(targets)


# Convert the "cr" column to floats; mapping over the single column is
# idempotent and avoids building a full row object per record.
dfWin["cr"] = dfWin["cr"].map(convertToFloat)
dfLose["cr"] = dfLose["cr"].map(convertToFloat)

data = getDifData(dfWin, dfLose, targets)
# Preview only the first rows instead of dumping the entire feature matrix.
for row in data[:20]:
    print(row)

[-2. -1. 30. 24.  2.  1. -2.  4.  1.  7.]
[  1.   1.   0. -15.   9.  -4.   0.   2.  -1.   0.]
[-5.  0. 10.  6.  5.  6.  0.  0.  2.  4.]
[ 2. -2. 40. 24.  1. -7.  3. -4. -4. -1.]
[  0.  -3.   0. -34. -15.   7.  -3.   4.  -2.   0.]
[ -6.   0.  10.  -7.   8.  -7.   3. -10.  -3.  -6.]
[ -3.   1.   0. -18.   3.  -5.  -2.   1.  -2.   3.]
[ -6.   1. -40.  -8.  -6.  -3.  -1. -12. -10.  -9.]
[  4.   1.  20. -38.   0.  -6.   1.   0.  -3.   0.]
[ -2.   1. -20.   6.   4.   3.  -1.   4.  -2.  -3.]
[ -3.  -2.  30. -16.   0.   5.   0.  -2.   2.   0.]
[  1.   1.   0.  28.  -1.   1.   1.  -9.  -5. -11.]
[ 2.  3. 50. 40.  4. -1.  5.  0. -1.  4.]
[ 0.  1.  0. -2.  2.  0.  2.  0. -2.  2.]
[  1.  -1.  10. -18.   0.   3.   2.  -1.   2.   6.]
[  1.   1. -40. -16.  -1.   4.   1.  -3.   1.   1.]
[  1.   2. -20. -11.   2.  -8.  -5.  13.   3.   5.]
[ 0.  2. 10. 13.  4. -5.  6. -2.  3. -2.]
[ -3.  -2. -40.  -6.   0.  -5.   0.  -7.  -3.  -8.]
[-1.  1. 40. 23.  5. -6.  3.  1. -3.  3.]
[ -2.  -1. -50.  -4.   0.  -6.

In [20]:
# TODO: find a better way to normalize the results; the approach below does not work currently.

# def normalize(point):
#     if not point:
#         return 0
#     else: return 1/point

# for i in range(data.shape[0]):
#     for j in range(data.shape[1]):
#         data[i,j] = normalize(data[i,j])
# data

array([[ 0.5       ,  1.        , -0.03333333, ..., -0.25      ,
        -1.        , -0.14285714],
       [-1.        , -1.        ,  0.        , ..., -0.5       ,
         1.        ,  0.        ],
       [ 0.2       ,  0.        , -0.1       , ...,  0.        ,
        -0.5       , -0.25      ],
       ...,
       [ 0.        ,  0.        ,  0.025     , ...,  0.33333333,
         0.11111111,  0.25      ],
       [ 0.5       , -0.5       ,  0.05      , ...,  0.        ,
        -0.33333333, -0.25      ],
       [ 0.2       ,  0.        , -0.1       , ..., -1.        ,
        -1.        , -0.5       ]])

In [101]:
# Count how many targets are truthy (ones) versus falsy (zeros)
# to check that the two classes are balanced.
one = sum(1 for label in targets if label)
zero = len(targets) - one

print(f"One: {one}")
print(f"Zero: {zero}")

One: 1979
Zero: 1979


In [111]:
# Fraction of the samples kept for training; the remainder is held out for testing.
splitSize = 0.8

# Fixed random_state keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    targets,
    test_size=(1 - splitSize),
    random_state=47,
)


In [43]:
# Sanity check: number of features in a single test sample.
firstTestSample = X_test[0]
print(len(firstTestSample))

10


In [113]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

# Candidate search space for a future hyper-parameter search:
# params = { 'hidden_layer_sizes': [(200, 100, 90), (2000, 500, 80), (2000, 1500, 80), (3000, 1000, 80)],
#             'solver': ['lbfgs']
# }
# TODO: Implement RandomSearch framework
clf = MLPClassifier(
    hidden_layer_sizes=(1500, 500, 100, 50),
    max_iter=10000,
    activation='tanh',
    # NOTE(review): per the scikit-learn docs, learning_rate is only used when
    # solver='sgd'; no solver is passed here, so this setting likely has no
    # effect. Kept so the configuration is unchanged - confirm the intent.
    learning_rate='adaptive',
    learning_rate_init=1e-5,
    early_stopping=True,
    # Fix weight initialisation and the early-stopping validation split so a
    # re-run reproduces the same model; same seed as train_test_split.
    random_state=47,
    verbose=True,
)

#gs = GridSearchCV(clf)

clf.fit(X_train, y_train)

Iteration 1, loss = 0.70214789
Validation score: 0.555205
Iteration 2, loss = 0.68179570
Validation score: 0.561514
Iteration 3, loss = 0.67231565
Validation score: 0.583596
Iteration 4, loss = 0.66762087
Validation score: 0.586751
Iteration 5, loss = 0.66397101
Validation score: 0.583596
Iteration 6, loss = 0.66169257
Validation score: 0.580442
Iteration 7, loss = 0.65988805
Validation score: 0.586751
Iteration 8, loss = 0.65924894
Validation score: 0.583596
Iteration 9, loss = 0.65662586
Validation score: 0.583596
Iteration 10, loss = 0.65661826
Validation score: 0.564669
Iteration 11, loss = 0.65586384
Validation score: 0.577287
Iteration 12, loss = 0.65490471
Validation score: 0.583596
Iteration 13, loss = 0.65311612
Validation score: 0.589905
Iteration 14, loss = 0.65200742
Validation score: 0.580442
Iteration 15, loss = 0.65152816
Validation score: 0.580442
Iteration 16, loss = 0.65207635
Validation score: 0.583596
Iteration 17, loss = 0.65032691
Validation score: 0.586751
Iterat

In [114]:
# Score the classifier on the held-out test split and print per-class metrics.
predictions = clf.predict(X_test)
testReport = classification_report(y_true=y_test, y_pred=predictions)
print(testReport)

              precision    recall  f1-score   support

         0.0       0.62      0.62      0.62       403
         1.0       0.61      0.62      0.61       389

    accuracy                           0.62       792
   macro avg       0.62      0.62      0.62       792
weighted avg       0.62      0.62      0.62       792



In [115]:
# Confusion matrix of the test-set predictions against the true labels.
confusion_matrix(y_true=y_test, y_pred=predictions)

array([[248, 155],
       [149, 240]])

In [68]:


# Persist the trained classifier to disk.
# NOTE(review): the "64perc" in the file name presumably refers to an earlier
# run's accuracy - confirm it still describes the model being saved.
with open("./Models/classifier_64perc.pkl", mode="wb") as modelFile:
    pickle.dump(clf, modelFile)

In [96]:
# Restore the previously saved classifier, replacing the in-memory one.
# NOTE(review): pickle.load can execute arbitrary code from the file - only
# load model files that this notebook itself produced.
with open("./Models/classifier_64perc.pkl", mode="rb") as modelFile:
    clf = pickle.load(modelFile)

# Re-score the restored model on the current test split.
# NOTE(review): if the split differs from the one the model was trained with,
# some test rows may have been seen during training - confirm.
predictions = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=predictions))
clf

              precision    recall  f1-score   support

         0.0       0.66      0.57      0.61       481
         1.0       0.63      0.72      0.67       500

    accuracy                           0.64       981
   macro avg       0.65      0.64      0.64       981
weighted avg       0.65      0.64      0.64       981



In [78]:
# Per-class metrics on the training split, for comparison with the test report.
preds = clf.predict(X_train)
trainReport = classification_report(y_true=y_train, y_pred=preds)
print(trainReport)

              precision    recall  f1-score   support

         0.0       0.61      0.52      0.56      1480
         1.0       0.58      0.67      0.62      1461

    accuracy                           0.60      2941
   macro avg       0.60      0.60      0.59      2941
weighted avg       0.60      0.60      0.59      2941



In [400]:
# Display the current classifier object as the cell output.
clf