# Rupaul's Drag Race Machine

In [30]:
#%matplotlib inline

import pandas as pd
import numpy as np
import math
from scipy.stats import rankdata, kendalltau
from sklearn.preprocessing import scale
import matplotlib.pyplot as plt

theData = pd.read_csv("dragrace.csv")
theData= theData.fillna(0)
#theData.iloc[1:10,:]

In [31]:
queens = theData.groupby('Name').max()
queens = queens.reset_index(drop=False)
#queens.iloc[1:10,:]

In [32]:
# create a function to scale the data for us
def scaleQueens(df):
    """Scale Age, Wins, Highs, Lows, and Lipsyncs in feature data frames"""
    df = df.copy(deep=True)
    df['Age'] = scale(df['Age'])
    df['Wins'] = scale(df['Wins'])
    df['Highs'] = scale(df['Highs'])
    df['Lows'] = scale(df['Lows'])
    df['Lipsyncs'] = scale(df['Lipsyncs'])
    df['Season'] = df['Season']/8
    return df

In [33]:
def compareRanks(x,y):
    x=np.asarray(x)
    y=np.asarray(y)
    numRanks = np.append(x,y).max()
    actual=np.sum(np.square(x-y))
    worst=np.sum(np.square(np.sort(x)-np.sort(x)[::-1]))
    return 1-2*(actual/worst)

In [34]:
Xtrain = queens.loc[queens.Season<7,['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
Xtrain.reset_index(drop=True)
Xtest = queens.loc[queens.Season==7,['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
Xtest.reset_index(drop=True)
ytrain = queens.loc[queens.Season<7,'Place']
ytrain.reset_index(drop=True)
ytest = queens.loc[queens.Season==7,'Place']
ytest.reset_index(drop=True)
season8X = queens.loc[queens.Season==8,['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
season8y = queens.loc[queens.Season==8,'Place']
season7 = queens.loc[queens.Season==7,['Name','Place']]
season7.columns = ['Name','Actual']
season7 = season7.reset_index(drop=True)
season8 = queens.loc[queens.Season==8,['Name','Place']]
season8.columns = ['Name','Actual']
season8 = season8.reset_index(drop=True)

In [35]:
XtrainS = scaleQueens(Xtrain)
XtestS = scaleQueens(Xtest)
season8XS = scaleQueens(season8X)
XtestS



Unnamed: 0,Age,PuertoRico,PlusSize,Wins,Highs,Lows,Lipsyncs,Season
25,-0.147878,0,1,1.658597,1.048809,-0.650945,0.68313,0.875
31,-0.699958,0,1,-0.921443,1.048809,-0.650945,2.04939,0.875
32,0.95628,0,0,-0.921443,-0.953463,0.260378,-0.68313,0.875
38,-0.285898,1,0,-0.921443,-0.953463,0.260378,0.68313,0.875
39,0.266181,0,0,0.798584,1.716233,-0.650945,0.68313,0.875
41,0.404201,0,0,0.798584,1.716233,-0.650945,0.68313,0.875
53,-1.114017,0,0,0.798584,-0.953463,-0.650945,-0.68313,0.875
57,-0.147878,0,0,-0.921443,0.381385,2.994345,-0.68313,0.875
60,1.784399,0,0,-0.921443,0.381385,-0.650945,-0.68313,0.875
68,-0.975997,0,0,0.798584,-0.953463,0.260378,0.68313,0.875


# Support Vector Machine Classifier

In [36]:
from sklearn.svm import SVC
model = SVC(kernel='rbf',gamma=0.01,C=10)
model.fit(XtrainS,ytrain)

yfit = model.predict(XtestS)
yfitpd = pd.DataFrame(yfit)
yfitpd['Name'] = season7.Name
yfitpd.columns = ['Predicted','Name']
yfitpd = yfitpd.loc[:,['Name','Predicted']]
yfitpd['Predicted'] = rankdata(yfitpd.Predicted,method='min')
svc7 = pd.merge(season7,yfitpd).sort_values('Actual')
svc7

Unnamed: 0,Name,Actual,Predicted
13,Voilet Chachki,1,1
0,Ginger Minj,2,2
9,Pearl,3,7
5,Kennedy Davenport,4,2
4,Katya,5,2
12,Trixie Mattel,6,7
7,Miss Fame,7,5
1,Jaidynn Diore Fierce,8,6
6,Max,9,7
3,Kandy Ho,10,7


In [37]:
kendalltau(svc7.Actual,svc7.Predicted)

KendalltauResult(correlation=0.769577712390211, pvalue=0.0001261425150919689)

In [38]:
compareRanks(svc7.Actual,svc7.Predicted)

0.85714285714285721

## Season 8 Predictions

In [39]:
presentFit = model.predict(season8XS)
presentFitPD = pd.DataFrame(presentFit)
presentFitPD['Name'] = season8['Name']
presentFitPD.columns = ['Predicted','Name']
presentFitPD['Predicted'] = rankdata(presentFitPD.Predicted,method='min')
svc8 = pd.merge(season8,presentFitPD).sort_values('Predicted')
svc8

Unnamed: 0,Name,Actual,Predicted
1,Bob the Drag Queen,0,1
6,Kim Chi,0,1
5,Derrick Barry,5,3
10,Robbie Turner,7,3
0,Acid Betty,8,5
2,Chi Chi DeVayne,4,5
8,Naomi Smalls,0,7
7,Laila McQueen,11,8
9,Naysha Lopez,9,8
3,Cynthia Lee Fontaine,10,10


In [40]:
svc8tau = svc8.loc[svc8.Actual!=0,:]
kendalltau(svc8tau.Actual,svc8tau.Predicted)

KendalltauResult(correlation=0.46291004988627577, pvalue=0.082312803508471227)

In [41]:
compareRanks(svc8tau.Actual,svc8tau.Predicted)

0.45714285714285718

## Details of the model

In [42]:
XtestS

Unnamed: 0,Age,PuertoRico,PlusSize,Wins,Highs,Lows,Lipsyncs,Season
25,-0.147878,0,1,1.658597,1.048809,-0.650945,0.68313,0.875
31,-0.699958,0,1,-0.921443,1.048809,-0.650945,2.04939,0.875
32,0.95628,0,0,-0.921443,-0.953463,0.260378,-0.68313,0.875
38,-0.285898,1,0,-0.921443,-0.953463,0.260378,0.68313,0.875
39,0.266181,0,0,0.798584,1.716233,-0.650945,0.68313,0.875
41,0.404201,0,0,0.798584,1.716233,-0.650945,0.68313,0.875
53,-1.114017,0,0,0.798584,-0.953463,-0.650945,-0.68313,0.875
57,-0.147878,0,0,-0.921443,0.381385,2.994345,-0.68313,0.875
60,1.784399,0,0,-0.921443,0.381385,-0.650945,-0.68313,0.875
68,-0.975997,0,0,0.798584,-0.953463,0.260378,0.68313,0.875


def jitter(arr):
    stdev = .01*(max(arr)-min(arr))
    return arr + np.random.randn(len(arr)) * stdev
set1cm = plt.get_cmap("Set1")
set1cm


plt.xkcd()

Xplot = Xtrain[['Wins','Lipsyncs']]
yplot = ytrain

fig, ax = plt.subplots()

for i in range(1,14):
    thisX = Xtrain.loc[ytrain==i,:]
    ax.scatter(jitter(Xplot['Wins']),jitter(Xplot['Lipsyncs']),c=i,label="{0} Place".format(i))


ax.legend()

# Gaussian Naive Bayes

In [43]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(Xtrain,ytrain)
nbTest = model.predict(Xtest)
nbTestDB = pd.DataFrame(nbTest)
nbTestDB['Name'] = season7.Name
nbTestDB.columns = ['Predicted','Name']
nbTestDB = nbTestDB.loc[:,['Name','Predicted']]
nbTestDB['Predicted'] = rankdata(nbTestDB.Predicted,method='min')

nb7 = pd.merge(season7,nbTestDB).sort_values('Actual')
nb7

Unnamed: 0,Name,Actual,Predicted
13,Voilet Chachki,1,1
0,Ginger Minj,2,6
9,Pearl,3,8
5,Kennedy Davenport,4,1
4,Katya,5,1
12,Trixie Mattel,6,8
7,Miss Fame,7,4
1,Jaidynn Diore Fierce,8,6
6,Max,9,8
3,Kandy Ho,10,4


In [44]:
kendalltau(nb7.Actual,nb7.Predicted)

KendalltauResult(correlation=0.48108780653777095, pvalue=0.016544547824374332)

In [45]:
compareRanks(nb7.Actual,nb7.Predicted)

0.54725274725274731

## Season 8 Predictions

In [46]:
nb8 = model.predict(season8X)
nb8DB = pd.DataFrame(nb8)
nb8DB['Name'] = season8.Name
nb8DB.columns = ['Predicted','Name']
nb8DB['Predicted'] = rankdata(nb8DB.Predicted,method='min')
nb8 = pd.merge(season8,nb8DB).sort_values('Predicted')
nb8

Unnamed: 0,Name,Actual,Predicted
0,Acid Betty,8,1
1,Bob the Drag Queen,0,1
6,Kim Chi,0,1
8,Naomi Smalls,0,1
11,Thorgy Thor,6,1
5,Derrick Barry,5,6
10,Robbie Turner,7,6
3,Cynthia Lee Fontaine,10,8
9,Naysha Lopez,9,8
2,Chi Chi DeVayne,4,10


In [47]:
nb8tau = nb8.loc[nb8.Actual!=0,:]
kendalltau(nb8tau.Actual,nb8tau.Predicted)

KendalltauResult(correlation=0.41833001326703778, pvalue=0.1163916078469175)

In [48]:
compareRanks(nb8tau.Actual,nb8tau.Predicted)

-0.11428571428571432

# Random Forest Classifier

In [49]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(Xtrain,ytrain)
yfit = model.predict(Xtest)

yfitpd = pd.DataFrame(yfit)
yfitpd['Name'] = season7.Name
yfitpd.columns = ['Predicted','Name']
yfitpd = yfitpd.loc[:,['Name','Predicted']]
yfitpd['Predicted'] = rankdata(yfitpd.Predicted,method='min')

## Season 7 Predictions

In [50]:
rfClass = pd.merge(season7,yfitpd).sort_values('Actual')
rfClass

Unnamed: 0,Name,Actual,Predicted
13,Voilet Chachki,1,1
0,Ginger Minj,2,6
9,Pearl,3,3
5,Kennedy Davenport,4,4
4,Katya,5,4
12,Trixie Mattel,6,6
7,Miss Fame,7,10
1,Jaidynn Diore Fierce,8,6
6,Max,9,1
3,Kandy Ho,10,6


In [51]:
kendalltau(rfClass.Actual,rfClass.Predicted)

KendalltauResult(correlation=0.64061851133006142, pvalue=0.0014157354382150771)

In [52]:
compareRanks(rfClass.Actual,rfClass.Predicted)

0.74945054945054945

## Season 8 Predictions

In [53]:
rf8Fit = model.predict(season8X)
rf8FitPD = pd.DataFrame(rf8Fit)
rf8FitPD['Name'] = season8.Name
rf8FitPD.columns = ['Predicted','Name']
rf8FitPD = rf8FitPD.loc[:,['Name','Predicted']]
rf8FitPD['Predicted'] = rankdata(rf8FitPD.Predicted,method='min')

In [54]:
rfClass8 = pd.merge(season8,rf8FitPD).sort_values('Predicted')
rfClass8

Unnamed: 0,Name,Actual,Predicted
0,Acid Betty,8,1
1,Bob the Drag Queen,0,1
6,Kim Chi,0,3
2,Chi Chi DeVayne,4,4
5,Derrick Barry,5,5
10,Robbie Turner,7,5
8,Naomi Smalls,0,7
11,Thorgy Thor,6,7
9,Naysha Lopez,9,9
4,Dax ExclamationPoint,11,10


In [55]:
rfClass8tau = rfClass8.loc[rfClass8.Actual!=0,:]
kendalltau(rfClass8tau.Actual,rfClass8tau.Predicted)

KendalltauResult(correlation=0.57977103565244836, pvalue=0.029552937974206061)

In [56]:
compareRanks(rfClass8tau.Actual,rfClass8tau.Predicted)

0.4285714285714286

# Random Forest Regressor

In [57]:
from sklearn.ensemble import RandomForestRegressor
forest = RandomForestRegressor(200,random_state=24601)
forest.fit(Xtrain,ytrain)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=200, n_jobs=1, oob_score=False, random_state=24601,
           verbose=0, warm_start=False)

## Season 7 Predictions

In [58]:
forestFit = forest.predict(Xtest)
forestFitDB = pd.DataFrame(forestFit.round())
forestFitDB['Name'] = season7.Name
forestFitDB.columns = ['Predicted','Name']
forestFitDB['Predicted'] = rankdata(forestFitDB.Predicted,method='min')
rfReg7 = pd.merge(season7,forestFitDB).sort_values('Actual')
rfReg7

Unnamed: 0,Name,Actual,Predicted
13,Voilet Chachki,1,2
0,Ginger Minj,2,1
9,Pearl,3,6
5,Kennedy Davenport,4,2
4,Katya,5,2
12,Trixie Mattel,6,8
7,Miss Fame,7,8
1,Jaidynn Diore Fierce,8,6
6,Max,9,5
3,Kandy Ho,10,8


In [59]:
kendalltau(rfReg7.Actual,rfReg7.Predicted)

KendalltauResult(correlation=0.73379938570534309, pvalue=0.00025655048033250812)

In [60]:
compareRanks(rfReg7.Actual,rfReg7.Predicted)

0.87252747252747254

## Season 8 Predictions

In [61]:
forest8 = forest.predict(season8X)
forest8DB = pd.DataFrame(forest8.round())
forest8DB['Name'] = season8.Name
forest8DB.columns = ['Predicted','Name']
forest8DB['Predicted'] = rankdata(forest8DB.Predicted,method='min')
forest8 = pd.merge(season8,forest8DB).sort_values('Predicted')
forest8

Unnamed: 0,Name,Actual,Predicted
5,Derrick Barry,5,1
6,Kim Chi,0,1
1,Bob the Drag Queen,0,3
10,Robbie Turner,7,3
2,Chi Chi DeVayne,4,5
8,Naomi Smalls,0,5
11,Thorgy Thor,6,7
0,Acid Betty,8,8
7,Laila McQueen,11,9
4,Dax ExclamationPoint,11,10


In [62]:
forest8tau = forest8.loc[forest8.Actual!=0,:]
kendalltau(forest8tau.Actual,forest8tau.Predicted)

KendalltauResult(correlation=0.62857142857142867, pvalue=0.018314797230417328)

In [63]:
compareRanks(forest8tau.Actual,forest8tau.Predicted)

0.58095238095238089

# Neural Network

Unfortunately, scikit learn's stable release does not include neural networks (it is being added in the next version) so if I want to include a neural network I have to code one from scratch (credit goes to Joel Grus's Data Science From Scratch book, available from O'Reilly, for much of the code for the neural network)

In [64]:
import numpy as np
import math, random
# create the "step" function
def sigmoid(t): 
    return 1 / (1 + math.exp(-t))

# create the neuron
def neuron_output(weights, inputs):   #This is a simpler representation; weights for input plus one extra (bias)
    return sigmoid(np.dot(weights, inputs))

# define the network
def feed_forward(neural_network, input_vector):
    """takes in a neural network (represented as a list of lists of lists of weights)
    and returns the output from forward-propagating the input"""

    outputs = []

    for layer in neural_network: #Remember the neural network is given as a list of "layers" which have neurons in them

        input_with_bias = input_vector + [1]          # add a bias input (this just allos us to use a dot product)
        output = [neuron_output(neuron, input_with_bias) # compute the output
                  for neuron in layer]                   # for this layer
        outputs.append(output)                           # and remember it

        # the input to the next layer is the output of this one
        input_vector = output

    return outputs

# define the back-propagation that allows the network to learn
def backpropagate(network, input_vector, target):

    hidden_outputs, outputs = feed_forward(network, input_vector)
    
    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target[i])
                     for i, output in enumerate(outputs)]
                     
    # adjust weights for output layer (network[-1])
    for i, output_neuron in enumerate(network[-1]):
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) * 
                      np.dot(output_deltas, [n[i] for n in network[-1]]) 
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer (network[0])
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input

In [65]:
random.seed(0)   # to get repeatable results
input_size = 8  # each input is a vector of length 8
num_hidden = 5   # we'll have 5 neurons in the hidden layer
output_size = 14 # we need 14 outputs for each input, since there are 14 possible places

In [66]:
# each hidden neuron has one weight per input, plus a bias weight
hidden_layer = [[random.random() for __ in range(input_size + 1)]
                    for __ in range(num_hidden)]

In [67]:
# each output neuron has one weight per hidden neuron, plus a bias weight
output_layer = [[random.random() for __ in range(num_hidden + 1)]
                    for __ in range(output_size)]

In [68]:
# the network starts out with random weights
network = [hidden_layer, output_layer]

Neural networks work best if values are standardized close to 0-1. So for the non-dummy variables, I'll standardize to mean=0, sd=1

In [69]:
# create a function to scale the data for us
def scaleQueens(df):
    """Scale Age, Wins, Highs, Lows, and Lipsyncs in feature data frames"""
    df = df.copy(deep=True)
    df['Age'] = scale(df['Age'])
    df['Wins'] = scale(df['Wins'])
    df['Highs'] = scale(df['Highs'])
    df['Lows'] = scale(df['Lows'])
    df['Lipsyncs'] = scale(df['Lipsyncs'])
    return df

In [70]:
from sklearn.preprocessing import scale
queensNN = queens
#scale season for entire data set (I'll scale each subset of data individually for everything else)
queensNN['Season'] = scale(queensNN['Season'])

#dataset = zip(inputs_dig, targets_dig)



In [71]:
XtrainNN = queensNN.loc[queens.Season<7,['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
XtrainNN = scaleQueens(XtrainNN)
XtestNN = queensNN.loc[(queensNN.Season>1) & (queensNN.Season<1.5),['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
XtestNN = scaleQueens(XtestNN)
ytrainNN = queensNN.loc[queens.Season<7,'Place']
ytestNN = queensNN.loc[queens.Season==7,'Place']
season8XNN = queensNN.loc[queensNN.Season>1.5,['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']]
season8XNN = scaleQueens(season8XNN)
season8yNN = queensNN.loc[queens.Season==8,'Place']



In [72]:
yNN = [[1 if i == j else 0 for i in range(14)]
               for j in ytrainNN ]
XNN = XtrainNN.values.tolist()

In [73]:

for i in range(10000):
    for input_vector, target_vector in zip(XNN, yNN):
        backpropagate(network, input_vector, target_vector)

In [74]:
def predict(input):
        return feed_forward(network, input)[-1]

## Season 7 Predictions

In [75]:
X7NN = XtestNN.values.tolist()

In [76]:
predictedY = []
for i, input in enumerate(X7NN):
    outputs = predict(input)
    predictedY.append(outputs)
#for q in predictedY:
#    print([round(a,2) for a in q])

In [77]:
predictedPlace = [a.index(max(a)) for a in predictedY]
predictedPlace = rankdata(predictedPlace,method='min')
season7NN = season7
season7NN['Predicted'] = predictedPlace
season7NN.sort_values('Actual')

Unnamed: 0,Name,Actual,Predicted
13,Voilet Chachki,1,1
0,Ginger Minj,2,2
9,Pearl,3,3
5,Kennedy Davenport,4,4
4,Katya,5,4
12,Trixie Mattel,6,6
7,Miss Fame,7,9
1,Jaidynn Diore Fierce,8,6
6,Max,9,8
3,Kandy Ho,10,9


In [78]:
kendalltau(season7NN.Actual,season7NN.Predicted)

KendalltauResult(correlation=0.8530244589991115, pvalue=2.1416755669110968e-05)

In [79]:
compareRanks(season7NN.Actual,season7NN.Predicted)

0.85714285714285721

## Season 8 Predictions

In [80]:
season8XNN['Wins'] = scale(season8XNN['Wins'])
season8XNN['Highs'] = scale(season8XNN['Highs'])
season8XNN['Lows'] = scale(season8XNN['Lows'])
season8XNN['Lipsyncs'] = scale(season8XNN['Lipsyncs'])
X8NN = season8XNN.values.tolist()
predictedY = []
for i, input in enumerate(X8NN):
    outputs = predict(input)
    predictedY.append(outputs)
for q in predictedY:
    print([round(a,2) for a in q])

[1.0, 0.0, 0.0, 0.0, 0.0, 0.03, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
[0.02, 1.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.01, 0.0, 0.0, 0.06, 0.39, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.09, 0.05, 0.23, 0.05, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.07, 0.16, 0.13, 0.12, 0.23, 0.37]
[0.0, 0.0, 0.0, 0.02, 0.88, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.13, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.92, 0.0, 0.0, 0.02, 0.0, 0.0]
[1.0, 0.0, 0.0, 0.0, 0.04, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.08, 0.05, 0.23, 0.05, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01, 0.0, 0.03, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.08, 0.05, 0.23, 0.05, 0.0]


In [81]:
[a.index(max(a)) for a in predictedY]

[9, 1, 4, 11, 13, 4, 1, 8, 0, 11, 11, 11]

In [82]:
predictedPlace = [a.index(max(a)) for a in predictedY]
predictedPlace = rankdata(predictedPlace,method='min')
season8NN = season8
season8NN['Predicted'] = predictedPlace
season8NN = season8NN.sort_values('Predicted')
season8NN

Unnamed: 0,Name,Actual,Predicted
8,Naomi Smalls,0,1
1,Bob the Drag Queen,0,2
6,Kim Chi,0,2
2,Chi Chi DeVayne,4,4
5,Derrick Barry,5,4
7,Laila McQueen,11,6
0,Acid Betty,8,7
3,Cynthia Lee Fontaine,10,8
9,Naysha Lopez,9,8
10,Robbie Turner,7,8


In [83]:
season8tau = season8NN.loc[season8NN.Actual!=0,:]
kendalltau(season8tau.Actual,season8tau.Predicted)

KendalltauResult(correlation=0.43943537440204117, pvalue=0.099083132728722628)

In [84]:
compareRanks(season8tau.Actual,season8tau.Predicted)

0.63809523809523805