# RuPaul's Drag Race Machine

In [1]:
#%matplotlib inline

import pandas as pd
import numpy as np
import math, random
from scipy.stats import rankdata, kendalltau
from sklearn.preprocessing import scale
import matplotlib.pyplot as plt

# Load the raw dataset and substitute 0 for every missing value in one step.
theData = pd.read_csv("dragrace.csv").fillna(0)
# theData.iloc[1:10, :]   # uncomment to peek at a few rows

In [2]:
# Collapse the data to one row per 'Name', keeping the column-wise maximum of
# that name's rows, then turn the group key back into an ordinary column.
queens = (
    theData
    .groupby('Name')
    .max()
    .reset_index(drop=False)
)
# queens.iloc[1:10, :]   # uncomment to peek at a few rows

In [3]:
# create a function to scale the data for us
def scaleQueens(df):
    """Return a scaled deep copy of a feature data frame.

    Age, Wins, Highs, Lows and Lipsyncs are each passed through
    sklearn.preprocessing.scale; Season is divided by 8.  The frame passed in
    is left untouched.
    """
    scaled = df.copy(deep=True)
    for column in ('Age', 'Wins', 'Highs', 'Lows', 'Lipsyncs'):
        scaled[column] = scale(scaled[column])
    scaled['Season'] = scaled['Season'] / 8
    return scaled

In [4]:
def compareRanks(x,y):
    """Score how closely two rankings agree.

    The sum of squared differences between x and y is normalised by the
    largest disagreement obtainable from x alone: the values of x sorted
    ascending, paired with the same values sorted descending.

    Parameters
    ----------
    x, y : array-like of numbers, same shape
        The two rankings to compare, element by element.

    Returns
    -------
    float
        1.0 when x and y are identical, -1.0 when y is x in exactly reversed
        sorted order, and nan when the score is undefined (empty input, or
        every value of x is the same, so the normaliser is zero).

    Raises
    ------
    ValueError
        If x and y do not have the same shape.
    """
    # Work in floats so the ratio below is always a true division.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        # Without this check numpy would silently broadcast, e.g. a length-1 y.
        raise ValueError("x and y must have the same shape")

    actual = np.sum(np.square(x - y))
    ascending = np.sort(x)
    worst = np.sum(np.square(ascending - ascending[::-1]))
    if worst == 0:
        # Degenerate ranking: nothing to normalise by, so avoid dividing by zero.
        return float('nan')
    return 1 - 2 * (actual / worst)

In [5]:
def createPD(s,queens,yfitpd):
    """Build the actual-vs-predicted table for one season.

    Parameters
    ----------
    s : value compared against ``queens.Season`` to select the season's rows.
    queens : DataFrame with at least 'Name', 'Season' and 'Place' columns.
    yfitpd : pandas object indexed like ``queens`` holding one prediction per
        row; it is filtered with the same season mask.

    Returns
    -------
    DataFrame with columns 'Name', 'Place' and 'Predicted' (the season's
    predictions converted to 'min' ranks), sorted by 'Place'.  ``queens`` is
    not modified.
    """
    in_season = queens.Season == s
    # Explicit copy: columns are added below, and writing to a bare .loc slice
    # can trigger pandas' SettingWithCopyWarning.
    season = queens.loc[in_season, ['Name', 'Place']].copy()
    # Index-aligned assignment first, so each prediction lands on its own row
    # before the values are replaced by their ranks.
    season['Predicted'] = yfitpd.loc[in_season]
    season['Predicted'] = rankdata(season['Predicted'], method='min')
    return season.sort_values('Place')

In [6]:
def predictSeason(season,model,queens,scaled=False):
    """Hold out one season, train on all the others, and rank the held-out queens.

    Parameters
    ----------
    season : value compared against ``queens.Season``; matching rows form the
        test set and every other row is training data.
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    queens : DataFrame containing the feature columns listed below plus
        'Name' and 'Place'.
    scaled : when True, the train and test features are each passed through
        ``scaleQueens`` before fitting/predicting.

    Returns
    -------
    DataFrame with the held-out season's 'Name' and 'Place' plus 'Predicted',
    the model's predictions converted to 'min' ranks.  ``queens`` is not
    modified.
    """
    features = ['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']
    held_out = queens.Season == season

    Xtrain = queens.loc[~held_out, features]
    Xtest = queens.loc[held_out, features]
    ytrain = queens.loc[~held_out, 'Place']
    if scaled:
        # NOTE(review): the two frames are scaled independently, so the test
        # season is standardized with its own statistics - confirm intended.
        Xtrain = scaleQueens(Xtrain)
        Xtest = scaleQueens(Xtest)

    model.fit(Xtrain, ytrain)
    yfit = model.predict(Xtest)

    # Explicit copy: a column is added below, and writing to a bare .loc slice
    # can trigger pandas' SettingWithCopyWarning.
    pddf = queens.loc[held_out, ['Name', 'Place']].copy()
    pddf['Predicted'] = rankdata(yfit, method='min')
    return pddf

In [7]:

class neural_network:
    """A small fully connected sigmoid network trained by per-sample back-propagation.

    ``self.network`` is a list of layers; each layer is a list of neurons and
    each neuron is a list of weights: one per input plus a trailing bias weight.
    ``backpropagate`` (and therefore ``fit``) handles exactly one hidden layer,
    i.e. a ``size`` of three entries.
    """
    def __init__(self,size):
        """Create the layers described by ``size`` (layer widths, inputs first).

        The global ``random`` generator is reseeded with 0 first, so two
        networks of the same size always start from identical weights.
        """
        random.seed(0)
        self.network = []
        for i in range(1,len(size)):
            # size[i] neurons, each with size[i-1] input weights + 1 bias weight
            self.network.append([[random.random() for __ in range(size[i-1] + 1)] for __ in range(size[i])])
    
    def sigmoid(self,t): 
        """Logistic function 1 / (1 + e^-t); returns 0.0 where e^-t overflows."""
        try:
            return 1 / (1 + math.exp(-t))
        except OverflowError:
            # math.exp raises for very negative t (roughly t < -709); the
            # mathematical limit there is 0.
            return 0.0
    
    def neuron_output(self,weights, inputs):   #This is a simpler representation; weights for input plus one extra (bias)
        """Sigmoid of the dot product of a neuron's weights with its (bias-extended) inputs."""
        return self.sigmoid(np.dot(weights, inputs))
    
    def feed_forward(self,input_vector):
        """Propagate one sample through every layer.

        Returns a list holding each layer's output list, in layer order; the
        last element is the network's output.
        """
        outputs = []

        for layer in self.network: #Remember the neural network is given as a list of "layers" which have neurons in them

            # add a bias input (this just allows us to use a dot product);
            # list() makes sure "+" appends rather than adds element-wise
            input_with_bias = list(input_vector) + [1]
            output = [self.neuron_output(neuron, input_with_bias) # compute the output
                      for neuron in layer]                   # for this layer
            outputs.append(output)                           # and remember it

            # the input to the next layer is the output of this one
            input_vector = output

        return outputs
    
    def predict(self,X):
        """Predict a place for every row of the feature frame X.

        X is passed through ``scaleQueens`` first.  For each row the output
        neuron with the largest activation is chosen and converted back to a
        1-based place, mirroring ``convertY`` (place j <-> output index j-1).
        Returns a list with one place per row.
        """
        X = scaleQueens(X)
        predictedPlace = []
        for row in X.values.tolist():
            scores = self.feed_forward(row)[-1]
            predictedPlace.append(scores.index(max(scores)) + 1)
        return predictedPlace
    
    
    def convertY(self,Y):
        """One-hot encode places: place j becomes a 1 at index j-1.

        Every vector is as wide as the output layer.  A place outside
        1..width yields an all-zero vector.
        """
        width = len(self.network[-1])
        yNN = [[1 if i == (j-1) else 0 for i in range(width)]
               for j in Y ]
        return(yNN)
    
    # define the back-propagation that allows the network to learn
    def backpropagate(self, input_vector, target):
        """Run one gradient step (implicit step size 1) on a single sample.

        input_vector -- the sample's feature values.
        target       -- desired output per output neuron (see ``convertY``).
        Updates ``self.network`` in place; requires exactly two layers.
        """
        hidden_outputs, outputs = self.feed_forward(input_vector)

        # the output * (1 - output) is from the derivative of sigmoid
        output_deltas = [output * (1 - output) * (output - target[i])
                         for i, output in enumerate(outputs)]

        # adjust weights for output layer (network[-1])
        for i, output_neuron in enumerate(self.network[-1]):
            for j, hidden_output in enumerate(hidden_outputs + [1]):
                output_neuron[j] -= output_deltas[i] * hidden_output

        # back-propagate errors to hidden layer
        # (this reads the output weights that were just adjusted above)
        hidden_deltas = [hidden_output * (1 - hidden_output) * 
                          np.dot(output_deltas, [n[i] for n in self.network[-1]]) 
                         for i, hidden_output in enumerate(hidden_outputs)]

        # adjust weights for hidden layer (network[0])
        for i, hidden_neuron in enumerate(self.network[0]):
            for j, input_value in enumerate(list(input_vector) + [1]):
                hidden_neuron[j] -= hidden_deltas[i] * input_value
    
    def fit(self,features,targets,times=10000):
        """Train on every sample, one at a time, for ``times`` passes.

        features -- feature frame; passed through ``scaleQueens`` first.
        targets  -- iterable of 1-based places, one per row of ``features``.
        times    -- number of passes over the whole training set.
        """
        features = scaleQueens(features)
        targets = self.convertY(targets)
        features = features.values.tolist()
        for _ in range(times):
            for X, Y in zip(features, targets):
                self.backpropagate(X,Y)
        
        
        

# Predict All Stars

In [11]:
# Names of the ten queens to predict, kept in the original listing order.
allstars = pd.Series([
    'Tatianna',
    'Adore Delano',
    'Alaska',
    'Alyssa Edwards',
    'Coco Montrese',
    'Detox',
    'Ginger Minj',
    'Katya',
    "Phi Phi O'Hara",
    "Roxxxy Andrews",
])
allstars

0          Tatianna
1      Adore Delano
2            Alaska
3    Alyssa Edwards
4     Coco Montrese
5             Detox
6       Ginger Minj
7             Katya
8    Phi Phi O'Hara
9    Roxxxy Andrews
dtype: object

In [21]:
# Hold out the queens listed in `allstars` as the test set; every other queen
# is training data.  Both splits use the same eight feature columns.
_feature_columns = ['Age','PuertoRico','PlusSize','Wins','Highs','Lows','Lipsyncs','Season']
names = queens.Name
mask = names.isin(allstars)

Xtrain = queens.loc[~mask, _feature_columns]
ytrain = queens.loc[~mask, 'Place']

Xtest = queens.loc[mask, _feature_columns]
ytest = queens.loc[mask, 'Place']

In [24]:
# Results table for the held-out queens.  Take an explicit copy: later cells
# add one column per model, and writing to a bare .loc slice of `queens` can
# trigger pandas' SettingWithCopyWarning.
AllStars = queens.loc[mask,['Name','Season','Place']].copy()
AllStars

Unnamed: 0,Name,Season,Place
1,Adore Delano,6,2
3,Alaska,5,2
6,Alyssa Edwards,5,6
15,Coco Montrese,5,5
22,Detox,5,4
25,Ginger Minj,7,2
39,Katya,7,5
70,Phi Phi O'Hara,4,3
76,Roxxxy Andrews,5,3
87,Tatianna,2,4


In [25]:
# Scaled copies of the train/test features (scaleQueens returns a new frame).
# NOTE(review): each frame is scaled on its own, so the test set is
# standardized with its own statistics rather than the training set's -
# confirm this is intended.
XtrainS = scaleQueens(Xtrain)
XtestS = scaleQueens(Xtest)



In [26]:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

# One instance of every candidate model, with fixed seeds where the model
# accepts one.
svc_model = SVC(kernel='rbf', gamma=0.01, C=10)
gnb_model = GaussianNB()
rfc_model = RandomForestClassifier(n_estimators=100, random_state=0)
rfr_model = RandomForestRegressor(n_estimators=200, random_state=24601)
# layer widths: 8 inputs (one per feature column), 5 hidden, 14 outputs
nn_model = neural_network([8, 5, 14])

# Support Vector Classifier

In [27]:
# Train on the scaled training features, predict the held-out queens, and
# store the predictions as 'min' ranks in the results table.
yfit = svc_model.fit(XtrainS, ytrain).predict(XtestS)
AllStars['SVC'] = rankdata(yfit, method='min')
AllStars

Unnamed: 0,Name,Season,Place,SVC
1,Adore Delano,6,2,3
3,Alaska,5,2,3
6,Alyssa Edwards,5,6,8
15,Coco Montrese,5,5,8
22,Detox,5,4,8
25,Ginger Minj,7,2,1
39,Katya,7,5,6
70,Phi Phi O'Hara,4,3,5
76,Roxxxy Andrews,5,3,2
87,Tatianna,2,4,7


# Gaussian Naive Bayes

In [28]:
# Train on the unscaled training features, predict the held-out queens, and
# store the predictions as 'min' ranks in the results table.
yfit = gnb_model.fit(Xtrain, ytrain).predict(Xtest)
AllStars['GNB'] = rankdata(yfit, method='min')
AllStars

Unnamed: 0,Name,Season,Place,SVC,GNB
1,Adore Delano,6,2,3,1
3,Alaska,5,2,3,1
6,Alyssa Edwards,5,6,8,8
15,Coco Montrese,5,5,8,8
22,Detox,5,4,8,8
25,Ginger Minj,7,2,1,7
39,Katya,7,5,6,1
70,Phi Phi O'Hara,4,3,5,1
76,Roxxxy Andrews,5,3,2,1
87,Tatianna,2,4,7,6


# Random Forest Classifier

In [29]:
# Train on the unscaled training features, predict the held-out queens, and
# store the predictions as 'min' ranks in the results table.
yfit = rfc_model.fit(Xtrain, ytrain).predict(Xtest)
AllStars['RFC'] = rankdata(yfit, method='min')
AllStars

Unnamed: 0,Name,Season,Place,SVC,GNB,RFC
1,Adore Delano,6,2,3,1,1
3,Alaska,5,2,3,1,3
6,Alyssa Edwards,5,6,8,8,8
15,Coco Montrese,5,5,8,8,8
22,Detox,5,4,8,8,5
25,Ginger Minj,7,2,1,7,1
39,Katya,7,5,6,1,5
70,Phi Phi O'Hara,4,3,5,1,8
76,Roxxxy Andrews,5,3,2,1,7
87,Tatianna,2,4,7,6,4


# Random Forest Regressor

In [30]:
# Train the regressor on the unscaled training features, predict the held-out
# queens, and store the predictions as 'min' ranks in the results table.
yfit = rfr_model.fit(Xtrain, ytrain).predict(Xtest)
AllStars['RFR'] = rankdata(yfit, method='min')
AllStars

Unnamed: 0,Name,Season,Place,SVC,GNB,RFC,RFR
1,Adore Delano,6,2,3,1,1,2
3,Alaska,5,2,3,1,3,4
6,Alyssa Edwards,5,6,8,8,8,9
15,Coco Montrese,5,5,8,8,8,10
22,Detox,5,4,8,8,5,8
25,Ginger Minj,7,2,1,7,1,1
39,Katya,7,5,6,1,5,6
70,Phi Phi O'Hara,4,3,5,1,8,5
76,Roxxxy Andrews,5,3,2,1,7,3
87,Tatianna,2,4,7,6,4,7


# Neural Network

In [31]:
# Train the hand-written network, predict the held-out queens, and store the
# predictions as 'min' ranks in the results table.
# NOTE(review): neural_network.fit and .predict each call scaleQueens on their
# input themselves, so passing the already-scaled XtrainS/XtestS scales the
# data a second time (Season ends up divided by 8 twice) - confirm whether the
# raw Xtrain/Xtest were intended here.
nn_model.fit(XtrainS,ytrain)
yfit = nn_model.predict(XtestS)
AllStars['NN'] = rankdata(yfit,method='min')
AllStars

Unnamed: 0,Name,Season,Place,SVC,GNB,RFC,RFR,NN
1,Adore Delano,6,2,3,1,1,2,3
3,Alaska,5,2,3,1,3,4,7
6,Alyssa Edwards,5,6,8,8,8,9,5
15,Coco Montrese,5,5,8,8,8,10,9
22,Detox,5,4,8,8,5,8,9
25,Ginger Minj,7,2,1,7,1,1,1
39,Katya,7,5,6,1,5,6,6
70,Phi Phi O'Hara,4,3,5,1,8,5,3
76,Roxxxy Andrews,5,3,2,1,7,3,2
87,Tatianna,2,4,7,6,4,7,7


In [32]:
# Save the comparison table next to the notebook; with pandas' default
# settings the DataFrame index is written as the first CSV column.
AllStars.to_csv('AllStars.csv')