<a href="https://colab.research.google.com/github/mnocerino23/Winner-s-Circle-Strategy-Analysis/blob/main/Winner'sCircle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [134]:
import pandas as pd
import numpy as np
from google.colab import drive
#Mount Google Drive so that files stored there are reachable from the Colab runtime
drive.mount('/content/drive')

#Read the horse data from the csv file kept on the mounted drive
horses = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Data_Science_Projects/horses.csv'
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [135]:
#horses holds the 28 horses in the game together with how far each one runs
#for every face of the die (head, helmet, saddle, and horseshoe)

#The csv header misspells 'Horseshoe' as 'Horeshoe' - give the column its proper name
horses = horses.rename(columns = {'Horeshoe':'Horseshoe'})
print(horses.shape)
horses.head()

(28, 5)


Unnamed: 0,Name,Head,Helmet,Saddle,Horseshoe
0,Stymie,3,12,4,5
1,Twenty Grand,3,1,5,15
2,Gallant Fox,5,8,3,4
3,Devil Diver,2,14,1,9
4,Top Flight,1,3,12,12


In [136]:
#Inspect the dataframe with .info(): it lists the dtype and the non-null count of every column,
#which shows whether any values are missing
horses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Name       28 non-null     object
 1   Head       28 non-null     int64 
 2   Helmet     28 non-null     int64 
 3   Saddle     28 non-null     int64 
 4   Horseshoe  28 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 1.2+ KB


Feature Engineering - we will build additional stats for each horse based on probability and the goal of the game

In [137]:
#Average Advance - the expected length of a single move by a horse. For every horse we multiply the
#length of the move for each die face by the probability of rolling that face and add the results up
#In Winner's Circle the die is not even so Head has a 3/6 chance while helmet, saddle, and horseshoe all have a probability of 1/6

#AKA P(head) = 0.5
#P(helmet) = 0.166
#P(saddle) = 0.166
#P(horseshoe) = 0.166

#Computed on whole columns at once so the result is a numeric (float) column.
#Creating the column as '' and filling it row by row would leave it with dtype object
horses['Average Advance'] = (
    (1/2)*horses['Head']
    + (1/6)*horses['Helmet']
    + (1/6)*horses['Saddle']
    + (1/6)*horses['Horseshoe']
)

horses.head()


Unnamed: 0,Name,Head,Helmet,Saddle,Horseshoe,Average Advance
0,Stymie,3,12,4,5,5.0
1,Twenty Grand,3,1,5,15,5.0
2,Gallant Fox,5,8,3,4,5.0
3,Devil Diver,2,14,1,9,5.0
4,Top Flight,1,3,12,12,5.0


In [138]:
#Having calculated average advance we see that the creators of the game have made this equal for every horse. However, we can still engineer
#additional features to continue to explore the question of whether certain horses are better or all perform equally

#Note: despite the 'Prob-' prefix these columns hold probability-weighted move lengths, not probabilities.
#Both are computed on whole columns at once so they are numeric (float) columns rather than dtype object

#Prob-Non-Head - the part of the average advance contributed by Helmet, Saddle, and Horseshoe rolls
horses['Prob-Non-Head'] = (1/6)*horses['Helmet'] + (1/6)*horses['Saddle'] + (1/6)*horses['Horseshoe']
#Prob-Head - the part of the average advance contributed by Head rolls
horses['Prob-Head'] = (1/2)*horses['Head']

horses.head()

Unnamed: 0,Name,Head,Helmet,Saddle,Horseshoe,Average Advance,Prob-Non-Head,Prob-Head
0,Stymie,3,12,4,5,5.0,3.5,1.5
1,Twenty Grand,3,1,5,15,5.0,3.5,1.5
2,Gallant Fox,5,8,3,4,5.0,2.5,2.5
3,Devil Diver,2,14,1,9,5.0,4.0,1.0
4,Top Flight,1,3,12,12,5.0,4.5,0.5


In [139]:
#Add a feature named contains breakaway. If the horse has a run of 10 or more for any of the
#non-head dice rolls (Helmet, Saddle, Horseshoe), we say contains breakaway move is True.
#The Head move is deliberately not part of the check - only the faces listed in sides are compared
sides = ['Helmet','Saddle','Horseshoe']

#Compare all three columns with the threshold at once and reduce across each row;
#this yields a real boolean column instead of an object column filled cell by cell
horses['Contains Breakaway'] = (horses[sides] >= 10).any(axis=1)

In [140]:
#Good head - create a new feature that is True if head move > 3 (good) and False if head move <= 3 (poor)
#A single column comparison gives a real boolean column instead of an object column filled cell by cell
horses['Good Head'] = horses['Head'] > 3

horses.head(15)

Unnamed: 0,Name,Head,Helmet,Saddle,Horseshoe,Average Advance,Prob-Non-Head,Prob-Head,Contains Breakaway,Good Head
0,Stymie,3,12,4,5,5.0,3.5,1.5,True,False
1,Twenty Grand,3,1,5,15,5.0,3.5,1.5,True,False
2,Gallant Fox,5,8,3,4,5.0,2.5,2.5,False,True
3,Devil Diver,2,14,1,9,5.0,4.0,1.0,True,False
4,Top Flight,1,3,12,12,5.0,4.5,0.5,True,False
5,Busher,4,1,16,1,5.0,3.0,2.0,True,True
6,Regret,1,20,4,3,5.0,4.5,0.5,True,False
7,Discovery,4,10,7,1,5.0,3.0,2.0,True,True
8,Sir Barton,3,10,10,1,5.0,3.5,1.5,True,False
9,Gallorette,3,9,5,7,5.0,3.5,1.5,False,False


Below, I write a function to simulate the board game to see if certain horses are more likely to win

In [141]:
import random

def winners_circle(horse_names=None):
  """Set up the starting line-up for one simulated race of Winner's Circle.

  Draws 7 distinct horses at random and gives them the starting positions
  -7, -6, ..., -1 in the order they were drawn. The race itself (rolling the
  die and moving the horses) is not implemented yet.

  Parameters
  ----------
  horse_names : iterable of str, optional
    Names to draw the competitors from. When omitted, the 'Name' column of
    the notebook-level ``horses`` dataframe is used.

  Returns
  -------
  dict
    Maps the name of each of the 7 competitors to its starting position.

  Raises
  ------
  ValueError
    If fewer than 7 names are available to draw from.
  """
  #The unbalanced die has 3 heads, 1 helmet, 1 saddle, and 1 horseshoe
  #(not rolled yet - it is kept here for the race loop that is still to be written)
  die = ['Head', 'Head', 'Head', 'Helmet', 'Saddle', 'Horseshoe']

  if horse_names is None:
    horse_names = horses['Name']

  #Choose 7 random horses to start a round (simulates shuffling).
  #random.sample draws WITHOUT replacement; drawing with random.choice seven times could pick
  #the same horse twice, which would collapse into one dictionary key and shrink the field
  competitors = random.sample(list(horse_names), 7)

  #Initialize starting positions of each horse. We will use a dictionary to store the horses location on the board.
  #The first key in the dictionary to obtain a value higher than 36 wins the race
  positions = {name: start for start, name in enumerate(competitors, start=-7)}

  #TODO: simulate the race and record for the winner:
  #  - the name and starting position of the horse
  #  - number of moves it took
  #  - number of breakaways
  return positions
#Run the race set-up once as a quick check that the function executes
winners_circle()

{'Stymie': -7, 'Whirlaway': -6, 'Top Flight': -5, 'Gallorette': -4, 'Citation': -3, 'Armed': -2, 'Sir Barton': -1}


In [142]:
#dataframe that holds 