# Elo Score Calculation

In [1]:
import re
from collections import defaultdict

In [2]:
import numpy as np
import pandas as pd

## Summary

- Notebook that calcuates the Elo Score of mice competing over access to a tone associated reward port
- The data is a spreadsheet of combination of subjects (as rows) against dates (as columns)

## Importing Data

- The original data has been trimmed so that only the cells with dates, winner ID's, and relevant metadata were kept

In [37]:
tube_test_df = pd.read_csv("../../data/tube_test_fights.csv")

In [38]:
tube_test_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4


## Doing it for a subset of the data

- To test out the code, we will be using the data from only one cage

In [39]:
# Getting all the rows that are from cage 1
cage_1_df = tube_test_df[tube_test_df["cage"] == 1]

In [40]:
cage_1_df

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
5,1,1.1 v 2.3,2.3,1.1,1.1,1.1,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1


## Getting a list of all the animals

- Because each cell does not contain the ID of all the animals that were competing, we will make a new column with both ID's

In [54]:
def get_all_animal_ids(animal_string):
    """
    Converts a string that contains the ID of animals, and only gets the IDs. 
    This usually removes extra characters that were added. (i.e. "1.1 v 2.2" to ("1.1", "2.2"))

    Args:
        animal_string(str): This is the first param.

    Returns:
        tuple: Of IDs of animals as strings
    """
    # Splitting by space so that we have a list of just the words
    all_words = animal_string.split()
    # Removing all words that are not numbers
    all_numbers = [num for num in all_words if re.match(r'^-?\d+(?:\.\d+)$', num)]
    return tuple(all_numbers)


In [48]:
print(get_all_animal_ids("1.1 v 2.3"))

('1.1', '2.3')


- Turning all the columns into string so that we can match the ID's from one column to another

In [45]:
for col in cage_1_df.columns:
    cage_1_df[col] = cage_1_df[col].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cage_1_df[col] = cage_1_df[col].astype(str)


In [50]:
cage_1_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May,all_animals
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1,"(1.1, 2.2)"
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2,"(2.2, 2.3)"
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.3, 1.4)"
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(1.4, 1.1)"
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.2, 1.4)"


In [49]:
cage_1_df["all_animals"] = cage_1_df["animal"].apply(lambda x: get_all_animal_ids(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cage_1_df["all_animals"] = cage_1_df["animal"].apply(lambda x: get_all_animal_ids(x))


In [51]:
cage_1_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May,all_animals
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1,"(1.1, 2.2)"
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2,"(2.2, 2.3)"
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.3, 1.4)"
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(1.4, 1.1)"
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.2, 1.4)"


## Elo Score Calculation

In [55]:
def calculate_elo_score(subject_elo_score, agent_elo_score, k_factor=20, score=1, number_of_decimals=None):
    """
    Calculates the Elo score of a given subject given it's original score, it's opponent, 
    the K-Factor, and whether or not it has won or not. 
    The calculation is based on: https://www.omnicalculator.com/sports/elo

    Args:
        subject_elo_score(float): The original Elo score for the subject
        agent_elo_score(float): The original Elo score for the agent
        k_factor(int): k-factor, or development coefficient. 
            - It usually takes values between 10 and 40, depending on player's strength 
        score(int): the actual outcome of the game. 
            - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0.
        number_of_decimals(int): Number of decimals to round to
        
    Returns:
        int: Updated Elo score of the subject
    """
    # Calculating the Elo score
    rating_difference = subject_elo_score - agent_elo_score
    expected_score = 1 / (1 + 10 ** (rating_difference / 400))
    new_elo_score = subject_rating + k_factor * (score - expected_score)
    # Rounding to `number_of_decimals`
    return round(new_elo_score, number_of_decimals)

In [53]:
calculate_elo_score(subject_rating=1500, agent_rating=500, score=0)

1500

## Calculate all the Elo scores for this cage

In [16]:
cage_1_df.columns

Index(['cage', 'animal', '25-Apr', '26-Apr', '27-Apr', '28-Apr', '29-Apr',
       '2-May', '4-May', '5-May', '6-May', '9-May', '10-May', '11-May',
       '12-May', '13-May', '20-May', 'all_animals'],
      dtype='object')

In [17]:
id_to_elo_score = defaultdict(lambda:1000)
id_to_day_to_elo_score = defaultdict(dict)
for column in cage_1_df:
    if column not in ['cage', 'animal', 'all_animals']:
        print("Current Date: {}".format(column))
        for index, row in cage_1_df.iterrows():
            if row[column] == "nan":
                continue
            else:            
                winner_id = row[column]
            loser_id = list(set(row["all_animals"]) - set([winner_id]))
            loser_id = loser_id[0]
            current_winner_rating = id_to_elo_score[winner_id] 
            current_loser_rating = id_to_elo_score[loser_id] 
            
            id_to_elo_score[winner_id] = calculate_elo_score(subject_rating=current_winner_rating, agent_rating=current_loser_rating, score=1)
            id_to_elo_score[loser_id] = calculate_elo_score(subject_rating=current_loser_rating, agent_rating=current_winner_rating, score=0)
            id_to_day_to_elo_score[winner_id][column] = id_to_elo_score[winner_id]
            id_to_day_to_elo_score[loser_id][column] = id_to_elo_score[loser_id]
            
    

Current Date: 25-Apr
Current Date: 26-Apr
Current Date: 27-Apr
Current Date: 28-Apr
Current Date: 29-Apr
Current Date: 2-May
Current Date: 4-May
Current Date: 5-May
Current Date: 6-May
Current Date: 9-May
Current Date: 10-May
Current Date: 11-May
Current Date: 12-May
Current Date: 13-May
Current Date: 20-May


In [18]:
id_to_elo_score

defaultdict(<function __main__.<lambda>()>,
            {'1.1': 1154.3595348295344,
             '2.2': 745.0332818587308,
             '2.3': 408.6268065515695,
             '1.4': 1691.9803767601657})

In [19]:
id_to_day_to_elo_score

defaultdict(dict,
            {'1.1': {'25-Apr': 990.5664993847546,
              '26-Apr': 999.4458171400197,
              '27-Apr': 1009.957621071186,
              '28-Apr': 1000.6205176739941,
              '29-Apr': 1011.3798817179143,
              '2-May': 1023.3639405434998,
              '5-May': 1036.574950338623,
              '6-May': 1050.951841150997,
              '9-May': 1066.382069063183,
              '10-May': 1082.7230643562225,
              '11-May': 1099.8243281762548,
              '12-May': 1117.5438244389834,
              '13-May': 1135.7569526082937,
              '20-May': 1154.3595348295344},
             '2.2': {'25-Apr': 989.1207441750823,
              '26-Apr': 956.4048576685256,
              '27-Apr': 940.4878516425163,
              '28-Apr': 943.9899314558725,
              '29-Apr': 927.1777239400035,
              '2-May': 909.1442970462924,
              '5-May': 890.1044834295956,
              '6-May': 870.2901800187615,
              '9-May

# Calculate Elo score for all cells

In [20]:
for col in tube_test_df.columns:
    tube_test_df[col] = tube_test_df[col].astype(str)

In [21]:
rename_dates_dict = {"25-Apr": "04_25",  "26-Apr": "04_26",  "27-Apr": "04_27",  "28-Apr": "04_28",  "29-Apr": "04_29",  "2-May": "05_02", "4-May": "05_04", "5-May": "05_05", "6-May": "05_06", "9-May": "05_09", "10-May": "05_10", "11-May": "05_11", "12-May": "05_12", "13-May": "05_13",  "20-May": "05_20"}

In [22]:
tube_test_df = tube_test_df.rename(columns=rename_dates_dict)

In [23]:
tube_test_df.head()

Unnamed: 0,cage,animal,04_25,04_26,04_27,04_28,04_29,05_02,05_04,05_05,05_06,05_09,05_10,05_11,05_12,05_13,05_20
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4


In [24]:
tube_test_df["all_animals"] = tube_test_df["animal"].apply(lambda x: get_all_animals(x))

## Get the Elo score for all fights

In [25]:
tube_test_df.columns

Index(['cage', 'animal', '04_25', '04_26', '04_27', '04_28', '04_29', '05_02',
       '05_04', '05_05', '05_06', '05_09', '05_10', '05_11', '05_12', '05_13',
       '05_20', 'all_animals'],
      dtype='object')

In [36]:
id_to_elo_score = defaultdict(lambda:1000)
id_to_day_to_elo_score = defaultdict(dict)
for column in tube_test_df.columns:
    if column not in ['cage', 'animal', 'all_animals']:
        print("Current Date: {}".format(column))
        id_to_match_number = defaultdict(lambda:1)

        for index, row in tube_test_df.iterrows():
            if row[column] == "nan":
                continue
            else:            
                winner_id = row[column]
            
            
            loser_id = list(set(row["all_animals"]) - set([winner_id]))
            loser_id = loser_id[0]
            current_winner_rating = id_to_elo_score[winner_id] 
            current_loser_rating = id_to_elo_score[loser_id] 
            
            id_to_elo_score[winner_id] = calculate_elo_score(subject_rating=current_winner_rating, agent_rating=current_loser_rating, score=1)
            id_to_elo_score[loser_id] = calculate_elo_score(subject_rating=current_loser_rating, agent_rating=current_winner_rating, score=0)
            id_to_day_to_elo_score[winner_id]["cage"] = row["cage"]
            id_to_day_to_elo_score[loser_id]["cage"] = row["cage"]          
            id_to_day_to_elo_score[winner_id]["{}_match_{}".format(column, id_to_match_number[winner_id])] = id_to_elo_score[winner_id]
            id_to_day_to_elo_score[loser_id]["{}_match_{}".format(column, id_to_match_number[winner_id])] = id_to_elo_score[loser_id]
            print(row["animal"], "cage: ", row["cage"])
            print(winner_id, id_to_match_number[winner_id])
            print(loser_id, id_to_match_number[loser_id])
            id_to_match_number[winner_id] += 1
            id_to_match_number[loser_id] += 1
    

Current Date: 04_25
1.1 v 2.2 cage:  1
1.1 1
2.2 1
2.2 v 2.3 cage:  1
2.2 2
2.3 1
2.3 v 1.4 cage:  1
1.4 1
2.3 2
1.4 v 1.1 cage:  1
1.4 2
1.1 2
2.2 v 1.4 cage:  1
1.4 3
2.2 3
1.1 v 2.3 cage:  1
2.3 3
1.1 3
1.2 v 1.3 cage:  2
1.2 1
1.3 1
1.3 v 2.4 cage:  2
2.4 1
1.3 2
2.1 v 1.3 cage:  2
2.1 1
1.3 3
3.2 v 4.2 cage:  3
3.2 1
4.2 1
4.2 v 3.3 cage:  3
4.2 2
3.3 1
3.3 v 4.1 cage:  3
3.3 2
4.1 1
4.1 v 3.2 cage:  3
3.2 2
4.1 2
4.1 v 4.2 cage:  3
4.2 3
4.1 3
3.2 v 3.3 cage:  3
3.3 3
3.2 3
3.4 v 4.3 cage:  4
4.3 1
3.4 1
4.3 v 3.1 cage:  4
4.3 2
3.1 1
3.1 v 4.4 cage:  4
3.1 2
4.4 1
4.4 v 3.4 cage:  4
3.4 2
4.4 2
4.3 v 4.4 cage:  4
4.3 3
4.4 3
3.4 v 3.1 cage:  4
3.1 3
3.4 3
Current Date: 04_26
1.1 v 2.2 cage:  1
1.1 1
2.2 1
2.2 v 2.3 cage:  1
2.3 1
2.2 2
2.3 v 1.4 cage:  1
1.4 1
2.3 2
1.4 v 1.1 cage:  1
1.4 2
1.1 2
2.2 v 1.4 cage:  1
1.4 3
2.2 3
1.1 v 2.3 cage:  1
1.1 3
2.3 3
2.1 v 1.2 cage:  2
2.1 1
1.2 1
1.2 v 1.3 cage:  2
1.2 2
1.3 1
1.3 v 2.4 cage:  2
2.4 1
1.3 2
2.4 v 2.1 cage:  2
2.4 2
2.1 2

In [27]:
id_to_elo_score

defaultdict(<function __main__.<lambda>()>,
            {'1.1': 1148.7856476091606,
             '2.2': 775.9004494043129,
             '2.3': 402.5305203032688,
             '1.4': 1690.6829311768026,
             '1.2': 814.2572997871117,
             '1.3': 439.4400066101164,
             '2.4': 1484.1112805347207,
             '2.1': 1244.291864574505,
             '3.2': 1676.7308664601276,
             '4.2': 1243.321604033252,
             '3.3': 780.5585725784883,
             '4.1': 299.38895692813134,
             '4.3': 1587.6518944292377,
             '3.4': 1131.0906235005993,
             '3.1': 1005.8660918659234,
             '4.4': 275.39139020423966})

In [28]:
id_to_day_to_elo_score

defaultdict(dict,
            {'1.1': {'cage': '1',
              '04_25_match_1': 1010.0,
              '04_25_match_2': 999.991956254121,
              '04_25_match_3': 990.5664993847546,
              '04_26_match_1': 1000.6081113244983,
              '04_26_match_2': 989.4288003083827,
              '04_26_match_3': 999.4458171400197,
              '04_27_match_1': 1010.6783369358568,
              '04_27_match_2': 998.7642386504956,
              '04_27_match_3': 1009.957621071186,
              '04_28_match_1': 1001.9308944139408,
              '04_28_match_2': 988.7057527519511,
              '04_28_match_3': 1000.6205176739941,
              '04_29_match_1': 1011.730691609488,
              '04_29_match_2': 997.6545814568298,
              '04_29_match_3': 1010.8624439325795,
              '05_02_match_1': 1022.6946826909474,
              '05_02_match_2': 1007.7912028258518,
              '05_02_match_3': 1022.2865806235221,
              '05_05_match_1': 1034.8852337592525,
 

In [29]:
elo_score_df = pd.DataFrame.from_dict(id_to_day_to_elo_score, orient="index")

In [30]:
elo_score_df = elo_score_df.reindex(sorted(elo_score_df.columns), axis=1)

In [31]:
elo_score_df

Unnamed: 0,04_25_match_1,04_25_match_2,04_25_match_3,04_26_match_1,04_26_match_2,04_26_match_3,04_27_match_1,04_27_match_2,04_27_match_3,04_28_match_1,...,05_12_match_1,05_12_match_2,05_12_match_3,05_13_match_1,05_13_match_2,05_13_match_3,05_20_match_1,05_20_match_2,05_20_match_3,cage
1.1,1010.0,999.991956,990.566499,1000.608111,989.4288,999.445817,1010.678337,998.764239,1009.957621,1001.930894,...,1112.063676,1093.672186,1112.901537,1129.836212,1111.133554,1130.621616,1148.078558,1129.121659,1148.785648,1
2.2,990.0,999.712256,989.120744,968.781191,,956.404858,945.172338,954.206881,940.487852,948.514578,...,819.524009,835.665901,815.981372,799.046696,815.774612,795.976847,778.519905,795.771441,775.900449,1
2.3,980.008275,990.287744,989.433732,988.837574,,978.820558,957.103357,969.786015,945.909975,921.348622,...,534.829081,554.75008,515.599731,478.912908,498.871815,459.424846,422.194509,442.17331,402.53052,1
1.4,1010.279468,1020.287512,1030.879024,1041.773123,1052.952434,1065.328768,1078.011425,1089.925523,1103.644553,1118.13094,...,1535.34089,1553.73238,1573.416909,1593.375816,1612.078474,1631.876239,1651.85504,1670.81194,1690.682931,1
1.2,1010.0,,,999.983212,1010.868941,1000.266983,989.631307,1001.19257,989.338578,,...,856.894154,873.04434,853.993247,836.891559,853.633034,834.335137,816.476459,833.741514,814.2573,2
1.3,969.128991,,,946.756406,958.243262,934.940186,910.080958,923.378924,896.957142,882.167028,...,571.862846,591.648841,552.582455,515.968308,535.84098,496.388713,459.198781,479.123658,439.440007,2
2.4,1010.287744,,,1021.7746,1031.808406,1042.410363,1055.708329,1066.360479,1078.21447,1093.004585,...,1397.808445,1394.122819,1413.173912,1433.046584,1429.27664,1448.574537,1468.499415,1464.627066,1484.111281,2
2.1,1010.583265,,,1020.600053,1010.566247,1022.382467,1033.018143,1022.365994,1035.48981,,...,1139.38482,1143.070446,1162.350837,1179.452526,1183.222469,1202.802064,1220.660742,1224.533091,1244.291865,2
3.2,1010.0,1020.566994,1011.158032,1021.183094,1032.974605,1043.618926,1054.245895,1067.865055,1079.185665,1091.037151,...,1521.706767,1541.668241,1561.284303,1579.019079,1598.999239,1618.749872,1636.88573,1656.874335,1676.730866,3
4.2,990.0,999.712256,1010.287261,1000.262199,1009.998756,1021.807176,1001.504384,,1014.107902,1002.256416,...,1166.917764,1184.035979,1203.798475,1186.063699,1203.678627,1223.526201,1205.390343,1223.430817,1243.321604,3


In [32]:
elo_score_df.to_csv("./proc/id_to_date_elo_score.csv")
# elo_score_df.to_excel("./proc/id_to_date_elo_score.xlsx")

# DROP days that have have draws