# Elo Score Calculation

In [1]:
import re
from collections import defaultdict

In [2]:
import numpy as np
import pandas as pd

## Summary

- Notebook that calcuates the Elo Score of mice competing over access to a tone associated reward port
- The data is a spreadsheet of combination of subjects (as rows) against dates (as columns)

## Importing Data

- The original data has been trimmed so that only the cells with dates, winner ID's, and relevant metadata were kept

In [37]:
tube_test_df = pd.read_csv("../../data/tube_test_fights.csv")

In [38]:
tube_test_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4


## Doing it for a subset of the data

- To test out the code, we will be using the data from only one cage

In [39]:
# Getting all the rows that are from cage 1
cage_1_df = tube_test_df[tube_test_df["cage"] == 1]

In [40]:
cage_1_df

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
5,1,1.1 v 2.3,2.3,1.1,1.1,1.1,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1


## Getting a list of all the animals

- Because each cell does not contain the ID of all the animals that were competing, we will make a new column with both ID's

In [54]:
def get_all_animal_ids(animal_string):
    """
    Converts a string that contains the ID of animals, and only gets the IDs. 
    This usually removes extra characters that were added. (i.e. "1.1 v 2.2" to ("1.1", "2.2"))

    Args:
        animal_string(str): This is the first param.

    Returns:
        tuple: Of IDs of animals as strings
    """
    # Splitting by space so that we have a list of just the words
    all_words = animal_string.split()
    # Removing all words that are not numbers
    all_numbers = [num for num in all_words if re.match(r'^-?\d+(?:\.\d+)$', num)]
    return tuple(all_numbers)


In [48]:
print(get_all_animal_ids("1.1 v 2.3"))

('1.1', '2.3')


- Turning all the columns into string so that we can match the ID's from one column to another

In [45]:
for col in cage_1_df.columns:
    cage_1_df[col] = cage_1_df[col].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cage_1_df[col] = cage_1_df[col].astype(str)


In [50]:
cage_1_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May,all_animals
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1,"(1.1, 2.2)"
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2,"(2.2, 2.3)"
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.3, 1.4)"
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(1.4, 1.1)"
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.2, 1.4)"


In [49]:
cage_1_df["all_animals"] = cage_1_df["animal"].apply(lambda x: get_all_animal_ids(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cage_1_df["all_animals"] = cage_1_df["animal"].apply(lambda x: get_all_animal_ids(x))


In [51]:
cage_1_df.head()

Unnamed: 0,cage,animal,25-Apr,26-Apr,27-Apr,28-Apr,29-Apr,2-May,4-May,5-May,6-May,9-May,10-May,11-May,12-May,13-May,20-May,all_animals
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1,"(1.1, 2.2)"
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2,"(2.2, 2.3)"
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.3, 1.4)"
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(1.4, 1.1)"
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,"(2.2, 1.4)"


## Elo Score Calculation

In [105]:
def calculate_elo_score(subject_elo_score, agent_elo_score, k_factor=20, score=1, number_of_decimals=None):
    """
    Calculates the Elo score of a given subject given it's original score, it's opponent, 
    the K-Factor, and whether or not it has won or not. 
    The calculation is based on: https://www.omnicalculator.com/sports/elo

    Args:
        subject_elo_score(float): The original Elo score for the subject
        agent_elo_score(float): The original Elo score for the agent
        k_factor(int): k-factor, or development coefficient. 
            - It usually takes values between 10 and 40, depending on player's strength 
        score(int): the actual outcome of the game. 
            - In chess, a win counts as 1 point, a draw is equal to 0.5, and a lose gives 0.
        number_of_decimals(int): Number of decimals to round to
        
    Returns:
        int: Updated Elo score of the subject
    """
    # Calculating the Elo score
    rating_difference = agent_elo_score - subject_elo_score
    expected_score = 1 / (1 + 10 ** (rating_difference / 400))
    new_elo_score = subject_elo_score + k_factor * (score - expected_score)
    # Rounding to `number_of_decimals`
    return round(new_elo_score, number_of_decimals)

In [106]:
calculate_elo_score(subject_elo_score=1500, agent_elo_score=500, score=0)

1480

## Calculate all the Elo scores for this cage

In [16]:
cage_1_df.columns

Index(['cage', 'animal', '25-Apr', '26-Apr', '27-Apr', '28-Apr', '29-Apr',
       '2-May', '4-May', '5-May', '6-May', '9-May', '10-May', '11-May',
       '12-May', '13-May', '20-May', 'all_animals'],
      dtype='object')

In [73]:
# Dictionary that keeps track of the current Elo score of the subject
id_to_elo_score = defaultdict(lambda:1000)

index_to_elo_score_and_meta_data = defaultdict(dict)
# Iterating through each column which is a day
all_indexes = iter(range(0, 99999))
for column in cage_1_df:
    if column not in ['cage', 'animal', 'all_animals']:
        print("Current Date: {}".format(column))
        # Keeping track of the number of matches
        id_to_match_number = defaultdict(lambda:1)
        for index, row in cage_1_df.iterrows():
            # Checking if there is an Nan or not
            if row[column] == "nan":
                continue
            else:            
                winner_id = row[column]

            # Getting the ID of the loser subject
            loser_id = list(set(row["all_animals"]) - set([winner_id]))
            loser_id = loser_id[0]
            # Getting the current Elo Score
            current_winner_rating = id_to_elo_score[winner_id] 
            current_loser_rating = id_to_elo_score[loser_id] 
            # Calculating Elo score            
            id_to_elo_score[winner_id] = calculate_elo_score(subject_elo_score=current_winner_rating, agent_elo_score=current_loser_rating, score=1)
            id_to_elo_score[loser_id] = calculate_elo_score(subject_elo_score=current_loser_rating, agent_elo_score=current_winner_rating, score=0)
            
            # Saving all the data for the winner
            winner_index = next(all_indexes)
            index_to_elo_score_and_meta_data[winner_index]["date"] = column
            index_to_elo_score_and_meta_data[winner_index]["match_number"] = id_to_match_number[winner_id]
            index_to_elo_score_and_meta_data[winner_index]["subject_id"] = winner_id
            index_to_elo_score_and_meta_data[winner_index]["agent_id"] = loser_id
            index_to_elo_score_and_meta_data[winner_index]["original_elo_score"] = current_winner_rating
            index_to_elo_score_and_meta_data[winner_index]["updated_elo_score"] = id_to_elo_score[winner_id]
            index_to_elo_score_and_meta_data[winner_index]["win_draw_loss"] = 1
            
            # Saving all the data for the loser
            loser_index = next(all_indexes)
            index_to_elo_score_and_meta_data[loser_index]["date"] = column
            index_to_elo_score_and_meta_data[loser_index]["match_number"] = id_to_match_number[loser_id]
            index_to_elo_score_and_meta_data[loser_index]["subject_id"] = loser_id
            index_to_elo_score_and_meta_data[loser_index]["agent_id"] = winner_id
            index_to_elo_score_and_meta_data[loser_index]["original_elo_score"] = current_loser_rating
            index_to_elo_score_and_meta_data[loser_index]["updated_elo_score"] = id_to_elo_score[loser_id]
            index_to_elo_score_and_meta_data[loser_index]["win_draw_loss"] = 0
            
            id_to_match_number[winner_id] += 1
            id_to_match_number[loser_id] += 1
        break
    

Current Date: 25-Apr


# Calculate Elo score for all cells

In [77]:
for col in tube_test_df.columns:
    tube_test_df[col] = tube_test_df[col].astype(str)

In [78]:
rename_dates_dict = {"25-Apr": "04_25",  "26-Apr": "04_26",  "27-Apr": "04_27",  "28-Apr": "04_28",  "29-Apr": "04_29",  "2-May": "05_02", "4-May": "05_04", "5-May": "05_05", "6-May": "05_06", "9-May": "05_09", "10-May": "05_10", "11-May": "05_11", "12-May": "05_12", "13-May": "05_13",  "20-May": "05_20"}

In [79]:
tube_test_df = tube_test_df.rename(columns=rename_dates_dict)

In [80]:
tube_test_df.head()

Unnamed: 0,cage,animal,04_25,04_26,04_27,04_28,04_29,05_02,05_04,05_05,05_06,05_09,05_10,05_11,05_12,05_13,05_20
0,1,1.1 v 2.2,1.1,1.1,1.1,2.2,1.1,1.1,,1.1,1.1,1.1,1.1,1.1,1.1,1.1,1.1
1,1,2.2 v 2.3,2.2,2.3,2.2,2.2,2.2,2.2,,2.2,2.2,2.2,2.2,2.2,2.2,2.2,2.2
2,1,2.3 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
3,1,1.4 v 1.1,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
4,1,2.2 v 1.4,1.4,1.4,1.4,1.4,1.4,1.4,,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4


In [81]:
tube_test_df["all_animals"] = tube_test_df["animal"].apply(lambda x: get_all_animals(x))

## Get the Elo score for one fight

In [82]:
tube_test_df.columns

Index(['cage', 'animal', '04_25', '04_26', '04_27', '04_28', '04_29', '05_02',
       '05_04', '05_05', '05_06', '05_09', '05_10', '05_11', '05_12', '05_13',
       '05_20', 'all_animals'],
      dtype='object')

In [107]:
# Dictionary that keeps track of the current Elo score of the subject
id_to_elo_score = defaultdict(lambda:1000)

index_to_elo_score_and_meta_data = defaultdict(dict)
# Iterating through each column which is a day
all_indexes = iter(range(0, 99999))
for column in tube_test_df:
    if column not in ['cage', 'animal', 'all_animals']:
        print("Current Date: {}".format(column))
        # Keeping track of the number of matches
        id_to_match_number = defaultdict(lambda:1)
        for index, row in tube_test_df.iterrows():
            # Checking if there is an Nan or not
            if row[column] == "nan":
                continue
            else:            
                winner_id = row[column]

            # Getting the ID of the loser subject
            loser_id = list(set(row["all_animals"]) - set([winner_id]))
            loser_id = loser_id[0]
            # Getting the current Elo Score
            current_winner_rating = id_to_elo_score[winner_id] 
            current_loser_rating = id_to_elo_score[loser_id] 
            # Calculating Elo score            
            id_to_elo_score[winner_id] = calculate_elo_score(subject_elo_score=current_winner_rating, agent_elo_score=current_loser_rating, score=1, number_of_decimals=1)
            id_to_elo_score[loser_id] = calculate_elo_score(subject_elo_score=current_loser_rating, agent_elo_score=current_winner_rating, score=0, number_of_decimals=1)
            
            # Saving all the data for the winner
            winner_index = next(all_indexes)
            index_to_elo_score_and_meta_data[winner_index]["date"] = column
            index_to_elo_score_and_meta_data[winner_index]["match_number"] = id_to_match_number[winner_id]
            index_to_elo_score_and_meta_data[winner_index]["cage"] = row["cage"]
            index_to_elo_score_and_meta_data[winner_index]["subject_id"] = winner_id
            index_to_elo_score_and_meta_data[winner_index]["agent_id"] = loser_id
            index_to_elo_score_and_meta_data[winner_index]["original_elo_score"] = current_winner_rating
            index_to_elo_score_and_meta_data[winner_index]["updated_elo_score"] = id_to_elo_score[winner_id]
            index_to_elo_score_and_meta_data[winner_index]["win_draw_loss"] = 1
            
            
            # Saving all the data for the loser
            loser_index = next(all_indexes)
            index_to_elo_score_and_meta_data[loser_index]["date"] = column
            index_to_elo_score_and_meta_data[loser_index]["match_number"] = id_to_match_number[loser_id]
            index_to_elo_score_and_meta_data[loser_index]["cage"] = row["cage"]
            index_to_elo_score_and_meta_data[loser_index]["subject_id"] = loser_id
            index_to_elo_score_and_meta_data[loser_index]["agent_id"] = winner_id
            index_to_elo_score_and_meta_data[loser_index]["original_elo_score"] = current_loser_rating
            index_to_elo_score_and_meta_data[loser_index]["updated_elo_score"] = id_to_elo_score[loser_id]
            index_to_elo_score_and_meta_data[loser_index]["win_draw_loss"] = 0
            
            id_to_match_number[winner_id] += 1
            id_to_match_number[loser_id] += 1
    

Current Date: 04_25
Current Date: 04_26
Current Date: 04_27
Current Date: 04_28
Current Date: 04_29
Current Date: 05_02
Current Date: 05_04
Current Date: 05_05
Current Date: 05_06
Current Date: 05_09
Current Date: 05_10
Current Date: 05_11
Current Date: 05_12
Current Date: 05_13
Current Date: 05_20


In [108]:
id_to_elo_score

defaultdict(<function __main__.<lambda>()>,
            {'1.1': 1065.7,
             '2.2': 937.8,
             '2.3': 778.9,
             '1.4': 1238.0,
             '1.2': 937.4,
             '1.3': 781.4,
             '2.4': 1145.0,
             '2.1': 1115.8,
             '3.2': 1228.3,
             '4.2': 1082.2,
             '3.3': 905.0,
             '4.1': 784.5,
             '4.3': 1110.6,
             '3.4': 1129.5,
             '3.1': 981.9,
             '4.4': 778.0})

In [109]:
index_to_elo_score_and_meta_data[0]

{'date': '04_25',
 'match_number': 1,
 'cage': '1',
 'subject_id': '1.1',
 'agent_id': '2.2',
 'original_elo_score': 1000,
 'updated_elo_score': 1010.0,
 'win_draw_loss': 1}

In [110]:
elo_score_df = pd.DataFrame.from_dict(index_to_elo_score_and_meta_data, orient="index")

In [111]:
elo_score_df.head(n=25)

Unnamed: 0,date,match_number,cage,subject_id,agent_id,original_elo_score,updated_elo_score,win_draw_loss
0,04_25,1,1,1.1,2.2,1000.0,1010.0,1
1,04_25,1,1,2.2,1.1,1000.0,990.0,0
2,04_25,2,1,2.2,2.3,990.0,1000.3,1
3,04_25,1,1,2.3,2.2,1000.0,989.7,0
4,04_25,1,1,1.4,2.3,1000.0,1009.7,1
5,04_25,2,1,2.3,1.4,989.7,980.0,0
6,04_25,2,1,1.4,1.1,1009.7,1019.7,1
7,04_25,2,1,1.1,1.4,1010.0,1000.0,0
8,04_25,3,1,1.4,2.2,1019.7,1029.1,1
9,04_25,3,1,2.2,1.4,1000.3,990.9,0


In [32]:
elo_score_df.to_csv("./proc/id_to_date_elo_score.csv")
# elo_score_df.to_excel("./proc/id_to_date_elo_score.xlsx")

# DROP days that have have draws