# Elo Score Calculation

In [1]:
import re
import os
import sys
from collections import defaultdict

In [2]:
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt

In [3]:
import numpy as np
import pandas as pd
import xlrd
from labellines import labelLine, labelLines

In [4]:
# setting path
sys.path.append('../../src')

In [5]:
from eloscore import eloscore

In [6]:
# Increase size of plot in jupyter

plt.rcParams["figure.figsize"] = (18,10)

## Summary

- Notebook that calculates the Elo Score of mice competing over access to a tone-associated reward port
- The data is a spreadsheet in which each row is one match (date, cage, box, and the pairing of mice) and the columns hold the time window and the winner of each trial

## Importing Data for One Sheet

- The original data has been trimmed so that only the cells with dates, winner IDs, and relevant metadata were kept

In [7]:
# Read the sheet names inside a context manager so the workbook's file handle is
# released as soon as the names are known. `xls` stays bound afterwards but is closed;
# the later cells read the sheets by path with `pd.read_excel`.
with pd.ExcelFile("./data/Reward_Competition_Scoring_MasterList.xlsx") as xls:
    sheets = xls.sheet_names

In [8]:
sheets

['Master Assignment List',
 'Ezra',
 'Albert',
 'Isabel',
 'Catalina',
 'Jocelyn',
 'Meghan',
 'Erika',
 'Naeliz']

In [9]:
# Load the sheet that follows "Master Assignment List" (sheets[1]), taking the
# column names from the first row of the sheet.
reward_competition_df = pd.read_excel(
    "./data/Reward_Competition_Scoring_MasterList.xlsx",
    sheet_name=sheets[1],
    header=0,
)

In [10]:
# Independent copy of the freshly loaded sheet, taken before any columns are added.
# NOTE(review): this copy is not referenced again in the visible cells.
copy_reward_competition_df = reward_competition_df.copy(deep=True)

In [11]:
reward_competition_df.head()

Unnamed: 0,Date,Cage,Box,Match,Trial 1 Timer,Trial 1 Winner,Trial 2 Time,Trial 2 Winner,Trial 3 Time,Trial 3 Winner,...,Trial 16 Winner,Trial 17 Time,Trial 17 Winner,Trial 18 Time,Trial 18 Winner,Trial 19 Time,Trial 19 Winner,Mouse 1 Wins,Mouse 2 Wins,Ties
0,2022-07-28,1,3,2v3,00:37-00:43,2.0,1:31-1:37,2.0,2:30-2:36,2,...,tie,16:42-16:48,3,18:02-18:08,tie,18:55-19:01,tie,12.0,3.0,4.0
1,2022-07-28,2,2,1v4,00:37-00:43,1.0,1:31-1:37,1.0,2:31-2:37,tie,...,1,16:42-16:48,4,18:02-18:08,4,18:55-19:01,4,8.0,8.0,3.0
2,2022-07-28,3,1,2v4,00:46-00:54,2.0,1:53-2:00,2.0,3:07-3:15,2,...,4,20:52-21:00,4,22:31-22:39,4,23:38-23:45,4,7.0,8.0,4.0
3,2022-07-28,5,2,3v4,00:46-00:53,4.0,1:52-2:00,4.0,3:07-3:15,4,...,3,20:09-20:16,4,21:38-21:44,3,22:36-22:43,3,6.0,13.0,0.0
4,2022-07-28,6,1,2v3,00:48-00:55,2.0,1:54-2:01,3.0,3:10-3:16,tie,...,tie,21:02-21:09,tie,22:22-22:49,3,23:48-23:55,3,7.0,7.0,4.0


## Adding the session number

- Each new session is marked by a row with a value in the `Date` column, so we will get the index of every row that has a date

In [12]:
reward_competition_df.dropna(axis=0, subset="Date")

Unnamed: 0,Date,Cage,Box,Match,Trial 1 Timer,Trial 1 Winner,Trial 2 Time,Trial 2 Winner,Trial 3 Time,Trial 3 Winner,...,Trial 16 Winner,Trial 17 Time,Trial 17 Winner,Trial 18 Time,Trial 18 Winner,Trial 19 Time,Trial 19 Winner,Mouse 1 Wins,Mouse 2 Wins,Ties
0,2022-07-28,1,3,2v3,00:37-00:43,2.0,1:31-1:37,2.0,2:30-2:36,2,...,tie,16:42-16:48,3,18:02-18:08,tie,18:55-19:01,tie,12.0,3.0,4.0
1,2022-07-28,2,2,1v4,00:37-00:43,1.0,1:31-1:37,1.0,2:31-2:37,tie,...,1,16:42-16:48,4,18:02-18:08,4,18:55-19:01,4,8.0,8.0,3.0
2,2022-07-28,3,1,2v4,00:46-00:54,2.0,1:53-2:00,2.0,3:07-3:15,2,...,4,20:52-21:00,4,22:31-22:39,4,23:38-23:45,4,7.0,8.0,4.0
3,2022-07-28,5,2,3v4,00:46-00:53,4.0,1:52-2:00,4.0,3:07-3:15,4,...,3,20:09-20:16,4,21:38-21:44,3,22:36-22:43,3,6.0,13.0,0.0
4,2022-07-28,6,1,2v3,00:48-00:55,2.0,1:54-2:01,3.0,3:10-3:16,tie,...,tie,21:02-21:09,tie,22:22-22:49,3,23:48-23:55,3,7.0,7.0,4.0
5,2022-07-29,1,2,2v3,00:38-00:44,2.0,1:31-1:37,2.0,2:31-2:37,3,...,2,16:43-16:49,tie,18:02-18:09,2,18:56-19:02,3,9.0,9.0,1.0
6,2022-07-29,3,4,1v2,,,,,,,...,,,,,,,,,,
7,2022-07-29,5,3,1v3,,,,,,,...,,,,,,,,,,
8,2022-07-29,6,1,3v4,,,,,,,...,,,,,,,,,,


- Getting the indexes of where each new session starts

In [13]:
reward_competition_df.dropna(axis=0, subset="Date").index

RangeIndex(start=0, stop=9, step=1)

In [14]:
# Index labels of the rows whose "Date" cell is not missing.
# NOTE(review): in the preview above every row has a Date, so every row is treated
# as the start of a session - confirm that is the intended meaning of "session".
rows_with_date = reward_competition_df.dropna(axis=0, subset="Date")
session_indexes = rows_with_date.index

- Adding a column for the session number and then filling all the `NaNs`

In [15]:
# Add the session-number column (shown as `session_number` in the preview below)
# for the rows listed in `session_indexes`.
reward_competition_df = eloscore.add_session_number_column(reward_competition_df, session_indexes)
# Forward-fill every missing cell from the row above. `DataFrame.ffill()` is the
# supported spelling; `fillna(method='ffill')` is deprecated in recent pandas.
# NOTE(review): the fill applies to ALL columns, so rows whose trial cells are empty
# (rows 6-8 in the preview above) inherit the previous row's trial times and winners.
# Confirm this is intended, or restrict the fill to the columns that should carry over.
reward_competition_df = reward_competition_df.ffill()

- Making another column for plotting

In [16]:
# Second session column, added after the forward fill so that it is not filled;
# the plotting cells drop rows where it is missing to find the session markers.
reward_competition_df = eloscore.add_session_number_column(
    reward_competition_df,
    session_indexes,
    session_number_column="session_number_plotting",
)

In [17]:
reward_competition_df.head()

Unnamed: 0,Date,Cage,Box,Match,Trial 1 Timer,Trial 1 Winner,Trial 2 Time,Trial 2 Winner,Trial 3 Time,Trial 3 Winner,...,Trial 17 Winner,Trial 18 Time,Trial 18 Winner,Trial 19 Time,Trial 19 Winner,Mouse 1 Wins,Mouse 2 Wins,Ties,session_number,session_number_plotting
0,2022-07-28,1,3,2v3,00:37-00:43,2.0,1:31-1:37,2.0,2:30-2:36,2,...,3,18:02-18:08,tie,18:55-19:01,tie,12.0,3.0,4.0,1.0,1.0
1,2022-07-28,2,2,1v4,00:37-00:43,1.0,1:31-1:37,1.0,2:31-2:37,tie,...,4,18:02-18:08,4,18:55-19:01,4,8.0,8.0,3.0,2.0,2.0
2,2022-07-28,3,1,2v4,00:46-00:54,2.0,1:53-2:00,2.0,3:07-3:15,2,...,4,22:31-22:39,4,23:38-23:45,4,7.0,8.0,4.0,3.0,3.0
3,2022-07-28,5,2,3v4,00:46-00:53,4.0,1:52-2:00,4.0,3:07-3:15,4,...,4,21:38-21:44,3,22:36-22:43,3,6.0,13.0,0.0,4.0,4.0
4,2022-07-28,6,1,2v3,00:48-00:55,2.0,1:54-2:01,3.0,3:10-3:16,tie,...,tie,22:22-22:49,3,23:48-23:55,3,7.0,7.0,4.0,5.0,5.0


## Calculating ELO Score

- Example calculation

In [18]:
eloscore.calculate_elo_score(subject_elo_score=1000, agent_elo_score=2000)

1020

## Get the Elo score for all the events

In [19]:
reward_competition_df.head()

Unnamed: 0,Date,Cage,Box,Match,Trial 1 Timer,Trial 1 Winner,Trial 2 Time,Trial 2 Winner,Trial 3 Time,Trial 3 Winner,...,Trial 17 Winner,Trial 18 Time,Trial 18 Winner,Trial 19 Time,Trial 19 Winner,Mouse 1 Wins,Mouse 2 Wins,Ties,session_number,session_number_plotting
0,2022-07-28,1,3,2v3,00:37-00:43,2.0,1:31-1:37,2.0,2:30-2:36,2,...,3,18:02-18:08,tie,18:55-19:01,tie,12.0,3.0,4.0,1.0,1.0
1,2022-07-28,2,2,1v4,00:37-00:43,1.0,1:31-1:37,1.0,2:31-2:37,tie,...,4,18:02-18:08,4,18:55-19:01,4,8.0,8.0,3.0,2.0,2.0
2,2022-07-28,3,1,2v4,00:46-00:54,2.0,1:53-2:00,2.0,3:07-3:15,2,...,4,22:31-22:39,4,23:38-23:45,4,7.0,8.0,4.0,3.0,3.0
3,2022-07-28,5,2,3v4,00:46-00:53,4.0,1:52-2:00,4.0,3:07-3:15,4,...,4,21:38-21:44,3,22:36-22:43,3,6.0,13.0,0.0,4.0,4.0
4,2022-07-28,6,1,2v3,00:48-00:55,2.0,1:54-2:01,3.0,3:10-3:16,tie,...,tie,22:22-22:49,3,23:48-23:55,3,7.0,7.0,4.0,5.0,5.0


In [20]:
eloscore.update_elo_score(winner_id="A", loser_id="B")

defaultdict(<function eloscore.eloscore.update_elo_score.<locals>.<lambda>()>,
            {'A': 1010.0, 'B': 990.0})

- Function that creates a dictionary that has the original/updated elo score for each event

In [21]:
# NOTE(review): this call currently fails with KeyError: ['winner'] (see the output
# below). `reward_competition_df` has no "winner"/"loser" columns; the sheet stores
# results in the "Trial N Winner" columns. Presumably the frame has to be reshaped
# to one row per trial with explicit winner/loser IDs before this cell can run.
index_to_elo_score_and_meta_data = eloscore.iterate_elo_score_calculation_for_data_frame(
    data_frame=reward_competition_df,
    winner_column="winner",
    loser_column="loser",
    additional_columns=["Date", "session_number", "session_number_plotting"],
)

KeyError: ['winner']

In [None]:
index_to_elo_score_and_meta_data[0]

- Turning the dictionary into a dataframe

In [None]:
# One DataFrame row per dictionary key (orient="index"); the inner values become the columns.
reward_competition_elo_score_df = pd.DataFrame.from_dict(
    data=index_to_elo_score_and_meta_data,
    orient="index",
)

In [None]:
reward_competition_elo_score_df.head()

In [None]:
reward_competition_elo_score_df.tail()

In [None]:
reward_competition_elo_score_df.groupby("subject_id").count()

In [None]:
reward_competition_elo_score_df.groupby("session_number").count()

# Plotting the Elo Score by match number

In [None]:
# Increase size of plot in jupyter

plt.rcParams["figure.figsize"] = (18,10)

In [None]:
# Y-axis range shared by the session markers and the axis limits
single_sheet_y_limits = (700, 1300)

fig, ax = plt.subplots()

# Drawing vertical lines that represent when each session begins: one dashed line
# per row that has a value in "session_number_plotting", drawn in a single call.
session_start_rows = reward_competition_elo_score_df.dropna(subset=["session_number_plotting"])
if not session_start_rows.empty:
    # Offsetting by 0.5 to avoid drawing the line on the dot
    ax.vlines(
        x=(session_start_rows["total_match_number"] - 0.5).to_numpy(),
        ymin=single_sheet_y_limits[0],
        ymax=single_sheet_y_limits[1],
        colors='black',
        linestyles='dashed',
    )

# Drawing a line for each subject
for subject in sorted(reward_competition_elo_score_df["subject_id"].unique()):
    # Getting all the rows with the current subject
    subject_df = reward_competition_elo_score_df[reward_competition_elo_score_df["subject_id"] == subject]
    # Updated Elo score against the running match number
    ax.plot(subject_df["total_match_number"], subject_df["updated_elo_score"], '-o', label=subject)

# Labeling the X/Y Axis and the title
ax.set_xlabel("Trial Number")
ax.set_ylabel("Elo Score")
# `sheets[1]` is the sheet that was loaded into `reward_competition_df`
ax.set_title("Elo Score for Reward Competition: {}".format(sheets[1]))
# To show the legend
ax.legend()
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylim(*single_sheet_y_limits)
plt.show()



# Calculate the Elo Score for all sheets

In [None]:
# Read the sheet names inside a context manager so the workbook's file handle is
# released as soon as the names are known. `xls` stays bound afterwards but is closed;
# the loop below reads each sheet by path with `pd.read_excel`.
with pd.ExcelFile("./data/Reward_Competition_Scoring_MasterList.xlsx") as xls:
    sheets = xls.sheet_names

In [None]:
sheets

In [None]:
# For every sheet after the first: load it, add the session columns, split multi-ID
# rows, and compute the Elo scores. Results are stored per sheet under the keys
# "recording_data_frame" and "elo_score_data_frame".
sheet_to_data_frame = defaultdict(dict)
for sheet in sheets[1:]:
    # NOTE(review): the single-sheet cells earlier in this notebook read sheets[1] with
    # header=0 and get the expected column names; confirm header=1 is right here.
    recording_df = pd.read_excel("./data/Reward_Competition_Scoring_MasterList.xlsx", sheet_name=sheet, header=1)

    # Adding the session numbers as a column
    session_indexes = recording_df.dropna(axis=0, subset="Date").index
    recording_df = eloscore.add_session_number_column(recording_df, session_indexes)
    # `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`; it fills
    # every column from the row above.
    recording_df = recording_df.ffill()
    # Added after the fill so this column stays unfilled; the plotting cell uses it
    # to find the session markers.
    recording_df = eloscore.add_session_number_column(recording_df, session_indexes, session_number_column="session_number_plotting")

    # Splitting all the rows with commas in the ID: each comma-separated ID gets its
    # own row, first for the winners and then for the losers.
    # NOTE(review): the sheet previewed earlier has "Trial N Winner" columns rather than
    # "winner"/"loser" - confirm these columns exist in the sheets read here.
    for id_column in ("winner", "loser"):
        recording_df[id_column] = recording_df[id_column].astype(str).str.split(",")
        recording_df = recording_df.explode(id_column).reset_index(drop=True)

    # Normalising the IDs so that e.g. "2.0" and "2" become the same string "2"
    for id_column in ("winner", "loser"):
        recording_df[id_column] = recording_df[id_column].astype(float).astype(int).astype(str)

    # Calculating the Elo Score
    index_to_elo_score_and_meta_data = eloscore.iterate_elo_score_calculation_for_data_frame(
        data_frame=recording_df,
        winner_column="winner",
        loser_column="loser",
        additional_columns=["Date", "session_number", "session_number_plotting"],
    )

    sheet_to_data_frame[sheet]["recording_data_frame"] = recording_df
    sheet_to_data_frame[sheet]["elo_score_data_frame"] = pd.DataFrame.from_dict(index_to_elo_score_and_meta_data, orient="index")

# Making plots for all sheets

- Getting the dates on which the data was recorded, to use in the output file names

In [None]:
sheets[1]

In [None]:
# Smallest value in the "Date" column of sheets[1], converted to text and cut at the
# first whitespace so only the leading token is kept.
# NOTE(review): only sheets[1] is inspected, although the result is used in the file
# name of every sheet's plot.
first_sheet_dates_for_min = sheet_to_data_frame[sheets[1]]["recording_data_frame"]["Date"]
earliest_date = str(first_sheet_dates_for_min.min()).split()[0]

In [None]:
# Largest value in the "Date" column of sheets[1], converted to text and cut at the
# first whitespace so only the leading token is kept.
# NOTE(review): only sheets[1] is inspected, although the result is used in the file
# name of every sheet's plot.
first_sheet_dates_for_max = sheet_to_data_frame[sheets[1]]["recording_data_frame"]["Date"]
latest_date = str(first_sheet_dates_for_max.max()).split()[0]

In [None]:
earliest_date

In [None]:
latest_date

- Getting the cage numbers

In [None]:
# Sorted, underscore-joined tag built from the lower-cased sheet names (first sheet
# excluded), used in the output directory name.
# `str.strip("cage")` removes any of the characters c/a/g/e from BOTH ends of the
# string (e.g. "ezra" -> "zr", "catalina" -> "talin"), so it mangles ordinary names.
# Only a literal leading "cage" prefix is removed here.
all_cages = "_".join(sorted(re.sub(r"^cage", "", sheet.lower().strip()).strip() for sheet in sheets[1:]))

In [None]:
all_cages

In [None]:
# Folder for the saved plots, named after the sheet tag and the date range
output_directory = "./proc/plots/reward_competition_elo_score/cage_{}_date_{}_{}".format(
    all_cages,
    earliest_date,
    latest_date,
)

In [None]:
output_directory

In [None]:
# Create the output folder together with any missing parent folders; an already
# existing folder is not an error.
os.makedirs(name=output_directory, exist_ok=True)

In [None]:
# Figure size for the per-sheet plots; it does not change between iterations
plt.rcParams["figure.figsize"] = (13.5,7.5)
# Y-axis range shared by the session markers and the axis limits
all_sheets_y_limits = (600, 1400)

# One figure per sheet: Elo score of every subject against the running match number,
# shown inline and saved as a PNG in `output_directory`.
for key, value in sheet_to_data_frame.items():
    print(key)
    elo_score_data_frame = value["elo_score_data_frame"]
    fig, ax = plt.subplots()

    # Drawing vertical lines that represent when each session begins: one dashed line
    # per row that has a value in "session_number_plotting", drawn in a single call.
    session_start_rows = elo_score_data_frame.dropna(subset=["session_number_plotting"])
    if not session_start_rows.empty:
        # Offsetting by 0.5 to avoid drawing the line on the dot
        ax.vlines(
            x=(session_start_rows["total_match_number"] - 0.5).to_numpy(),
            ymin=all_sheets_y_limits[0],
            ymax=all_sheets_y_limits[1],
            colors='black',
            linestyles='dashed',
        )

    # Drawing a line for each subject. The IDs are compared as strings on both sides:
    # the subjects are enumerated from the string-cast column, so filtering on the
    # uncast column would silently select nothing if the IDs were stored as numbers.
    subject_ids = elo_score_data_frame["subject_id"].astype(str)
    for subject in sorted(subject_ids.unique()):
        # Getting all the rows with the current subject
        subject_df = elo_score_data_frame[subject_ids == subject]
        # Updated Elo score against the running match number
        ax.plot(subject_df["total_match_number"], subject_df["updated_elo_score"], '-o', label=subject)

    # Labeling the X/Y Axis and the title
    ax.set_xlabel("Trial Number")
    ax.set_ylabel("Elo Score")
    # The title previously read "Home Cage Observation", which does not match this
    # reward-competition data set or the file names below.
    ax.set_title("Elo Score for Reward Competition: {}".format(key))
    # To show the legend
    ax.legend()
    ax.tick_params(axis="x", labelrotation=90)
    ax.set_ylim(*all_sheets_y_limits)

    file_name = "reward_competition_elo_score_{}_date_{}_{}.png".format(key, earliest_date, latest_date)
    fig.savefig(os.path.join(output_directory, file_name))

    plt.show()
    # Release the figure so figures do not accumulate in memory across sheets
    plt.close(fig)