# ELO Score, Weight, and RC Distance Consolidation

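Consolidates the ELO rating histories with animal weights, pair-level summaries from the home cage observation, reward competition (RC), and tube test experiments, and per-trial RC distance data into a single table that is saved as a pickle.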

# Summary

In [1]:
import copy
import re
import os
import sys
import string
import glob
import ast
from collections import Counter
from collections import defaultdict
import warnings

In [2]:
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
import git
# Getting the path of the root directory so that we can import repo specific functions
# search_parent_directories=True is passed so the lookup is not limited to the
# current directory '.' itself.
git_repo_object = git.Repo('.', search_parent_directories=True)
# Working-tree directory of the repository; used to build the path to the "src" folder.
git_repo_directory = git_repo_object.working_tree_dir

In [4]:
# Make the repository's "src" folder importable from this notebook
repo_source_directory = os.path.join(git_repo_directory, "src")
sys.path.append(repo_source_directory)

In [5]:
# Display the resolved "src" directory to confirm which path was added to sys.path
os.path.join(git_repo_directory, "src")


'/nancy/projects/social_dominance_active_inference/src'

In [6]:
from elorating import calculation
from elorating import dataframe

In [7]:
# Use a larger default figure size for every plot rendered in this notebook
default_figure_size = (18, 10)
plt.rcParams.update({"figure.figsize": default_figure_size})

## Inputs & Data

Explanation of each input and where it comes from.

In [8]:
# Inputs and Required data loading
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the variables are all-lowercase snake case.
# NOTE(review): this value is overwritten by the later `OUTPUT_DIR = r"./proc"` cell,
# so this assignment has no lasting effect when the notebook is run top to bottom.
OUTPUT_DIR = r"/root/work/" # where data is saved should always be shown in the inputs

## Inputs & Data

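Inputs are the animal weights CSV (`WEIGHTS_PATH`), the ELO rating history CSVs found under `./proc/elo_rating_spread_sheets/`, the pair-level summary CSVs for reward competition, tube test, and home cage observation, and the per-trial RC distance pickle from `../2023_09_18_rc_sleap_analysis/`.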

In [9]:
# Inputs and Required data loading
# Input variable names are in all-caps snake case.
# Whenever an input changes or is used for processing,
# the variables are all-lowercase snake case.
# NOTE(review): no cell visible in this notebook reads OUTPUT_DIR; the input paths
# and the final pickle path are written out literally instead.
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs

In [10]:
# CSV of animal weights; it is parsed later with a two-row column header.
WEIGHTS_PATH = "../../data/pilot_3/Reward_Training_Weights_C57vsCD1_Pilot3.csv"
# NOTE: despite the name, this holds the list of matching file paths returned by
# glob.glob, not the glob pattern itself.
ELO_RATING_HISTORY_GLOB = glob.glob("./proc/elo_rating_spread_sheets/*/*elo-rating-history.csv") 

# Pair-level summary tables (first CSV column used as the index); each is later
# joined on its own "<prefix>_tuple_animal_id" column.
PAIRED_RC = pd.read_csv("./proc/elo_rating_spread_sheets/reward_competition/reward_competition_grouped_by_pairs_cage_1_2_3_4_5_6_date_20221003_20221004.csv", index_col=0)
PAIRED_TT = pd.read_csv("./proc/elo_rating_spread_sheets/tube_test/tt_grouped_by_pairs_cage_cages-1-2-3-4-5-6_date_2022-09-06_2022-09-27.csv", index_col=0)
PAIRED_HCO = pd.read_csv("./proc/elo_rating_spread_sheets/home_cage_observation/hco_grouped_by_pairs_cage_cages-2-4-5_date_2022-09-07_2022-10-05.csv", index_col=0)
# NOTE(review): unpickling executes arbitrary code; presumably this file is produced
# by the sibling RC SLEAP analysis notebook — only load it from a trusted source.
RC_DISTANCE_DF = pd.read_pickle("../2023_09_18_rc_sleap_analysis/per_trial_rc_distance.pkl")

## Outputs

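The notebook writes one data output, `./elo_score_and_rc_distance.pkl`: a pickled DataFrame that combines the ELO rating history with subject and agent weights, the pair-level summaries, and the per-trial RC distance measurements.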

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Processing

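The weights sheet is reshaped to one row per animal and date and joined onto the ELO rating history for both the subject and the agent. The pair-level summaries and the per-trial RC distance data are then merged in, unneeded columns are dropped, and the result is saved.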

In [11]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names



- Reading in the spreadsheet with the animals' weights

In [12]:
# The weight CSV has a two-row column header. Level 1 of that header is stacked
# into the rows, so every (cage, id) pair gets one row per level-1 value; those
# values are then parsed as dates.
weights_df = (
    pd.read_csv(WEIGHTS_PATH, header=[0, 1])
    .set_index([('cage', 'cage'), ('id', 'id')])
    .stack(level=1)
    .reset_index()
    # The stacked header level comes back as an unnamed index level
    .rename(columns={'level_2': 'date'})
)
weights_df["date"] = pd.to_datetime(weights_df["date"], format='%m/%d/%Y')
weights_df = (
    weights_df
    # Flatten the tuple-named index columns into plain names
    .rename(columns={('cage', 'cage'): "cage", ('id', 'id'): "id"})
    .sort_values(by=["date", "cage", "id"])
    .reset_index(drop=True)
)

In [13]:
# Check the column names of the reshaped weights table
weights_df.columns

Index(['cage', 'id', 'date', 'weight', 'percent_body_weight', 'amount_fed',
       'Notes'],
      dtype='object')

In [14]:
# Columns that get a "subject_" or "agent_" prefix when the weights table is
# duplicated for the two sides of each interaction
weight_col = ['id', 'weight', 'percent_body_weight', 'amount_fed']

In [15]:
def _prefixed_copy(frame, name_template):
    """Return a copy of ``frame`` with every column listed in ``weight_col`` renamed.

    ``name_template`` is a ``str.format`` template that receives the original
    column name; columns not listed in ``weight_col`` keep their name.
    """
    renamed = frame.copy()
    renamed.columns = [
        name_template.format(column) if column in weight_col else column
        for column in renamed.columns
    ]
    return renamed


# One copy per side of the interaction so the weights can be joined twice
subject_weights_df = _prefixed_copy(weights_df, "subject_{}")
agent_weights_df = _prefixed_copy(weights_df, "agent_{}")

In [16]:
# Preview the subject-prefixed weights table
subject_weights_df.head()

Unnamed: 0,cage,subject_id,date,subject_weight,subject_percent_body_weight,subject_amount_fed,Notes
0,1,1.1,2022-09-18,26.0,1.0,5.0,
1,1,1.2,2022-09-18,25.2,1.0,5.0,
2,1,1.3,2022-09-18,24.8,1.0,5.0,
3,1,1.4,2022-09-18,24.7,1.0,5.0,
4,2,2.1,2022-09-18,29.5,1.0,5.0,


In [17]:
# Preview the agent-prefixed weights table
agent_weights_df.head()

Unnamed: 0,cage,agent_id,date,agent_weight,agent_percent_body_weight,agent_amount_fed,Notes
0,1,1.1,2022-09-18,26.0,1.0,5.0,
1,1,1.2,2022-09-18,25.2,1.0,5.0,
2,1,1.3,2022-09-18,24.8,1.0,5.0,
3,1,1.4,2022-09-18,24.7,1.0,5.0,
4,2,2.1,2022-09-18,29.5,1.0,5.0,


- Reading in all the ELO Score dataframes

In [18]:
# Load every ELO rating history CSV and stack them into a single table.
# glob.glob returns paths in arbitrary order, so they are sorted first to make
# the concatenation order independent of how the filesystem lists the files.
elo_history_paths = sorted(ELO_RATING_HISTORY_GLOB)
if not elo_history_paths:
    # pd.concat would otherwise fail with a bare "No objects to concatenate"
    raise ValueError(
        "No ELO rating history CSVs were found; check the pattern used for ELO_RATING_HISTORY_GLOB"
    )
all_elo_score_history_df = pd.concat(
    [pd.read_csv(path, index_col=0) for path in elo_history_paths]
)
# Parse dates so the table can be joined with the weights table on "date"
all_elo_score_history_df["date"] = pd.to_datetime(all_elo_score_history_df["date"], format='%Y-%m-%d')
all_elo_score_history_df = all_elo_score_history_df.sort_values(by=["date", "total_match_number"])
all_elo_score_history_df = all_elo_score_history_df.reset_index(drop=True)

In [19]:
# Check which columns the concatenated ELO history contains
all_elo_score_history_df.columns

Index(['total_trial_number', 'total_match_number', 'subject_id', 'agent_id',
       'original_elo_rating', 'updated_elo_rating', 'win_draw_loss',
       'subject_ranking', 'agent_ranking', 'pairing_index', 'index', 'date',
       'cage', 'box', 'match', 'tuple_animal_id', 'trial', 'winner',
       'keep_row', 'match_is_tie', 'trial_number', 'loser',
       'session_number_difference', 'strain', 'experiment_type', 'observer',
       'notes', 'length_of_observations', 'cage_#', 'action', 'sheet_name',
       'left_number_of_spots', 'right_number_of_spots',
       'spot_number_difference', 'percent_difference', 'runner'],
      dtype='object')

In [20]:
# Attach the weight record that shares the same animal ID and date, first for the
# subject and then for the agent. Left joins keep every ELO row even when no
# weight record matches.
subject_join_keys = ["subject_id", "date"]
all_elo_score_history_df = all_elo_score_history_df.merge(
    subject_weights_df, how="left", on=subject_join_keys
)

agent_join_keys = ["agent_id", "date"]
all_elo_score_history_df = all_elo_score_history_df.merge(
    agent_weights_df, how="left", on=agent_join_keys
)



In [21]:
# Check the columns after both weight joins; non-key columns present in both
# tables (cage, Notes) come back with _x / _y suffixes
all_elo_score_history_df.columns

Index(['total_trial_number', 'total_match_number', 'subject_id', 'agent_id',
       'original_elo_rating', 'updated_elo_rating', 'win_draw_loss',
       'subject_ranking', 'agent_ranking', 'pairing_index', 'index', 'date',
       'cage_x', 'box', 'match', 'tuple_animal_id', 'trial', 'winner',
       'keep_row', 'match_is_tie', 'trial_number', 'loser',
       'session_number_difference', 'strain', 'experiment_type', 'observer',
       'notes', 'length_of_observations', 'cage_#', 'action', 'sheet_name',
       'left_number_of_spots', 'right_number_of_spots',
       'spot_number_difference', 'percent_difference', 'runner', 'cage_y',
       'subject_weight', 'subject_percent_body_weight', 'subject_amount_fed',
       'Notes_x', 'cage', 'agent_weight', 'agent_percent_body_weight',
       'agent_amount_fed', 'Notes_y'],
      dtype='object')

In [22]:
# Join each pair-level summary onto the ELO history by the pair identifier.
# The order (HCO, RC, TT) determines the order of the appended columns.
pair_summary_joins = [
    (PAIRED_HCO, "hco_tuple_animal_id"),
    (PAIRED_RC, "rc_tuple_animal_id"),
    (PAIRED_TT, "tt_tuple_animal_id"),
]
for pair_summary_df, pair_key_column in pair_summary_joins:
    all_elo_score_history_df = pd.merge(
        left=all_elo_score_history_df,
        right=pair_summary_df,
        left_on="tuple_animal_id",
        right_on=pair_key_column,
        how="left",
    )

- Removing unnecessary columns

In [23]:
# Bookkeeping columns and join leftovers that are not needed in the final table
columns_to_discard = [
    "total_match_number",
    "total_trial_number",
    "index",
    "cage_x",
    "box",
    "match",
    "keep_row",
    "session_number_difference",
    "observer",
    "notes",
    "length_of_observations",
    "sheet_name",
    "runner",
    "cage_y",
    "cage",
    "Notes_y",
    "Notes_x",
    'processed_cage_number',
    'match_is_tie',
    'trial',
]
# errors="ignore" skips any listed column that is not present in the table
all_elo_score_history_df = all_elo_score_history_df.drop(
    columns=columns_to_discard, errors="ignore"
)

In [24]:
# Check which columns remain after the drop
all_elo_score_history_df.columns

Index(['subject_id', 'agent_id', 'original_elo_rating', 'updated_elo_rating',
       'win_draw_loss', 'subject_ranking', 'agent_ranking', 'pairing_index',
       'date', 'tuple_animal_id', 'winner', 'trial_number', 'loser', 'strain',
       'experiment_type', 'cage_#', 'action', 'left_number_of_spots',
       'right_number_of_spots', 'spot_number_difference', 'percent_difference',
       'subject_weight', 'subject_percent_body_weight', 'subject_amount_fed',
       'agent_weight', 'agent_percent_body_weight', 'agent_amount_fed',
       'hco_tuple_animal_id', 'hco_winner', 'hco_loser', 'hco_averaged_winner',
       'hco_averaged_loser', 'hco_averaged_winner_win_count',
       'hco_averaged_loser_win_count', 'hco_count_difference',
       'hco_match_count', 'hco_percent_win', 'hco_percentage_tie',
       'rc_tuple_animal_id', 'rc_winner', 'rc_loser',
       'rc_average_number_of_switches', 'rc_winner_no_ties',
       'rc_loser_no_ties', 'rc_averaged_winner', 'rc_averaged_loser',
       'r

- Reordering columns

In [25]:
# Columns that should come first, in exactly this order
cols_to_move = [
    'strain', 'experiment_type', 'date', 'cage_#', 'tuple_animal_id',
    'subject_id', 'agent_id', 'winner', 'loser', 'win_draw_loss',
    'original_elo_rating', 'updated_elo_rating', 'subject_ranking', 'agent_ranking',
    'subject_weight', 'subject_percent_body_weight', 'subject_amount_fed',
    'agent_weight', 'agent_percent_body_weight', 'agent_amount_fed',
]

# Every remaining column, kept in its current relative order
other_cols = list(
    filter(lambda name: name not in cols_to_move, all_elo_score_history_df.columns)
)

# Final order: the leading columns followed by everything else
new_col_order = [*cols_to_move, *other_cols]



In [26]:
# Reorder the table's columns according to new_col_order
all_elo_score_history_df = all_elo_score_history_df[new_col_order]

In [27]:
# Preview the table with the reordered columns
all_elo_score_history_df.head()

Unnamed: 0,strain,experiment_type,date,cage_#,tuple_animal_id,subject_id,agent_id,winner,loser,win_draw_loss,...,tt_winner,tt_loser,tt_averaged_winner,tt_averaged_loser,tt_averaged_winner_win_count,tt_averaged_loser_win_count,tt_count_difference,tt_match_count,tt_percent_win,tt_percentage_tie
0,C57,tube_test,2022-09-06,1.0,"('1.1', '1.2')",1.1,1.2,1.1,1.2,1.0,...,"['1.1', '1.1', '1.1', '1.1', '1.1', '1.1', '1....","['1.2', '1.2', '1.2', '1.2', '1.2', '1.2', '1....",1.1,1.2,8,0,8,8,1.0,False
1,C57,tube_test,2022-09-06,1.0,"('1.1', '1.2')",1.2,1.1,1.1,1.2,0.0,...,"['1.1', '1.1', '1.1', '1.1', '1.1', '1.1', '1....","['1.2', '1.2', '1.2', '1.2', '1.2', '1.2', '1....",1.1,1.2,8,0,8,8,1.0,False
2,C57,tube_test,2022-09-06,2.0,"('2.1', '2.2')",2.1,2.2,2.1,2.2,1.0,...,"['2.1', '2.1', '2.1', '2.1', '2.1', '2.1', '2....","['2.2', '2.2', '2.2', '2.2', '2.2', '2.2', '2....",2.1,2.2,7,1,6,8,0.875,False
3,C57,tube_test,2022-09-06,2.0,"('2.1', '2.2')",2.2,2.1,2.1,2.2,0.0,...,"['2.1', '2.1', '2.1', '2.1', '2.1', '2.1', '2....","['2.2', '2.2', '2.2', '2.2', '2.2', '2.2', '2....",2.1,2.2,7,1,6,8,0.875,False
4,C57,tube_test,2022-09-06,3.0,"('3.1', '3.2')",3.2,3.1,3.2,3.1,1.0,...,"['3.2', '3.2', '3.2', '3.2', '3.2', '3.2', '3....","['3.1', '3.1', '3.1', '3.1', '3.1', '3.1', '3....",3.2,3.1,8,0,8,8,1.0,False


# Merging the RC Distance data

In [28]:
# Changing column type to make it mergable
all_elo_score_history_df["tuple_animal_id"] = all_elo_score_history_df["tuple_animal_id"].apply(lambda x: ast.literal_eval(x))
all_elo_score_history_df["subject_id"] = all_elo_score_history_df["subject_id"].astype(str)
all_elo_score_history_df["date"] = all_elo_score_history_df["date"].astype(str)

In [29]:
# Changing column type to make it mergable
RC_DISTANCE_DF["date"] = RC_DISTANCE_DF["date"].astype(str)
RC_DISTANCE_DF["rc_trial_number"] = RC_DISTANCE_DF["rc_trial_number"].astype(float)

In [30]:
# Key columns are listed pairwise: the n-th ELO key is matched with the n-th RC key
elo_merge_keys = ["date", "trial_number", "tuple_animal_id", "subject_id"]
rc_distance_merge_keys = ["date", "rc_trial_number", "all_subj", "subj_id"]
# Outer join: rows from either table are kept even without a match in the other
all_elo_score_history_df = all_elo_score_history_df.merge(
    RC_DISTANCE_DF,
    how="outer",
    left_on=elo_merge_keys,
    right_on=rc_distance_merge_keys,
)

In [34]:
# Check the columns after merging in the RC distance table
all_elo_score_history_df.columns

Index(['strain', 'experiment_type', 'date', 'cage_#', 'tuple_animal_id',
       'subject_id', 'agent_id', 'winner', 'loser', 'win_draw_loss',
       ...
       'track_id', 'rc_trial_number', 'thorax_to_reward_port_baseline_slices',
       'thorax_to_reward_port_trial_slices',
       'scaled_coordinates_baseline_slices', 'scaled_coordinates_trial_slices',
       'distance_between_subject_baseline_slices',
       'distance_between_subject_trial_slices',
       'thorax_velocity_baseline_slices', 'thorax_velocity_trial_slices'],
      dtype='object', length=103)

In [38]:
# Print each column name on its own line, since the .columns repr elides
# the middle of a table this wide
for column_name in all_elo_score_history_df.columns:
    print(column_name)

strain
experiment_type
date
cage_#
tuple_animal_id
subject_id
agent_id
winner
loser
win_draw_loss
original_elo_rating
updated_elo_rating
subject_ranking
agent_ranking
subject_weight
subject_percent_body_weight
subject_amount_fed
agent_weight
agent_percent_body_weight
agent_amount_fed
pairing_index
trial_number
action
left_number_of_spots
right_number_of_spots
spot_number_difference
percent_difference
hco_tuple_animal_id
hco_winner
hco_loser
hco_averaged_winner
hco_averaged_loser
hco_averaged_winner_win_count
hco_averaged_loser_win_count
hco_count_difference
hco_match_count
hco_percent_win
hco_percentage_tie
rc_tuple_animal_id
rc_winner
rc_loser
rc_average_number_of_switches
rc_winner_no_ties
rc_loser_no_ties
rc_averaged_winner
rc_averaged_loser
rc_averaged_winner_win_count
rc_averaged_loser_win_count
rc_tie_count
rc_all_match_count_including_ties
rc_averaged_winner_win_count_minus_loser_win_count
rc_win_to_win_plus_lost_ratio
rc_win_to_all_ratio
rc_is_win_to_win_and_loss_ratio_tie
rc_t

In [31]:
# Preview the first rows of the merged table
all_elo_score_history_df.head()

Unnamed: 0,strain,experiment_type,date,cage_#,tuple_animal_id,subject_id,agent_id,winner,loser,win_draw_loss,...,track_id,rc_trial_number,thorax_to_reward_port_baseline_slices,thorax_to_reward_port_trial_slices,scaled_coordinates_baseline_slices,scaled_coordinates_trial_slices,distance_between_subject_baseline_slices,distance_between_subject_trial_slices,thorax_velocity_baseline_slices,thorax_velocity_trial_slices
0,C57,tube_test,2022-09-06,1.0,"(1.1, 1.2)",1.1,1.2,1.1,1.2,1.0,...,,,,,,,,,,
1,C57,tube_test,2022-09-06,1.0,"(1.1, 1.2)",1.2,1.1,1.1,1.2,0.0,...,,,,,,,,,,
2,C57,tube_test,2022-09-06,2.0,"(2.1, 2.2)",2.1,2.2,2.1,2.2,1.0,...,,,,,,,,,,
3,C57,tube_test,2022-09-06,2.0,"(2.1, 2.2)",2.2,2.1,2.1,2.2,0.0,...,,,,,,,,,,
4,C57,tube_test,2022-09-06,3.0,"(3.1, 3.2)",3.2,3.1,3.2,3.1,1.0,...,,,,,,,,,,


In [32]:
# Preview the last rows of the merged table
all_elo_score_history_df.tail()

Unnamed: 0,strain,experiment_type,date,cage_#,tuple_animal_id,subject_id,agent_id,winner,loser,win_draw_loss,...,track_id,rc_trial_number,thorax_to_reward_port_baseline_slices,thorax_to_reward_port_trial_slices,scaled_coordinates_baseline_slices,scaled_coordinates_trial_slices,distance_between_subject_baseline_slices,distance_between_subject_trial_slices,thorax_velocity_baseline_slices,thorax_velocity_trial_slices
3723,CD1,home_cage_observation,2022-10-05,5.0,"(5.1, 5.3)",5.1,5.3,5.1,5.3,1.0,...,,,,,,,,,,
3724,CD1,home_cage_observation,2022-10-05,5.0,"(5.1, 5.3)",5.3,5.1,5.1,5.3,0.0,...,,,,,,,,,,
3725,CD1,home_cage_observation,2022-10-05,5.0,"(5.1, 5.3)",5.3,5.1,5.1,5.3,0.0,...,,,,,,,,,,
3726,CD1,home_cage_observation,2022-10-05,5.0,"(5.1, 5.3)",5.3,5.1,5.1,5.3,0.0,...,,,,,,,,,,
3727,CD1,home_cage_observation,2022-10-05,5.0,"(5.1, 5.3)",5.3,5.1,5.1,5.3,0.0,...,,,,,,,,,,


In [37]:
# Row and column counts of the merged table
all_elo_score_history_df.shape

(3728, 103)

In [33]:
# Save the consolidated table as a pickle in the current working directory.
# NOTE(review): the path is hard-coded and does not use OUTPUT_DIR — confirm this is intended.
all_elo_score_history_df.to_pickle("./elo_score_and_rc_distance.pkl")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a4490980-3f6a-4f44-80eb-ebd789a5b21f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>