# Title of notebook

Brief 1-2 sentence description of notebook.

In [1]:
# Imports of all used packages and libraries
import copy
import re
import os
import sys
import string
import glob
import ast
from collections import Counter
from collections import defaultdict
import warnings

In [2]:
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
import git
# Getting the path of the root directory so that we can import repo specific functions
git_repo_object = git.Repo('.', search_parent_directories=True)
git_repo_directory = git_repo_object.working_tree_dir

In [4]:
# Setting path so that we can import functions
sys.path.append(os.path.join(git_repo_directory, "src"))

In [5]:
os.path.join(git_repo_directory, "src")


'/nancy/projects/social_dominance_active_inference/src'

In [6]:
from elorating import calculation
from elorating import dataframe

In [7]:
# Increase size of plot in jupyter

plt.rcParams["figure.figsize"] = (18,10)

## Inputs & Data

Explanation of each input and where it comes from.

In [8]:
# Inputs and Required data loading
# input varaible names are in all caps snake case
# Whenever an input changes or is used for processing 
# the vairables are all lower in snake case
OUTPUT_DIR = r"./proc" # where data is saved should always be shown in the inputs

In [9]:
"elo_rating_spreadsheet_output_directory".upper()

'ELO_RATING_SPREADSHEET_OUTPUT_DIRECTORY'

In [10]:
PROTOCOL_NAME = "reward_competition"
PREFIX_NAME = "rc"
COHORT_NAME = "pilot_3"
RAW_DATA_FILE_PATH = "../../data/pilot_3/pilot_3_reward_competition.xlsx"
INPUTTED_SHEET_NAME = 'Master List'
ALL_HEADER_ROW = 0
SCORING_DF = pd.read_excel(RAW_DATA_FILE_PATH, sheet_name=INPUTTED_SHEET_NAME, header=ALL_HEADER_ROW)
CAGE_TO_STRAIN = {"1": "C57", "2": "C57", "3": "C57", "4": "CD1", "5": "CD1", "6": "CD1"}

In [11]:
ELO_RATING_SPREADSHEET_OUTPUT_DIRECTORY = os.path.join(OUTPUT_DIR, "elo_rating_spread_sheets")

In [12]:
os.makedirs(ELO_RATING_SPREADSHEET_OUTPUT_DIRECTORY, exist_ok=True)

In [13]:
SCORING_DF.head()

Unnamed: 0,Date,Cage,Box,Match,Scorer,Trial 1 Winner,Trial 2 Winner,Trial 3 Winner,Trial 4 Winner,Trial 5 Winner,...,Trial 13 Winner,Trial 14 Winner,Trial 15 Winner,Trial 16 Winner,Trial 17 Winner,Trial 18 Winner,Trial 19 Winner,Mouse 1 Wins,Mouse 2 Wins,Ties
0,2022-10-03,1,1,1.1 vs 1.2,Jocelyn,1.1,1.2,1.2,1.1,tie,...,1.1,tie,1.2,1.2,1.2,1.2,1.2,6,9,4
1,2022-10-03,1,2,1.3 vs 1.4,Jocelyn,1.3,1.4,1.3,1.4,1.4,...,1.3,tie,1.4,1.3,1.3,1.4,tie,8,8,3
2,2022-10-03,2,3,2.1 vs 2.2,Jocelyn,2.2,2.1,2.1,2.1,2.1,...,2.2,2.2,2.1,2.2,2.1,2.1,2.2,13,5,1
3,2022-10-03,2,4,2.3 vs 2.4,Jocelyn,2.4,2.4,2.4,2.3,2.4,...,2.3,2.3,2.4,2.4,2.4,2.4,2.3,6,13,0
4,2022-10-03,3,1,3.1 vs 3.2,Meghan,3.2,3.1,3.1,3.2,3.2,...,3.1,3.1,3.1,3.1,3.1,3.1,3.1,14,5,0


## Outputs

Describe each output that the notebook creates. 

- Is it a plot or is it data?

- How valuable is the output and why is it valuable or useful?

## Processing

Describe what is done to the data here and how inputs are manipulated to generate outputs. 

In [14]:
# As much code and as many cells as required
# includes EDA and playing with data
# GO HAM!

# Ideally functions are defined here first and then data is processed using the functions

# function names are short and in snake case all lowercase
# a function name should be unique but does not have to describe the function
# doc strings describe functions not function names


- Renaming all the columns to the lower case and space removed version

In [15]:
column_name_to_standarized = defaultdict(dict)

for col in SCORING_DF:
    # Making the column name lower case and removing the spaces
    column_name_to_standarized[col] = "_".join(str(col).lower().strip().split(" "))
column_name_to_standarized = column_name_to_standarized

In [16]:
SCORING_DF = SCORING_DF.rename(columns=column_name_to_standarized)

In [17]:
SCORING_DF = SCORING_DF.drop([col for col in SCORING_DF.columns if "wins" in col or "ties" in col], axis=1, errors="ignore")

In [18]:
SCORING_DF = SCORING_DF.dropna(subset=["trial_1_winner"])

In [19]:
SCORING_DF.head()

Unnamed: 0,date,cage,box,match,scorer,trial_1_winner,trial_2_winner,trial_3_winner,trial_4_winner,trial_5_winner,...,trial_10_winner,trial_11_winner,trial_12_winner,trial_13_winner,trial_14_winner,trial_15_winner,trial_16_winner,trial_17_winner,trial_18_winner,trial_19_winner
0,2022-10-03,1,1,1.1 vs 1.2,Jocelyn,1.1,1.2,1.2,1.1,tie,...,1.2,tie,1.2,1.1,tie,1.2,1.2,1.2,1.2,1.2
1,2022-10-03,1,2,1.3 vs 1.4,Jocelyn,1.3,1.4,1.3,1.4,1.4,...,1.3,1.4,1.4,1.3,tie,1.4,1.3,1.3,1.4,tie
2,2022-10-03,2,3,2.1 vs 2.2,Jocelyn,2.2,2.1,2.1,2.1,2.1,...,2.1,2.1,2.1,2.2,2.2,2.1,2.2,2.1,2.1,2.2
3,2022-10-03,2,4,2.3 vs 2.4,Jocelyn,2.4,2.4,2.4,2.3,2.4,...,2.4,2.4,2.4,2.3,2.3,2.4,2.4,2.4,2.4,2.3
4,2022-10-03,3,1,3.1 vs 3.2,Meghan,3.2,3.1,3.1,3.2,3.2,...,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1


## Add IDs of both animals and strain as a column

In [20]:
SCORING_DF["animal_ids"] = SCORING_DF["match"].apply(lambda x: tuple(sorted([all_ids.strip() for all_ids in re.findall(r"[-+]?(?:\d*\.\d+|\d+)", x)])))

In [21]:
SCORING_DF["strain"] = SCORING_DF["cage"].astype(str).map(CAGE_TO_STRAIN)

In [22]:
SCORING_DF.head()

Unnamed: 0,date,cage,box,match,scorer,trial_1_winner,trial_2_winner,trial_3_winner,trial_4_winner,trial_5_winner,...,trial_12_winner,trial_13_winner,trial_14_winner,trial_15_winner,trial_16_winner,trial_17_winner,trial_18_winner,trial_19_winner,animal_ids,strain
0,2022-10-03,1,1,1.1 vs 1.2,Jocelyn,1.1,1.2,1.2,1.1,tie,...,1.2,1.1,tie,1.2,1.2,1.2,1.2,1.2,"(1.1, 1.2)",C57
1,2022-10-03,1,2,1.3 vs 1.4,Jocelyn,1.3,1.4,1.3,1.4,1.4,...,1.4,1.3,tie,1.4,1.3,1.3,1.4,tie,"(1.3, 1.4)",C57
2,2022-10-03,2,3,2.1 vs 2.2,Jocelyn,2.2,2.1,2.1,2.1,2.1,...,2.1,2.2,2.2,2.1,2.2,2.1,2.1,2.2,"(2.1, 2.2)",C57
3,2022-10-03,2,4,2.3 vs 2.4,Jocelyn,2.4,2.4,2.4,2.3,2.4,...,2.4,2.3,2.3,2.4,2.4,2.4,2.4,2.3,"(2.3, 2.4)",C57
4,2022-10-03,3,1,3.1 vs 3.2,Meghan,3.2,3.1,3.1,3.2,3.2,...,3.1,3.1,3.1,3.1,3.1,3.1,3.1,3.1,"(3.1, 3.2)",C57


- Creating a subfolder to put the Elo rating Spreadsheets

In [23]:
file_name = "{}_{}_trial_as_columns_cages_{}_date_{}_{}.csv".format(cohort_name, protocol_name, all_cages, earliest_date, latest_date)

SCORING_DF.to_csv(os.path.join(elo_rating_spreadsheet_output_directory, file_name))

NameError: name 'cohort_name' is not defined

In [None]:
SCORING_DF = SCORING_DF.drop(columns=["sheet_name", "scorer"], errors="ignore")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a4490980-3f6a-4f44-80eb-ebd789a5b21f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>