# Deepmind TurnerLab (Pillar IV) Dataset 06-07-2023

**Data Source:** *16FlYMaze System, Turner Lab, HHMI Janelia Research Campus*

**Collected By:** *Kaitlyn Boone, Aparna Dev, PTR, HHMI Janelia Research Campus*

**Code Author:** *Rishika Mohanta*


In [1]:
# LIBRARY IMPORTS

# import libraries
import numpy as np                      # for numerical operations
import pandas as pd                     # for data manipulation
import matplotlib.pyplot as plt         # for plotting
import scipy.optimize as opt            # for optimization
from joblib import Parallel, delayed    # for parallelization
from tqdm.notebook import tqdm          # for progress bar
import pickle                           # for saving and loading
import os                               # for file operations

# import cognitive models
from cogpolicy import *     # Cognitive Policy Learning models
from cogq import *          # Cognitive Q-learning models

In [2]:
data_path = 'data/dmData_06-07-2023/' # path to data files
fit_path = 'fitted_models/dmData_06-07-2023/' # path to fitted models
quality_control = 'full' # whether to perform quality control (valid options: minimal, full, none)

In [9]:
# DATA LOADING

# load data
choice_data = np.loadtxt(data_path + 'choices.csv', delimiter=',')
reward_data = np.loadtxt(data_path + 'rewards.csv', delimiter=',')

# turn into integers
choice_data = choice_data.astype(int)
reward_data = reward_data.astype(int)

if choice_data.shape != reward_data.shape:
    raise ValueError('Sizes do not match.')

N = choice_data.shape[0]    # number of flies

print("Data loaded successfully with N = {} flies and {} maximum trials".format(N, choice_data.shape[1]))

# metadata
metadata = pd.read_csv(data_path + 'metadata.csv')

Data loaded successfully with N = 507 flies and 1010 maximum trials


In [10]:
# QUALITY CONTROL

if quality_control == 'minimal':
    qc = np.loadtxt(data_path + 'quality_control.csv', delimiter=',').astype(bool)
    choice_data = choice_data[qc]
    reward_data = reward_data[qc]
    metadata = metadata[qc]
if quality_control == 'full':
    qc = np.loadtxt(data_path + 'quality_control.csv', delimiter=',').astype(bool)
    metadata = metadata[qc].groupby('Fly Experiment').head(3)
    choice_data = choice_data[metadata.index]
    reward_data = reward_data[metadata.index]
    metadata.reset_index(drop=True, inplace=True)

print("{}/{} ({}) flies passed quality control".format(choice_data.shape[0], N, "{:0.2f}".format(choice_data.shape[0]/N*100)))

N = choice_data.shape[0]    # number of flies

KeyError: "None of [Int64Index([  0,   1,   2,   3,   4,   9,  10,  11,  12,  14,\n            ...\n            496, 497, 498, 499, 500, 501, 503, 504, 505, 506],\n           dtype='int64', length=336)] are in the [columns]"

## Preliminary Data Analysis

In [7]:
metadata

Unnamed: 0,Fly Experiment,Arena,Folder,Experiment Start Time,Starvation Time,Odor 1,Odor 2
0,exp11.csv,0,../data/40hr_starvation_deepmind_2023-05-11_09-34,2023-05-11 09:35:12,2023-05-09 17:30:00,MHO 1:1000,HAL 1:1000
1,exp26_reciprocal.csv,0,../data/40hr_starvation_deepmind_2023-06-02_09-49,2023-06-02 09:50:04,2023-06-02 08:30:00,MHO 1:1000,HAL 1:1000
2,exp2.csv,0,../data/41hr_starvation_deepmind_2023-05-04_09-36,2023-05-04 09:42:29,2023-05-04 08:30:00,MHO 1:1000,HAL 1:1000
3,exp5.csv,0,../data/41hr_starvation_deepmind_2023-05-25_10-07,2023-05-25 10:10:42,2023-05-23 17:25:00,MHO 1:1000,HAL 1:1000
4,exp36.csv,0,../data/41hr_starvation_deepmind_2023-06-09_09-45,2023-06-09 09:46:24,2023-06-07 17:00:00,MHO 1:1000,HAL 1:1000
...,...,...,...,...,...,...,...
502,exp56.csv,15,../data/45hr_starvation_deepmind_2023-07-05_13-28,2023-07-05 13:38:38,2023-07-03 17:00:00,MHO 1:1000,HAL 1:1000
503,exp43_reciprocal.csv,15,../data/46hr_starvation_deepmind_2023-06-14_13-48,2023-06-14 13:50:06,2023-06-12 16:00:00,MHO 1:1000,HAL 1:1000
504,exp49.csv,15,../data/46hr_starvation_deepmind_2023-06-15_13-48,2023-06-15 13:49:48,2023-06-13 15:45:00,MHO 1:1000,HAL 1:1000
505,exp38_reciprocal.csv,15,../data/46hr_Starvation_deepmind_2023-06-22_14-20,2023-06-22 14:22:30,2023-06-20 16:00:00,MHO 1:1000,HAL 1:1000
