In [110]:
import os
import pandas as pd
# Working directory that contains the `processed/` CSV files read and written below.
# The original location stays the default; set the PHOTOMETRY_DATA_DIR environment
# variable to run the notebook on a machine where that path does not exist.
path = os.environ.get(
    "PHOTOMETRY_DATA_DIR",
    "/Users/jorgerag/Documents/UCSD/courses/capstone/data/photometry_analog",
)
os.chdir(path)
# Render floats with four decimals in DataFrame output
pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [101]:
# Load the processed behaviour (analog) and photometry tables, in that order
beha_data, photo_data = (
    pd.read_csv(csv_file)
    for csv_file in ("processed/analog_data.csv", "processed/photometry_data.csv")
)

In [102]:
# Duration of lever press
def get_duration_lp(beh_dict):
    """Annotate the last row of every lever press with its start, end and duration.

    Args:
        beh_dict: list of row dicts, each with an 'lp' flag and a 'timestamp'.
            The rows are modified in place.

    Returns:
        The same list. The row preceding a row whose 'lp' is 0, while a press is
        in progress, gains 'lp_start_time', 'lp_end_time' and 'lp_duration'.

    The final row is never inspected as a current row, so a press that is still
    running when the data ends is not recorded.
    """
    # 0 doubles as "no press in progress", so an onset at timestamp 0 is not tracked.
    press_onset = 0
    for idx in range(len(beh_dict) - 1):
        row = beh_dict[idx]
        following = beh_dict[idx + 1]
        if row['lp'] == 1 and press_onset == 0:
            press_onset = row['timestamp']
            continue
        if following['lp'] == 0 and press_onset != 0:
            release = following['timestamp']
            row['lp_start_time'] = press_onset
            row['lp_end_time'] = release
            # the 20 is a correction found in the matlab code
            row['lp_duration'] = release - press_onset - 20
            press_onset = 0
    return beh_dict

# Inter Press Interval
def get_ipi(beh_dict):
    """Attach the inter-press interval ('ipi') to the last row of lever presses.

    Args:
        beh_dict: list of row dicts, each with an 'lp' flag and a 'timestamp'.
            The rows are modified in place.

    Returns:
        The same list. A row with 'lp' == 1 that is followed by a row with
        'lp' == 0, while no earlier release is pending, gains an 'ipi' key: NaN
        for the first press, otherwise the time from the previous release to the
        first 'lp' == 1 row seen after it.
    """
    # 0 doubles as "no release waiting for the next press".
    last_release = 0
    pending_ipi = float('NaN')
    for idx in range(len(beh_dict) - 1):
        row = beh_dict[idx]
        if row['lp'] != 1:
            continue
        if last_release == 0:
            following = beh_dict[idx + 1]
            if following['lp'] == 0:
                # Press ends here: remember the release and emit the stored interval.
                last_release = following['timestamp']
                row['ipi'] = pending_ipi
                pending_ipi = float('NaN')
        else:
            # First pressed row after a release: measure the gap.
            pending_ipi = row["timestamp"] - last_release
            last_release = 0
    return beh_dict

# head entry duration
def get_he(beh_dict):
    """Store the head-entry time accumulated since the previous lever press.

    Args:
        beh_dict: list of row dicts with 'he' and 'timestamp' keys; rows that
            close a lever press also carry 'lp_duration'. Modified in place.

    Returns:
        The same list. Every row holding 'lp_duration' (the final row excepted,
        since it is never visited) gains 'past_he_duration': the summed length
        of the head entries completed since the previous such row.
    """
    # 0 doubles as "no head entry in progress".
    entry_onset = 0
    accumulated = 0
    for idx in range(len(beh_dict) - 1):
        row = beh_dict[idx]
        if "lp_duration" in row:
            row["past_he_duration"] = accumulated
            accumulated = 0
            continue
        if row['he'] == 1 and entry_onset == 0:
            entry_onset = row['timestamp']
        elif row['he'] == 0 and entry_onset != 0:
            accumulated = accumulated + (row['timestamp'] - entry_onset)
            entry_onset = 0
    return beh_dict

# reward dummy
def get_rew(beh_dict):
    """Flag lever presses that were preceded by a reward.

    Args:
        beh_dict: list of row dicts with a 'rew' key; rows that close a lever
            press also carry 'lp_duration'. Modified in place.

    Returns:
        The same list. Every row holding 'lp_duration' gains 'rew_dummy': 1 when
        a row with 'rew' == 1 was seen since the previous such row, else 0.
        The 'rew' value on a lever-press row itself is not looked at.
    """
    rewarded = 0
    for row in beh_dict:
        if "lp_duration" in row:
            row["rew_dummy"] = rewarded
            rewarded = 0
            continue
        if row['rew'] == 1:
            rewarded = 1
    return beh_dict

# Get successful lp
def met_lp(x):
    """Return 1 when the press lasted at least the threshold, otherwise 0.

    Args:
        x: mapping (e.g. a DataFrame row) with 'lp_duration' and 'threshold'.
    """
    return 1 if x["lp_duration"] >= x["threshold"] else 0

# Get gcamp before lp
def get_gcamp_total(model_dict, photo_df=None):
    """Sum the gcamp signal between consecutive lever-press records.

    For the first record the window is every sample strictly before the record's
    timestamp; for later records it is (previous record timestamp, this record
    timestamp]. Only samples from the record's own session are used (matching
    'subject', 'threshold' and 'day'), so sessions with overlapping timestamps do
    not leak into each other.

    Args:
        model_dict: list of lever-press records (dicts) with a 'timestamp' key,
            modified in place.
        photo_df: photometry DataFrame with 'timestamp' and 'gcamp' columns.
            Defaults to the module-level `photo_data`.

    Returns:
        The same list; each record gains 'gcamp_total' and 'gcamp_total_per_sec'
        (NaN for the first record, which has no preceding record to time against).
    """
    if photo_df is None:
        photo_df = photo_data
    session_cache = {}
    for i, record in enumerate(model_dict):
        session = _session_photometry(record, photo_df, session_cache)
        if i == 0:
            window = session[session["timestamp"] < record["timestamp"]]
            duration = float('NaN')
        else:
            previous_ts = model_dict[i - 1]["timestamp"]
            window = session[(session["timestamp"] <= record["timestamp"]) & (previous_ts < session["timestamp"])]
            # presumably timestamps are in milliseconds, hence /1000 for a per-second rate
            duration = (record["timestamp"] - previous_ts) / 1000
        total = window["gcamp"].sum()
        record["gcamp_total"] = total
        record["gcamp_total_per_sec"] = total / duration
    return model_dict

# Get gcamp during lp
def get_gcamp_lp(model_dict, photo_df=None):
    """Sum the gcamp signal recorded while each lever press was held.

    The window is [lp_start_time, lp_end_time], both ends included, restricted to
    samples from the record's own session (matching 'subject', 'threshold' and
    'day').

    Args:
        model_dict: list of lever-press records (dicts) with 'lp_start_time',
            'lp_end_time' and 'lp_duration' keys, modified in place.
        photo_df: photometry DataFrame with 'timestamp' and 'gcamp' columns.
            Defaults to the module-level `photo_data`.

    Returns:
        The same list; each record gains 'gcamp_lp' and 'gcamp_lp_per_sec'.
    """
    if photo_df is None:
        photo_df = photo_data
    session_cache = {}
    for record in model_dict:
        session = _session_photometry(record, photo_df, session_cache)
        window = session[(session["timestamp"] >= record["lp_start_time"]) & (session["timestamp"] <= record["lp_end_time"])]
        total = window["gcamp"].sum()
        record["gcamp_lp"] = total
        record["gcamp_lp_per_sec"] = total / (record["lp_duration"] / 1000)
    return model_dict

# Restrict photometry samples to the session a record belongs to
def _session_photometry(record, photo_df, cache):
    """Return the rows of `photo_df` from the same session as `record`.

    A session is identified by ('subject', 'threshold', 'day'). When the record
    or the frame lacks any of these, the frame is returned unfiltered. `cache`
    maps session ids to already-filtered frames so each session is filtered once.
    """
    keys = ("subject", "threshold", "day")
    if not all(k in record and k in photo_df.columns for k in keys):
        return photo_df
    session_id = tuple(record[k] for k in keys)
    if session_id not in cache:
        mask = (
            (photo_df["subject"] == session_id[0])
            & (photo_df["threshold"] == session_id[1])
            & (photo_df["day"] == session_id[2])
        )
        cache[session_id] = photo_df[mask]
    return cache[session_id]

In [115]:
# Get unique subject, threshold and day combinations
unique_obj = beha_data.groupby(["subject", "threshold", "day"]).size().reset_index()
unique_obj = unique_obj.to_dict('records')

# Columns kept in the final table, in output order
MODEL_COLUMNS = ["order", "subject", "threshold", "day", "lp_duration", "lp_met", "ipi", "past_he_duration", "rew_dummy", "gcamp_total", "gcamp_total_per_sec", "gcamp_lp", "gcamp_lp_per_sec"]

# Iterate over every session to create the final dataframe.
# Per-session frames are collected in a list and concatenated once at the end,
# instead of re-copying the growing result on every iteration.
session_frames = []
for elem in unique_obj:
    beh_df = beha_data[(beha_data['subject'] == elem["subject"]) & (beha_data['threshold'] == elem["threshold"]) & (beha_data['day'] == elem["day"])]
    # NOTE(review): photo_df is this session's photometry slice, but it is not passed
    # to get_gcamp_total / get_gcamp_lp below -- confirm those helpers restrict the
    # photometry samples to the current session themselves.
    photo_df = photo_data[(photo_data['subject'] == elem["subject"]) & (photo_data['threshold'] == elem["threshold"]) & (photo_data['day'] == elem["day"])]

    beh_dict = beh_df.to_dict('records')
    beh_dict = get_duration_lp(beh_dict)
    beh_dict = get_ipi(beh_dict)
    beh_dict = get_he(beh_dict)
    beh_dict = get_rew(beh_dict)

    model_df = pd.DataFrame(beh_dict)
    # A session without any completed lever press has no 'lp_duration' column at all
    if 'lp_duration' not in model_df.columns:
        print("Skipping session without lever presses:", elem["subject"], elem["threshold"], elem["day"])
        continue
    # Filter everything but lp; copy so the column assignment below targets an
    # independent frame rather than a slice of the unfiltered one
    model_df = model_df.loc[pd.notna(model_df['lp_duration'])].copy()
    if model_df.empty:
        print("Skipping session without lever presses:", elem["subject"], elem["threshold"], elem["day"])
        continue
    # LP met
    model_df["lp_met"] = model_df.apply(met_lp, axis=1)

    model_dict = model_df.to_dict('records')
    model_dict = get_gcamp_total(model_dict)
    model_dict = get_gcamp_lp(model_dict)
    model_df = pd.DataFrame(model_dict)
    # 1-based position of the lever press within its session
    model_df['order'] = range(1, len(model_df) + 1)
    model_df = model_df[MODEL_COLUMNS]
    session_frames.append(model_df)

# Row indices restart at 0 for each session, as before (no ignore_index)
final_model_data = pd.concat(session_frames) if session_frames else pd.DataFrame()


Unnamed: 0,subject,threshold,day,0
0,3201,1600,2,1710489
1,3201,1600,4,1725086
2,3201,1600,5,1802049
3,3201,1600,6,1746508
4,3203,1600,2,1707875
5,3203,1600,3,2133150
6,3204,1600,4,1726606
7,3204,1600,5,1809192
8,3204,1600,6,1739677
9,3315,1600,3,1786402


In [None]:
# Display the lever-press table accumulated over all sessions by the loop above
final_model_data

In [99]:
# Save the table accumulated over all sessions. `model_df` only holds the last
# session processed by the loop, so writing it would drop every other session.
final_model_data.to_csv("processed/model_data.csv", index=False)