In [1]:
import os
import pandas as pd
import scipy.io
path = "../data/photometry_analog/PV Population/"
os.chdir(path)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [68]:
mat = scipy.io.loadmat('GCAMP_4268_1600-7.mat')
mat = mat['GCAMP']

In [70]:
pd.DataFrame(mat['gcampdata_timestamps'][0][0])

Unnamed: 0,0
0,48157341.5552
1,48157392.5504
2,48157441.3824
3,48157491.5200
4,48157542.1568
...,...
108586,53596230.5152
108587,53596280.8576
108588,53596330.6496
108589,53596381.1328


In [71]:
# get filename info
def get_filename_info(df, filename):
    filename_info = filename.split("_")
    filename_info_sub = filename_info[2].split("-")
    df['subject'] = filename_info[1].strip()
    df['threshold'] = int(filename_info_sub[0].strip())
    df['day'] = filename_info_sub[1].replace(".mat", "").strip()

    return df

In [72]:
# get behavioral information
def get_beh(mat, filename):
    beh_data = mat['GCAMP']['beh_data'][0][0]
    beh_data = pd.DataFrame(beh_data)

    new_header = beh_data.iloc[0] #grab the first row for the header
    beh_data = beh_data[1:] #take the data less the header row
    beh_data.columns = new_header

    beh_data = get_filename_info(beh_data, filename)

    return beh_data


In [73]:
# get photometry information
def get_photo(mat, filename):
    photo_data = pd.DataFrame()
    photo_data['timestamp'] = pd.DataFrame(mat['GCAMP']['gcampdata_timestamps'][0][0])
    photo_data['gcamp'] = pd.DataFrame(mat['GCAMP']['gcampdata'][0][0])

    photo_data = get_filename_info(photo_data, filename)

    return photo_data

In [74]:
behavioral_data = pd.DataFrame()
photometry_data = pd.DataFrame()
for file in os.listdir(os.curdir):
    mat = scipy.io.loadmat(os.path.join(os.curdir, file))
    print(file)
    # behavioral dataframe
    beh_data = get_beh(mat, file)
    behavioral_data = pd.concat([behavioral_data, beh_data])
    # photometry dataframe
    photo_data = get_photo(mat, file)
    photometry_data = pd.concat([photometry_data, photo_data])

behavioral_data.columns = ["timestamp", "lp", "he", "rew", "subject", "threshold", "day"]

GCAMP_4268_1600-7.mat
GCAMP_3316_1600-5.mat
GCAMP_3316_1600-4.mat
GCAMP_4268_1600-6.mat
GCAMP_4268_1600-4.mat
GCAMP_3316_1600-6.mat
GCAMP_4268_1600-5.mat
GCAMP_4268_1600-1.mat
GCAMP_3316_1600-3.mat
GCAMP_3316_1600-2.mat
GCAMP_4268_1600-2.mat
GCAMP_4268_1600-3.mat
GCAMP_4269_1600-8.mat
GCAMP_3317_1600-3.mat
GCAMP_3203_1600-3.mat
GCAMP_4269_1600-1.mat
GCAMP_3203_1600-2.mat
GCAMP_4269_1600-3.mat
GCAMP_3317_1600-1.mat
GCAMP_3317_1600-5.mat
GCAMP_4269_1600-7.mat
GCAMP_4269_1600-6.mat
GCAMP_4269_1600-5.mat
GCAMP_3201_1600-2.mat
GCAMP_3315_1600-3.mat
GCAMP_3204_1600-6.mat
GCAMP_3204_1600-4.mat
GCAMP_3204_1600-5.mat
GCAMP_4268_1600-8.mat
GCAMP_3201_1600-4.mat
GCAMP_3201_1600-5.mat
GCAMP_3315_1600-6.mat
GCAMP_3201_1600-6.mat


In [75]:
behavioral_data.head()

Unnamed: 0,timestamp,lp,he,rew,subject,threshold,day
1,48156682.8544,0.0,0.0,0.0,4268,1600,7
2,48156686.848,0.0,0.0,0.0,4268,1600,7
3,48156690.9696,0.0,0.0,0.0,4268,1600,7
4,48156690.9696,0.0,0.0,0.0,4268,1600,7
5,48156695.0912,0.0,0.0,0.0,4268,1600,7


In [76]:
len(behavioral_data)

51414779

In [84]:
behavioral_data = behavioral_data.drop_duplicates()
len(behavioral_data)

48529893

In [85]:
behavioral_data.to_csv("../processed/analog_data.csv", index=False)

In [77]:
photometry_data.head()

Unnamed: 0,timestamp,gcamp,subject,threshold,day
0,48157341.5552,0.0,4268,1600,7
1,48157392.5504,0.054,4268,1600,7
2,48157441.3824,0.0739,4268,1600,7
3,48157491.52,0.2379,4268,1600,7
4,48157542.1568,0.2559,4268,1600,7


In [78]:
len(photometry_data)

3691184

In [86]:
photometry_data = photometry_data.drop_duplicates()
len(photometry_data)

3691184

In [87]:
photometry_data.to_csv("../processed/photometry_data.csv", index=False)