In [1]:
from Analysis_functions import *

In [2]:
import pickle
import matplotlib.pyplot as plt

In [3]:
import warnings
# Silence every FutureWarning for the rest of the notebook.
# NOTE(review): this also hides library deprecation notices (e.g. from pandas)
# that may point at code below needing an update - consider narrowing the filter.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [4]:
# Load the pickled activity recordings for the full-relaxation runs.
# generate_pop_data indexes this object by 1-based file number and then
# slices individuals by column (activity_df[i+1][:, start:end]).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("activity_full_relax.pickle", "rb") as f:
    activity_full = pickle.load(f)

In [5]:
# Load the pickled frame times for the full-relaxation runs; looked up by
# 1-based file number (tp_df[i+1]) and passed to act_bin as frame_times.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("tp_full_relax.pickle", "rb") as f:
    ts_full = pickle.load(f)

In [6]:
# Load the pickled activity recordings for the family-relaxation runs.
# generate_pop_data_fam indexes this object by 1-based file number and then
# slices individuals by column (activity_df[i+1][:, start:end]).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("activity_fam_relax.pickle", "rb") as f:
    activity_fam = pickle.load(f)

In [7]:
# Load the pickled frame times for the family-relaxation runs; looked up by
# 1-based file number (tp_df[i+1]) and passed to act_bin as frame_times.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open("tp_fam_relax.pickle", "rb") as f:
    ts_fam = pickle.load(f)

In [8]:
# Number of label tables scanned by generate_pop_data / generate_pop_data_fam;
# files are addressed as 1..file_no ('<relaxation>/label_table/<n>.csv').
file_no = 12

In [9]:
# Per-file metadata for the full-relaxation runs, indexed by column 'i'.
# generate_pop_data reads its 'box', 'tray' and 'date' columns by file number.
full_rel_info = pd.read_csv('full_rel_info.csv', index_col= 'i')

In [10]:
# Per-file metadata for the family-relaxation runs, indexed by column 'i'.
# generate_pop_data_fam reads its 'box', 'tray' and 'date' columns by file number.
fam_rel_info = pd.read_csv('fam_rel_info.csv', index_col= 'i')

In [11]:
# Hours that are later flagged light = 0: for each of four consecutive days,
# a 12-hour window starting at hour 21 + 24*day.
drk_hrs1, drk_hrs2, drk_hrs3, drk_hrs4 = (
    list(range(21 + 24 * day, 21 + 24 * day + 12)) for day in range(4)
)

In [12]:
# Single flat list of all hours flagged light = 0 across the four days.
drk_hrs = [*drk_hrs1, *drk_hrs2, *drk_hrs3, *drk_hrs4]

In [13]:
def act_bin (activity,frame_times, ts = 1):
    
    '''
    Sum each individual's activity into consecutive time bins of width ts.

    activity is first filtered with exclude_dead_inactive(activity, 32); the
    filtered array is then read one column per individual.

    Parameters
    ----------
    activity : 2-D array, rows = frames, columns = individuals
    frame_times : sequence with the time of each frame (one per row of
        activity); must be sorted ascending because it is searched with bisect
    ts : bin width, in the units of frame_times (default 1)

    Returns
    -------
    sum_act_plot : array of shape (len(time_points), n_individuals) holding
        the summed activity per bin and individual
    time_points : np.arange(floor(frame_times[0]), ceil(frame_times[-1]), ts),
        i.e. the lower edge of each bin
    del_inds : second value returned by exclude_dead_inactive
        (NOTE(review): presumably the indices of the removed individuals -
        confirm in Analysis_functions)

    Notes
    -----
    * Bin i covers time_points[i] <= t <= time_points[i+1] (bisect_left on the
      lower edge, bisect_right on the upper edge), so a frame lying exactly on
      an edge is counted in both neighbouring bins.
    * Only len(time_points) - 1 bins are filled; the last row of sum_act_plot
      is never written and therefore stays 0.
    '''
    activity,del_inds = exclude_dead_inactive(activity, 32)
    
    ll = np.floor(frame_times[0])
    ul = np.ceil(frame_times[-1])
    
    time_points = np.arange(ll,ul,ts)
    num_indivs = activity.shape[1]
    
    sum_act_plot = np.zeros((len(time_points),num_indivs))
    
    # The frame-index range of each bin depends only on frame_times, so it is
    # computed once here instead of once per individual.
    bin_ranges = []
    for i in range(len(time_points)-1):
        lower_bound_i = bisect.bisect_left(frame_times, time_points[i])
        upper_bound_i = bisect.bisect_right(frame_times, time_points[i+1], lo=lower_bound_i)
        bin_ranges.append((lower_bound_i, upper_bound_i))
    
    for a in range(num_indivs):
        activity_ind = activity[:,a]
        for i, (lower_bound_i, upper_bound_i) in enumerate(bin_ranges):
            sum_act_plot[i,a] = np.sum(activity_ind[lower_bound_i:upper_bound_i])
    
    return sum_act_plot, time_points,del_inds


In [35]:
def generate_pop_data(pop_name,sex,relaxation,info_df, activity_df, tp_df):
    '''
    Build a long-format activity table for one population and sex.

    For every file number 1..file_no the label table
    './<relaxation>/label_table/<n>.csv' is read; files with a row matching
    (pop_name, sex) contribute the individuals in columns Start-1 .. End-1 of
    activity_df[n]. Each file's activity is binned with act_bin and the
    binned blocks are concatenated along the individual axis.

    Parameters
    ----------
    pop_name : population label matched against the 'Pop' column; its first
        two characters give 'selection' and its third character gives 'block'
    sex : value matched against the 'Sex' column
    relaxation : directory name of the label tables; stored in the
        'relaxation' column with a trailing '_relax' removed
    info_df : per-file metadata with 'box', 'tray', 'date', looked up by file number
    activity_df : mapping file number -> 2-D activity array (frames x individuals)
    tp_df : mapping file number -> frame times for that file

    Returns
    -------
    DataFrame with one row per (individual, hour) and the columns
    individual, box, tray, date, hour, activity, sex, light, relaxation,
    selection, block.

    Raises
    ------
    ValueError : if no file contains the population/sex, or a label table
        holds more than one matching row.

    Notes
    -----
    The 'hour' columns are taken from the first matching file's time points;
    all matching files are assumed to produce the same number of bins.
    '''
    binned_acts = []
    pop_tp = None
    boxes = []
    trays = []
    dates = []

    for i in range(file_no):
        file_id = i + 1
        label_fname = './' + str(relaxation) + '/label_table/' + str(int(file_id)) + '.csv'
        dfi = pd.read_csv(label_fname,names = ['Pop','Sex','Start','End'], header = 0)
        choose = dfi.loc[(dfi['Pop'] == pop_name) & (dfi['Sex'] == sex)]
        if choose.empty:
            continue
        if len(choose) != 1:
            raise ValueError(label_fname + ' has ' + str(len(choose)) + ' rows for '
                             + str(pop_name) + '/' + str(sex) + '; expected exactly one')

        # Start/End are 1-based inclusive in the label table; convert to a
        # 0-based half-open column slice.
        start_ind = int(choose['Start'].iloc[0]) - 1
        end_ind = int(choose['End'].iloc[0])

        #bin activity by 1 hrs
        binned, time_points, _ = act_bin(activity_df[file_id][:,start_ind:end_ind], tp_df[file_id])
        binned_acts.append(binned)
        if pop_tp is None:
            pop_tp = time_points

        # act_bin may drop individuals, so size the metadata to the columns
        # that survived (the value is constant within a file).
        n_kept = binned.shape[1]
        boxes.append(np.repeat(info_df['box'][file_id], n_kept))
        trays.append(np.repeat(info_df['tray'][file_id], n_kept))
        dates.append(np.repeat(info_df['date'][file_id], n_kept))

    if not binned_acts:
        raise ValueError('no label table under ./' + str(relaxation) + '/label_table/ lists '
                         + str(pop_name) + '/' + str(sex))

    #combine binned data
    pop_act = np.concatenate(binned_acts, axis = 1)
    boxes = np.concatenate(boxes)
    trays = np.concatenate(trays)
    dates = np.concatenate(dates)

    #Generate dataframe from data:
    df_act = pd.DataFrame(pop_act.T, columns = pop_tp)
    df_act['box'] = boxes
    df_act['tray'] = trays
    df_act['date'] = dates
    df_act.index.name = 'individual'

    df = df_act.reset_index().melt(id_vars = ['individual','box','tray','date'], var_name = 'hour',value_name='activity')
    df['pop'] = pop_name

    df['individual'] = pop_name + sex + df['individual'].astype(str)

    df['sex'] = sex
    df['light'] = np.where(df['hour'].isin(drk_hrs), 0, 1)
    df['relaxation'] = relaxation
    df['relaxation'] = df['relaxation'].str.removesuffix('_relax')

    # Assign the replaced column back instead of replace(..., inplace=True) on
    # df['selection']: the chained in-place form does not update df when
    # pandas Copy-on-Write is active.
    df['selection'] = df['pop'].str.slice(0,2).replace({'VS':'sel','VC':'con'})
    df['block'] = df['pop'].str.slice(2,3)
    df = df.drop('pop', axis=1)

    return df

In [36]:
def generate_pop_data_fam(pop_name,relaxation,info_df, activity_df, tp_df):
    '''
    Build a long-format activity table for one population of the family runs.

    For every file number 1..file_no the label table
    './<relaxation>/label_table/<n>.csv' is read. In files that list
    pop_name, the first two matching rows (family 1 and family 2) each
    contribute the individuals in columns Start-1 .. End-1 of activity_df[n].
    Within a family the first half of the individuals is labelled 'F' and the
    second half 'M'. Each family block is binned with act_bin and all blocks
    are concatenated along the individual axis.

    Parameters
    ----------
    pop_name : population label matched against the 'Pop' column; its first
        two characters give 'selection' and its third character gives 'block'
    relaxation : directory name of the label tables; stored in the
        'relaxation' column with a trailing '_relax' removed
    info_df : per-file metadata with 'box', 'tray', 'date', looked up by file number
    activity_df : mapping file number -> 2-D activity array (frames x individuals)
    tp_df : mapping file number -> frame times for that file

    Returns
    -------
    DataFrame with one row per (individual, hour) and the columns
    individual, box, tray, date, sex, hour, activity, light, relaxation,
    selection, block.

    Raises
    ------
    ValueError : if no file lists the population.
    KeyError : if a file lists the population but with fewer than two rows.

    Notes
    -----
    The 'hour' columns are taken from the last binned block's time points;
    all blocks are assumed to produce the same number of bins.
    '''
    parsed_acts = []
    tp_parsed = None
    boxes = []
    trays = []
    dates = []
    sexes = []

    for i in range(file_no):
        file_id = i + 1
        label_fname = './' + str(relaxation) + '/label_table/' + str(int(file_id)) + '.csv'
        dfi = pd.read_csv(label_fname,names = ['Fam','Pop','Start','End'], header = 0)
        # reset_index returns a fresh frame, so rows can be addressed as 0, 1
        # without mutating a slice of dfi in place.
        choose = dfi.loc[(dfi['Pop'] == pop_name)].reset_index(drop=True)

        if choose.empty:
            continue

        # fam1 is row 0, fam2 is row 1 of the matching rows.
        for fam_row in (0, 1):
            # Start/End are 1-based inclusive; convert to a 0-based
            # half-open column slice.
            start_ind = int(choose['Start'][fam_row])-1
            end_ind = int(choose['End'][fam_row])
            n_ind = end_ind - start_ind

            # First half females, second half males. Integer division keeps
            # the repeat count an int.
            # NOTE(review): assumes n_ind is even - an odd count leaves the
            # sex labels one short of the number of individuals.
            half = n_ind // 2
            sex_i = np.concatenate([np.repeat('F',half), np.repeat('M',half)])

            #bin activity by 1 hrs
            act_parsed, tp_parsed, del_ind = act_bin(activity_df[file_id][:,start_ind:end_ind], tp_df[file_id])
            parsed_acts.append(act_parsed)

            # act_bin may drop individuals: box/tray/date are constant within
            # a file so they are just sized to the surviving columns, while
            # the sex labels must drop exactly the removed positions.
            n_kept = act_parsed.shape[1]
            boxes.append(np.repeat(info_df['box'][file_id],n_ind)[0:n_kept])
            trays.append(np.repeat(info_df['tray'][file_id],n_ind)[0:n_kept])
            dates.append(np.repeat(info_df['date'][file_id],n_ind)[0:n_kept])
            sexes.append(np.delete(sex_i,del_ind))

    if not parsed_acts:
        raise ValueError('no label table under ./' + str(relaxation) + '/label_table/ lists '
                         + str(pop_name))

    #combine binned data
    pop_act = np.concatenate(parsed_acts, axis = 1)
    pop_tp = tp_parsed

    boxes = np.concatenate(boxes)
    trays = np.concatenate(trays)
    dates = np.concatenate(dates)
    sexes = np.concatenate(sexes)

    #Generate dataframe from data:
    df_act = pd.DataFrame(pop_act.T, columns = pop_tp)
    df_act['box'] = boxes
    df_act['tray'] = trays
    df_act['date'] = dates
    df_act['sex'] = sexes

    df_act.index.name = 'individual'

    df = df_act.reset_index().melt(id_vars = ['individual','box','tray','date','sex'], var_name = 'hour',value_name='activity')

    df['pop'] = pop_name

    df['individual'] = pop_name + df['individual'].astype(str)

    df['light'] = np.where(df['hour'].isin(drk_hrs), 0, 1)
    df['relaxation'] = relaxation
    df['relaxation'] = df['relaxation'].str.removesuffix('_relax')

    # Assign the replaced column back instead of replace(..., inplace=True) on
    # df['selection']: the chained in-place form does not update df when
    # pandas Copy-on-Write is active.
    df['selection'] = df['pop'].str.slice(0,2).replace({'VS':'sel','VC':'con'})
    df['block'] = df['pop'].str.slice(2,3)
    df = df.drop('pop', axis=1)

    return df

In [37]:
#full relax: one long-format frame per population and sex (females first, then males)
VS1_df_F, VS1_df_M = (
    generate_pop_data('VS1', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)

VC1_df_F, VC1_df_M = (
    generate_pop_data('VC1', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)

VS2_df_F, VS2_df_M = (
    generate_pop_data('VS2', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)

VC2_df_F, VC2_df_M = (
    generate_pop_data('VC2', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)

VS3_df_F, VS3_df_M = (
    generate_pop_data('VS3', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)

VC3_df_F, VC3_df_M = (
    generate_pop_data('VC3', sex, 'full_relax', full_rel_info, activity_full, ts_full)
    for sex in ('F', 'M')
)


In [41]:
# Preview the long-format frame for VS1 females (full relaxation).
# Rows at the last hour show activity 0 because act_bin never fills its final bin.
VS1_df_F

Unnamed: 0,individual,box,tray,date,hour,activity,sex,light,relaxation,selection,block
0,VS1F0,1,1,10-02-2023,21.0,12586.241053,F,0,full,sel,1
1,VS1F1,1,1,10-02-2023,21.0,36975.655128,F,0,full,sel,1
2,VS1F2,1,1,10-02-2023,21.0,27565.288866,F,0,full,sel,1
3,VS1F3,1,1,10-02-2023,21.0,15864.534395,F,0,full,sel,1
4,VS1F4,1,1,10-02-2023,21.0,11298.004916,F,0,full,sel,1
...,...,...,...,...,...,...,...,...,...,...,...
10663,VS1F122,4,4,10-02-2023,104.0,0.000000,F,0,full,sel,1
10664,VS1F123,4,4,10-02-2023,104.0,0.000000,F,0,full,sel,1
10665,VS1F124,4,4,10-02-2023,104.0,0.000000,F,0,full,sel,1
10666,VS1F125,4,4,10-02-2023,104.0,0.000000,F,0,full,sel,1


In [42]:
#fam relax: one long-format frame per population (both sexes inside each frame)
VS1_df_fam, VC1_df_fam, VS2_df_fam, VC2_df_fam, VS3_df_fam, VC3_df_fam = (
    generate_pop_data_fam(pop, 'fam_relax', fam_rel_info, activity_fam, ts_fam)
    for pop in ('VS1', 'VC1', 'VS2', 'VC2', 'VS3', 'VC3')
)


In [43]:
# Preview the long-format frame for VS1 (family relaxation).
# Rows at the last hour show activity 0 because act_bin never fills its final bin.
VS1_df_fam

Unnamed: 0,individual,box,tray,date,sex,hour,activity,light,relaxation,selection,block
0,VS10,3,14,10-08-2023,F,21.0,12587.907297,0,fam,sel,1
1,VS11,3,14,10-08-2023,F,21.0,36887.108587,0,fam,sel,1
2,VS12,3,14,10-08-2023,F,21.0,27402.599897,0,fam,sel,1
3,VS13,3,14,10-08-2023,F,21.0,16800.505370,0,fam,sel,1
4,VS14,3,14,10-08-2023,F,21.0,10519.400470,0,fam,sel,1
...,...,...,...,...,...,...,...,...,...,...,...
19903,VS1232,6,15,10-08-2023,M,104.0,0.000000,0,fam,sel,1
19904,VS1233,6,15,10-08-2023,M,104.0,0.000000,0,fam,sel,1
19905,VS1234,6,15,10-08-2023,M,104.0,0.000000,0,fam,sel,1
19906,VS1235,6,15,10-08-2023,M,104.0,0.000000,0,fam,sel,1


In [44]:
# Every per-population frame, grouped as (full F, full M, fam) per population.
dfs = [
    VS1_df_F, VS1_df_M, VS1_df_fam,
    VC1_df_F, VC1_df_M, VC1_df_fam,
    VS2_df_F, VS2_df_M, VS2_df_fam,
    VC2_df_F, VC2_df_M, VC2_df_fam,
    VS3_df_F, VS3_df_M, VS3_df_fam,
    VC3_df_F, VC3_df_M, VC3_df_fam,
]

In [45]:
# Stack all population frames into one table with a fresh 0..N-1 index.
activity_df = pd.concat(dfs,ignore_index=True)

In [46]:
# Keep only rows with non-zero binned activity. This removes the final hour
# bin that act_bin leaves at 0.
# NOTE(review): it also removes any hour in which an individual genuinely had
# zero activity - confirm that is intended.
activity_df = activity_df.loc[activity_df['activity'] != 0]

In [47]:
# Encode each distinct 'individual' label as an integer code (one code per
# row) together with the array of unique labels.
codes, uniques = pd.factorize(activity_df['individual'])

In [51]:
# Numeric per-individual ID, shifted by one so that IDs start at 1.
activity_df['ind_ID'] = codes + 1

In [52]:
# Preview the combined table after zero-activity rows were dropped and ind_ID was added.
activity_df

Unnamed: 0,individual,box,tray,date,hour,activity,sex,light,relaxation,selection,block,ind_ID
0,VS1F0,1,1,10-02-2023,21.0,12586.241053,F,0,full,sel,1,1
1,VS1F1,1,1,10-02-2023,21.0,36975.655128,F,0,full,sel,1,2
2,VS1F2,1,1,10-02-2023,21.0,27565.288866,F,0,full,sel,1,3
3,VS1F3,1,1,10-02-2023,21.0,15864.534395,F,0,full,sel,1,4
4,VS1F4,1,1,10-02-2023,21.0,11298.004916,F,0,full,sel,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...
231222,VC3188,6,6,10-12-2023,103.0,1433.273557,M,0,fam,con,3,2749
231223,VC3189,6,6,10-12-2023,103.0,8875.581166,M,0,fam,con,3,2750
231224,VC3190,6,6,10-12-2023,103.0,263.320415,M,0,fam,con,3,2751
231225,VC3191,6,6,10-12-2023,103.0,335.291699,M,0,fam,con,3,2752


In [53]:
# Write the combined long-format table to disk without the row index.
activity_df.to_csv('activity_data.csv',index=False)