# Rat_PET_analysis_script

In [1]:
import os
import fnmatch
import pandas as pd
import csv
import numpy as np
from pathlib import Path
from openpyxl import load_workbook

In [2]:
# Definitions

# Root of the analysis tree: the parent of the directory the notebook is run from.
analysis_path = os.path.dirname(os.getcwd())

# Folder names used to build the input and output paths in the cells below.
raw = '01_raw_data'                  # raw exports that are read in
processed = '02_raw_processed'       # root of everything this notebook writes
stat_scans = 'static_average_scans'  # scan type; also selects the static branch of the later cells
hemi = '01_Hemispheres_separated'    # output sub-folder: one row per VOI
hemi2 = '02_Hemispheres_averaged'    # output sub-folder: hemispheres pooled per region
SUV = 'SUV_values' # folder containing the transformed activity in SUV

# Scan types and activity units that every processing cell loops over.
filepath = [stat_scans]
activity = [SUV]

# lists with rat numbering
# rat_list_pet omits Rat^70 -- presumably no usable PET data for that animal; TODO confirm.
rat_list_pet = ['Rat^{}'.format(i) for i in range(66, 90) if i !=70]
rat_list = ['Rat^{}'.format(i) for i in range(66, 90)]

# timepoints_list (in seconds)
#all_timepoints = [150, 450, 750, 1050, 1350, 1650, 900]
# Needed by the 'dynamic_scans' branches of the later cells; it was previously only
# present as a comment, so enabling dynamic scans raised a NameError.
timepoints_dyn = [150, 450, 750, 1050, 1350, 1650]
time_average = 900

# list with relevant regions
regions = ['accumbens', 'cerebellum', 'mPFC', 'OFC', 'frontal_cortex', 'hippocampus', 'striatum', 'thalamus', 'visual_cortex', 'whole_brain']




#### Rat Group Naming convention:

* Veh = 1
* Har = 2
* DMT = 3
* Har + DMT = 4

In [3]:
## allocate rat to corresponding group based on rat number

def group_allocation(row):
    """Return the treatment-group label for the rat named in ``row``.

    ``row`` must be indexable by ``'PatientName [string]'``.  The result is
    ``'Veh'``, ``'Har'``, ``'DMT'`` or ``'Har + DMT'``; a rat name that is not
    listed in any group yields the integer ``0``.
    """
    rat_name = row['PatientName [string]']

    # (label, members) pairs, checked in the same order as the group numbering 1-4.
    members_by_label = (
        ('Veh', ('Rat^70', 'Rat^71', 'Rat^76', 'Rat^77', 'Rat^82', 'Rat^83')),
        ('Har', ('Rat^68', 'Rat^69', 'Rat^80', 'Rat^81', 'Rat^84', 'Rat^85')),
        ('DMT', ('Rat^72', 'Rat^73', 'Rat^74', 'Rat^75', 'Rat^88', 'Rat^89')),
        ('Har + DMT', ('Rat^66', 'Rat^67', 'Rat^78', 'Rat^79', 'Rat^86', 'Rat^87')),
    )
    for label, members in members_by_label:
        if rat_name in members:
            return label
    return 0
    
def group_allocation_number(row):
    """Return the numeric treatment-group code for the rat named in ``row``.

    ``row`` must be indexable by ``'PatientName [string]'``.  Codes are
    1 = Veh, 2 = Har, 3 = DMT, 4 = Har + DMT; a rat name that is not listed
    in any group yields ``0``.
    """
    rat_name = row['PatientName [string]']

    # Position in this tuple (counting from 1) is the group code.
    members_in_code_order = (
        ('Rat^70', 'Rat^71', 'Rat^76', 'Rat^77', 'Rat^82', 'Rat^83'),  # 1: Veh
        ('Rat^68', 'Rat^69', 'Rat^80', 'Rat^81', 'Rat^84', 'Rat^85'),  # 2: Har
        ('Rat^72', 'Rat^73', 'Rat^74', 'Rat^75', 'Rat^88', 'Rat^89'),  # 3: DMT
        ('Rat^66', 'Rat^67', 'Rat^78', 'Rat^79', 'Rat^86', 'Rat^87'),  # 4: Har + DMT
    )
    for code, members in enumerate(members_in_code_order, start=1):
        if rat_name in members:
            return code
    return 0
    

# Group labels and their numeric codes, kept in matching order.
_group_codes = {'Veh': 1, 'Har': 2, 'DMT': 3, 'Har + DMT': 4}
ls_group = list(_group_codes)
ls_group_number = list(_group_codes.values())

## Open raw data files and save data per region for all rats

The regions of interest are saved in new files.

In [4]:
# VOI labels (values of the sixth column of the prepared table) that are pooled into
# each per-region output file; the key is used as both the file stem and the sheet name.
# NOTE(review): 'OrbitofrontalCortexl_l' (extra "l") is copied verbatim from the
# original filter -- confirm it matches the label used in the raw export.
region_file_vois = {
    'whole_brain': ['Group'],
    'visual_cortex': ['VisualCortex_l', 'VisualCortex_r'],
    'mPFC': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r'],
    'OFC': ['OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'frontal_cortex': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r',
                       'OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'hippocampus': ['HippocampusAnteroDorsal_l', 'HippocampusAnteroDorsal_r',
                    'HippocampusPosterior_l', 'HippocampusPosterior_r'],
    'thalamus': ['Thalamus_l', 'Thalamus_r'],
    'striatum': ['Striatum_l', 'Striatum_r'],
    'accumbens': ['Accumbens_l', 'Accumbens_r'],
    'cerebellum': ['Cerebellum_GM_l', 'Cerebellum_GM_r'],
}

for scans in filepath:
    for act in activity:

        path = Path(f'{analysis_path}/{raw}/{scans}/{act}')
        path_out = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi}')
        # Create the output folder up front so writing cannot fail on a missing directory.
        path_out.mkdir(parents=True, exist_ok=True)

        # Raw workbooks in a stable order; '~$...' files are lock files Excel leaves
        # next to an open workbook and are not readable exports.
        xlsx_list = sorted(each for each in os.listdir(path)
                           if each.endswith(".xlsx") and not each.startswith("~$"))
        if not xlsx_list:
            raise FileNotFoundError(f'No .xlsx files found in {path}')

        # One list of per-workbook frames for the unfiltered table and for every region.
        collected = {name: [] for name in ['all_regions', *region_file_vois]}

        for xlsx_raw in xlsx_list:

            # skiprows=7 with header=0: the eighth row of the sheet holds the column names.
            df = pd.read_excel(path/xlsx_raw, usecols = 'C,E,H,J:AD', skiprows = 7, header = 0, engine = 'openpyxl')

            ## insert some useful columns, e.g. Group Allocation and Group Number
            df.insert(loc=1, column='Treatment_Group', value=df.apply(group_allocation, axis=1))
            df.insert(loc=2, column='Treatment_Group_#', value=df.apply(group_allocation_number, axis=1))
            # the weight is whatever follows the first 21 characters of 'PatientInfo'; store it as a number
            df.insert(loc=3, column='Weight [kg]', value=pd.to_numeric(df['PatientInfo [string]'].str[21:]))

            # get rid of this string column
            df = df.drop(['PatientInfo [string]'], axis=1)

            # fix time inconsistencies: round to whole seconds, then snap 899 s to 900 s
            df['Time [seconds]'] = np.round(df['Time [seconds]'], decimals = 0)
            df.loc[df['Time [seconds]']==899, 'Time [seconds]'] = 900

            # Column 5 holds the VOI label the region filters match against.
            voi_labels = df[df.columns[5]]
            collected['all_regions'].append(df)
            for name, labels in region_file_vois.items():
                collected[name].append(df.loc[voi_labels.isin(labels)])

        ## save updated files in new excel files (one workbook per region)
        for name, parts in collected.items():
            pd.concat(parts, axis = 0).to_excel(path_out/f'{name}.xlsx', sheet_name=name, index = False)



### a) Add blood glucose correction to the all_regions file

The calibration factor is the blood glucose level at time point 2 (the time of FDG injection) divided by 6 mmol/L (a rough average blood glucose level).

In [5]:
# Blood-glucose table: the first column holds the rat name; only the first 24 data rows are read.
df_glucose = pd.read_excel(f'{analysis_path}/00_blood_glucose/Rat_basic_data_with_calculations.xlsx', header = 0, nrows=24)

# Columns copied per rat from the glucose table into the activity table.
glucose_columns = ['Glucose_level_2 (mmol/L)', 'Glucose_level_2_scaled (mmol/L)']

for scans in filepath:
    for act in activity:

        path_out = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi}')
        df_updated = pd.read_excel(path_out/'all_regions.xlsx', header = 0, engine = 'openpyxl')

        # Keep only the first ten columns; everything after them is discarded.
        df_updated = df_updated.iloc[:, :10].copy()

        # Initialise with floats so the per-rat assignments below do not have to
        # upcast an integer column. Rats outside rat_list_pet keep 0.0.
        df_updated.insert(loc=10, column='Glucose_level_2 (mmol/L)', value=0.0)
        df_updated.insert(loc=11, column='Glucose_level_2_scaled (mmol/L)', value=0.0)
        df_updated.insert(loc=12, column='glucose_normalization_factor', value=6)

        for rat in rat_list_pet:

            glucose_row = df_glucose.loc[df_glucose[df_glucose.columns[0]] == rat]
            if len(glucose_row) != 1:
                raise ValueError(f'Expected exactly one blood glucose entry for {rat}, found {len(glucose_row)}')

            rat_rows = df_updated['PatientName [string]'] == rat
            for column in glucose_columns:
                # Assign a scalar so the value is broadcast over all rows of this rat.
                df_updated.loc[rat_rows, column] = glucose_row[column].iloc[0]

        # Column 7 and column 8 are each multiplied by column 11 (the scaled glucose level).
        df_updated.insert(loc=13, column='activity_normalized_to_glucose', value=(df_updated.iloc[:,7]*df_updated.iloc[:,11]))
        df_updated.insert(loc=14, column='sd_normalized_to_glucose', value=(df_updated.iloc[:,8]*df_updated.iloc[:,11]))

        df_updated.to_excel(path_out/'all_regions_glucose_normalized.xlsx', sheet_name="all_regions_glucose_normalized", index = False)



        

### b) Add whole brain normalization to the all_regions file

The uptake in each region is multiplied by a per-rat normalization factor: the mean whole-brain uptake across all rats (regardless of their group allocation) divided by that rat's own whole-brain uptake.

In [6]:
for scans in filepath:
    for act in activity:

        path_out = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi}')
        df_updated = pd.read_excel(path_out/'all_regions.xlsx', header = 0, engine = 'openpyxl')

        # Keep only the first ten columns; everything after them is discarded.
        df_updated = df_updated.iloc[:, :10].copy()

        # Initialise with floats so the per-rat assignments below do not have to
        # upcast an integer column. Rats outside rat_list_pet keep 0.0 in both columns.
        df_updated.insert(loc=10, column='Whole Brain Uptake', value=0.0)
        df_updated.insert(loc=11, column='Whole Brain Normalization Factor', value=0.0)

        rat_names = df_updated['PatientName [string]']
        is_whole_brain = df_updated.iloc[:, 5] == 'Group'   # whole-brain VOI rows
        uptake = df_updated.iloc[:, 7]                      # column that gets normalized below

        # Mean over every whole-brain row in the table, irrespective of rat or group.
        # skipna=False: a missing value makes the mean NaN instead of being silently ignored.
        mean_global_uptake = uptake[is_whole_brain].mean(skipna=False)

        for rat in rat_list_pet:

            rat_rows = rat_names == rat
            rat_uptake = uptake[rat_rows & is_whole_brain].mean(skipna=False)

            # Scalars are broadcast over all rows of this rat; the factor is computed from
            # this rat's own uptake only, never from rows that have not been filled yet.
            df_updated.loc[rat_rows, 'Whole Brain Uptake'] = rat_uptake
            df_updated.loc[rat_rows, 'Whole Brain Normalization Factor'] = mean_global_uptake/rat_uptake

        # Column 7 and column 8 are each multiplied by column 11 (the normalization factor).
        df_updated.insert(loc= 12, column='activity_normalized_with_WB_norm', value=(df_updated.iloc[:,7]*df_updated.iloc[:,11]))
        df_updated.insert(loc= 13, column='sd_normalized_with_WB_norm', value=(df_updated.iloc[:,8]*df_updated.iloc[:,11]))

        df_updated.to_excel(path_out/'all_regions_WB_uptake_normalized.xlsx', sheet_name="all_regions_WB_uptake_norm", index = False)

### Order activity in new file and average the activity for both hemispheres

####  a) for the glucose normalized data

In [7]:
# VOI labels (values of 'VoiName(Region) [string]') pooled into each region of interest.
# NOTE(review): 'OrbitofrontalCortexl_l' (extra "l") is copied verbatim from the
# original filter -- confirm it matches the label used in the raw export.
region_vois = {
    'accumbens': ['Accumbens_l', 'Accumbens_r'],
    'cerebellum': ['Cerebellum_GM_l', 'Cerebellum_GM_r'],
    'mPFC': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r'],
    'OFC': ['OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'frontal_cortex': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r',
                       'OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'hippocampus': ['HippocampusAnteroDorsal_l', 'HippocampusAnteroDorsal_r',
                    'HippocampusPosterior_l', 'HippocampusPosterior_r'],
    'striatum': ['Striatum_l', 'Striatum_r'],
    'thalamus': ['Thalamus_l', 'Thalamus_r'],
    'visual_cortex': ['VisualCortex_l', 'VisualCortex_r'],
    'whole_brain': ['Group'],
}

for scans in filepath:
    for act in activity:

        # Timepoints and sheet name depend on the scan type; other scan types write nothing.
        if scans == 'dynamic_scans':
            # timepoints_dyn has to be defined in the definitions cell for this branch.
            timepoints, sheet = timepoints_dyn, 'dynamic_activity'
        elif scans == 'static_average_scans':
            timepoints, sheet = [time_average], 'static_activity'
        else:
            continue

        path_out = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi}')
        path_out2 = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi2}')
        # Create the output folder up front so writing cannot fail on a missing directory.
        path_out2.mkdir(parents=True, exist_ok=True)

        df2 = pd.read_excel(path_out/'all_regions_glucose_normalized.xlsx', header = 0, engine = 'openpyxl')

        activity_col = 'activity_normalized_to_glucose'
        sd_col = 'sd_normalized_to_glucose'
        volume_col = df2.columns[9]   # presumably the VOI volume -- TODO confirm against the raw export

        records = []   # one dict per (rat, timepoint); becomes one row of the output sheet
        for rat in rat_list_pet:

            rat_rows = df2[df2['PatientName [string]'] == rat]

            # Per-rat metadata, taken from the rat's first row.
            treat_group = rat_rows['Treatment_Group'].iloc[0]
            treat_group_number = rat_rows['Treatment_Group_#'].iloc[0]
            weight = rat_rows['Weight [kg]'].iloc[0]
            date_time = rat_rows['StudyDate [date_time]'].iloc[0]

            # Whole-brain volume is taken from the 900 s 'Group' rows and is not halved.
            # NOTE(review): for dynamic scans this is 0 unless the export has a 900 s row -- confirm.
            whole_brain_rows = rat_rows[(rat_rows['Time [seconds]'] == time_average) &
                                        (rat_rows['VoiName(Region) [string]'] == 'Group')]
            whole_brain_vol = np.round(whole_brain_rows[volume_col].sum(), decimals = 6)

            for timepoint in timepoints:

                timepoint_rows = rat_rows[rat_rows['Time [seconds]'] == timepoint]
                record = {'Rat #': rat,
                          'Treatment_Group': treat_group,
                          'Treatment_Group_#': treat_group_number,
                          'Weight [kg]': weight,
                          'StudyDate [date_time]': date_time,
                          'Time [seconds]': timepoint}

                for area in regions:
                    area_rows = timepoint_rows[timepoint_rows['VoiName(Region) [string]'].isin(region_vois[area])]

                    # average the activity over all VOIs of the region (both hemispheres)
                    record[area] = np.round(area_rows[activity_col].mean(), decimals = 2)
                    # average the standard deviation over the same VOIs
                    record[f'{area}_sd'] = np.round(area_rows[sd_col].mean(), decimals = 2)
                    # half of the summed VOI volume, i.e. the volume per hemisphere
                    record[f'{area}_vol'] = np.round(area_rows[volume_col].sum()*0.5, decimals = 6)

                # replaces the halved value computed in the loop above
                record['whole_brain_vol'] = whole_brain_vol
                records.append(record)

        pd.DataFrame(records).to_excel(path_out2/'01a_activity_in_relevant_regions_glucose_normalized.xlsx', sheet_name=sheet, index = False)



#### b) and for the whole brain normalized data

In [8]:
# VOI labels (values of 'VoiName(Region) [string]') pooled into each region of interest.
# NOTE(review): 'OrbitofrontalCortexl_l' (extra "l") is copied verbatim from the
# original filter -- confirm it matches the label used in the raw export.
region_vois = {
    'accumbens': ['Accumbens_l', 'Accumbens_r'],
    'cerebellum': ['Cerebellum_GM_l', 'Cerebellum_GM_r'],
    'mPFC': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r'],
    'OFC': ['OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'frontal_cortex': ['MedialPrefrontalCortex_l', 'MedialPrefrontalCortex_r',
                       'OrbitofrontalCortexl_l', 'OrbitofrontalCortex_r'],
    'hippocampus': ['HippocampusAnteroDorsal_l', 'HippocampusAnteroDorsal_r',
                    'HippocampusPosterior_l', 'HippocampusPosterior_r'],
    'striatum': ['Striatum_l', 'Striatum_r'],
    'thalamus': ['Thalamus_l', 'Thalamus_r'],
    'visual_cortex': ['VisualCortex_l', 'VisualCortex_r'],
    'whole_brain': ['Group'],
}

for scans in filepath:
    for act in activity:

        # Timepoints and sheet name depend on the scan type; other scan types write nothing.
        if scans == 'dynamic_scans':
            # timepoints_dyn has to be defined in the definitions cell for this branch.
            timepoints, sheet = timepoints_dyn, 'dynamic_activity'
        elif scans == 'static_average_scans':
            timepoints, sheet = [time_average], 'static_activity'
        else:
            continue

        path_out = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi}')
        path_out2 = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi2}')
        # Create the output folder up front so writing cannot fail on a missing directory.
        path_out2.mkdir(parents=True, exist_ok=True)

        df2 = pd.read_excel(path_out/'all_regions_WB_uptake_normalized.xlsx', header = 0, engine = 'openpyxl')

        activity_col = 'activity_normalized_with_WB_norm'
        sd_col = 'sd_normalized_with_WB_norm'
        volume_col = df2.columns[9]   # presumably the VOI volume -- TODO confirm against the raw export

        records = []   # one dict per (rat, timepoint); becomes one row of the output sheet
        for rat in rat_list_pet:

            rat_rows = df2[df2['PatientName [string]'] == rat]

            # Per-rat metadata, taken from the rat's first row.
            treat_group = rat_rows['Treatment_Group'].iloc[0]
            treat_group_number = rat_rows['Treatment_Group_#'].iloc[0]
            weight = rat_rows['Weight [kg]'].iloc[0]
            date_time = rat_rows['StudyDate [date_time]'].iloc[0]

            # Whole-brain volume is taken from the 900 s 'Group' rows and is not halved.
            # NOTE(review): for dynamic scans this is 0 unless the export has a 900 s row -- confirm.
            whole_brain_rows = rat_rows[(rat_rows['Time [seconds]'] == time_average) &
                                        (rat_rows['VoiName(Region) [string]'] == 'Group')]
            whole_brain_vol = np.round(whole_brain_rows[volume_col].sum(), decimals = 6)

            for timepoint in timepoints:

                timepoint_rows = rat_rows[rat_rows['Time [seconds]'] == timepoint]
                record = {'Rat #': rat,
                          'Treatment_Group': treat_group,
                          'Treatment_Group_#': treat_group_number,
                          'Weight [kg]': weight,
                          'StudyDate [date_time]': date_time,
                          'Time [seconds]': timepoint}

                for area in regions:
                    area_rows = timepoint_rows[timepoint_rows['VoiName(Region) [string]'].isin(region_vois[area])]

                    # average the activity over all VOIs of the region (both hemispheres)
                    record[area] = np.round(area_rows[activity_col].mean(), decimals = 2)
                    # average the standard deviation over the same VOIs
                    record[f'{area}_sd'] = np.round(area_rows[sd_col].mean(), decimals = 2)
                    # half of the summed VOI volume, i.e. the volume per hemisphere
                    record[f'{area}_vol'] = np.round(area_rows[volume_col].sum()*0.5, decimals = 6)

                # replaces the halved value computed in the loop above
                record['whole_brain_vol'] = whole_brain_vol
                records.append(record)

        pd.DataFrame(records).to_excel(path_out2/'01b_activity_in_relevant_regions_WB_uptake_normalized.xlsx', sheet_name=sheet, index = False)



## Calculate mean, sd, sem per group 



In [9]:
# Each pair is (per-rat input workbook, per-group output workbook); the pairs are
# unzipped into two parallel lists.
ls_files, ls_files_out = map(list, zip(
    ('01a_activity_in_relevant_regions_glucose_normalized.xlsx',
     '02a_activity_in_relevant_regions_glucose_normalized_grouped.xlsx'),
    ('01b_activity_in_relevant_regions_WB_uptake_normalized.xlsx',
     '02b_activity_in_relevant_regions_WB_uptake_normalized_grouped.xlsx'),
))

In [10]:
for scans in filepath:
    for act in activity:

        # Timepoints and sheet name depend on the scan type; other scan types write nothing.
        if scans == 'dynamic_scans':
            # timepoints_dyn has to be defined in the definitions cell for this branch.
            timepoints, sheet = timepoints_dyn, 'dynamic_activity_grouped'
        elif scans == 'static_average_scans':
            timepoints, sheet = [900], 'static_activity_grouped'
        else:
            continue

        path_out2 = Path(f'{analysis_path}/{processed}/{scans}/{act}/{hemi2}')

        for (file_in, file_out) in zip(ls_files, ls_files_out):

            df5 = pd.read_excel(path_out2/file_in, header = 0, engine = 'openpyxl')

            records = []   # one dict per (group, timepoint); becomes one row of the output sheet
            for group in ls_group:

                group_rows = df5[df5['Treatment_Group'] == group]
                treat_group_number = group_rows['Treatment_Group_#'].iloc[0]

                # Previously this cell reused `weight` and `date_time` left over from the
                # last rat processed in an earlier cell. Both are now derived from this
                # group's own rows. A group can span several study dates; the first one
                # is kept only so the sheet layout stays the same.
                group_date_time = group_rows['StudyDate [date_time]'].iloc[0]

                for timepoint in timepoints:

                    rows = group_rows[group_rows['Time [seconds]'] == timepoint]
                    record = {'Treatment_Group': group,
                              'Treatment_Group_#': treat_group_number,
                              'Weight [kg]': np.round(rows['Weight [kg]'].mean(), decimals = 3),  # group mean
                              'StudyDate [date_time]': group_date_time,
                              'Time [seconds]': timepoint}

                    # loop over all relevant regions
                    for area in regions:
                        record[f'{area}_mean'] = np.round(rows[area].mean(), decimals = 2)
                        record[f'{area}_sd'] = np.round(rows[area].std(), decimals = 2)
                        record[f'{area}_vol'] = np.round(rows[f'{area}_vol'].mean(), decimals = 2)

                    records.append(record)

            pd.DataFrame(records).to_excel(path_out2/file_out, sheet_name=sheet, index = False)

        
        