In [1]:
import pandas as pd 
import numpy as np 

## The following script organizes our final translating-time data table, which includes both pigs and humans, and calculates and adds new event scores for the MWF inflection points.


#### The Pig_MWF data was reprocessed in March 2024 to include Combined Regions. This was due to the discrepancy that was observed between left and right regions (specifically Putamen_GP and Internal Capsules). Additionally, pig data was showing that Corpus Callosum was a late myelinating region, whereas for infant data it was an early myelinating region. The newer version of the Pig_Brain_Atlas_Wk12 was used, and additional data points were added in Week18 and Week24 because registration was fixed for several pigs. Event scores were calculated for this updated data.

In [5]:
# Read the pig MWF inflection-point table (one row per event with its PCD_lr value).
pig_inflection_file = "Pig_MWF_Inflection_PCDs.xlsx"
pig_gomp_data_updated = pd.read_excel(pig_inflection_file)
pig_gomp_data_updated


Unnamed: 0,Event,PCD_lr
0,Combined_Cortex_Inflection,100.915981
1,Combined_Internal_Capsule_Inflection,63.70595
2,Combined_Putamen_and_Globus_Palidus_Inflection,63.570911
3,Corpus_Callosum_Inflection,74.267026
4,Hypothalamus_Inflection,94.085391
5,Lateral_Ventricle_Inflection,76.605415
6,Left_Caudate_Inflection,56.854619
7,Left_Cortex_Inflection,97.284653
8,Left_Hippocampus_Inflection,99.766244
9,Left_Internal_Capsule_Inflection,57.105239


In [10]:
def calcEscore(PCD_lr, S_scale = 2.15115318237061):
    """Calculate an event score from the PCD (day) on which an event occurs.

    Evaluates ``log(PCD_lr - 4.42) - S_scale``.

    Parameters
    ----------
    PCD_lr : float or array-like
        PCD of the event(s). Scalars, NumPy arrays and pandas objects are all
        accepted because the computation is done with ``np.log``.
    S_scale : float, optional
        Species score. The default, 2.15115318237061, is the pig species
        score; the human species score is 2.500.

    Returns
    -------
    Same shape as ``PCD_lr``
        The event score(s).
    """
    shifted_pcd = PCD_lr - 4.42
    return np.log(shifted_pcd) - S_scale

def predPCD(E_scale, S_scale):
    """Predict the PCD of an event from its event score and a species score.

    Evaluates ``exp(E_scale + S_scale) + 4.42``, i.e. the inverse of the
    event-score formula used by ``calcEscore``.

    Parameters
    ----------
    E_scale : float or array-like
        Event score(s).
    S_scale : float
        Species score of the species to predict for.

    Returns
    -------
    Same shape as ``E_scale``
        Predicted PCD(s).
    """
    log_days = E_scale + S_scale
    return np.exp(log_days) + 4.42

In [7]:
# Event score for every pig inflection point, using calcEscore's default (pig)
# species score. calcEscore is NumPy-based, so the PCD_lr column is passed to it
# directly instead of going through DataFrame.apply, which returned a one-column
# DataFrame that then had to be assigned to a single column.
pig_gomp_data_updated["Event_Score"] = calcEscore(pig_gomp_data_updated["PCD_lr"])
# Keep only the event name and its score; the PCD_lr column is dropped here.
pig_gomp_data_updated = pig_gomp_data_updated[['Event', 'Event_Score']]
pig_gomp_data_updated

Unnamed: 0,Event,Event_Score
0,Combined_Cortex_Inflection,2.418348
1,Combined_Internal_Capsule_Inflection,1.931219
2,Combined_Putamen_and_Globus_Palidus_Inflection,1.928939
3,Corpus_Callosum_Inflection,2.095154
4,Hypothalamus_Inflection,2.344932
5,Lateral_Ventricle_Inflection,2.128085
6,Left_Caudate_Inflection,1.808414
7,Left_Cortex_Inflection,2.37999
8,Left_Hippocampus_Inflection,2.406362
9,Left_Internal_Capsule_Inflection,1.813182


In [8]:
# Load the translating-time event scores.
translating_time = pd.read_csv("Translating_time_event_scores.csv")
# Series.map(float) runs float() on every entry, converting any string values in
# the Event_Score column into numeric values.
translating_time["Event_Score"] = translating_time["Event_Score"].map(float)

In [11]:
# Species scores passed to predPCD, in the order the prediction columns are added.
species_scores_updated = (
    ('rat', 0.897),
    ('pig', 2.15115318237061),
    ('primate', 2.255),
    ('infant', 2.5),
)

# Stack the translating-time events and the pig inflection events, ordered by
# event score. The row labels of the two source tables are kept as they are.
final_data_updated = pd.concat([translating_time, pig_gomp_data_updated]).sort_values(by=['Event_Score'])

# One '<species>_PCD_pred' column per species. predPCD is NumPy-based, so it is
# called once on the whole Event_Score column rather than via DataFrame.apply.
for species, species_score in species_scores_updated:
    final_data_updated[species + '_PCD_pred'] = predPCD(final_data_updated['Event_Score'], species_score)

final_data_updated.to_csv('Updated_03.2024_Final_Translating_Time_DataSet.csv')
final_data_updated

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,primate_PCD_pred,infant_PCD_pred
0,cranial motor nuclei-peak,0.903000,10.469647,25.623223,27.943502,34.474127
1,retinal ganglion cell generation-start,1.023000,11.240958,28.326567,30.942674,38.305934
2,subplate-start,1.038000,11.344044,28.687868,31.343513,38.818054
3,locus coeruleus - peak,1.070000,11.569197,29.476999,32.218999,39.936593
4,inferior olivary nucleus - peak,1.073000,11.590676,29.552283,32.302521,40.043303
...,...,...,...,...,...,...
0,Combined_Cortex_Inflection,2.418348,31.951978,100.915981,111.475584,141.196495
12,Midbrain_Inflection,2.427603,32.207974,101.813212,112.470999,142.468260
14,Right_Cortex_Inflection,2.446176,32.728894,103.638969,114.496550,145.056147
11,Left_Superior_Colliculus_Inflection,2.527348,35.122627,112.028691,123.804363,156.948007


## The scripts below are for the OLD Pig_MWF data that was processed in Feb 2023. Event scores were calculated for both pigs and infants in these scripts.

#### Step 1. Load the CSV file (`events_no_Escore.csv`) that has the PCDs of the pig events that do not yet have an event score.

In [56]:
# Events with a known PCD (PCD_lr) but no event score yet.
events_without_score_file = "events_no_Escore.csv"
event_Nscore = pd.read_csv(events_without_score_file)
event_Nscore

Unnamed: 0,Event,PCD_lr,CI
0,Superior colliuli appear,27.29,3
1,Anterior commissure and external capsule peak,29.778,3
2,Basal ganglia appears,29.778,3
3,Inferior colliculi appear,31.955,3
4,Red nucleus starts,31.955,3
5,Substantia nigra starts,36.62,3
6,Ventrally anglioblasts reach ocular equator,45.95,3
7,All eight hippocmpal flexures are distinguishable,49.06,3
8,Entorhinal cortex appears,50.0,3
9,Clear delineation of MEC and LEC,60.0,3


#### Step 2. Defining the equation for calculating new event scores given actual PCD day of event.

In [57]:
def calcEscore(PCD_lr, S_scale = 2.15115318237061):
    """Return the event score for an event observed at ``PCD_lr``.

    The score is ``log(PCD_lr - 4.42) - S_scale``.

    Parameters
    ----------
    PCD_lr : float or array-like
        PCD of the event(s); anything ``np.log`` can operate on.
    S_scale : float, optional
        Species score. Defaults to the pig species score (2.15115318237061);
        pass 2.500 for the human species score.

    Returns
    -------
    Same shape as ``PCD_lr``
        The calculated event score(s).
    """
    log_term = np.log(PCD_lr - 4.42)
    es_value = log_term - S_scale
    return es_value

#### Step 3. Apply the calculating event score equation to the event score data in order to calculate new event scores for the new events using the PCD days found in literature. New data frame created with these new Event Scores.

In [58]:
# Event scores for the literature events, using calcEscore's default (pig) species
# score. calcEscore is NumPy-based, so the whole PCD_lr column is passed in one
# call instead of wrapping the function in a lambda and applying it row by row.
event_Nscore["Event_Score"] = calcEscore(event_Nscore['PCD_lr'])
# Keep only the columns that the translating-time table also has.
event_Nscore = event_Nscore[['Event', 'Event_Score']]

#### Step 4. Read the translating time data: a CSV file (`Translating_time_event_scores.csv`) that has the event scores for each neurodevelopmental event from the translating time paper. To make sure the event scores are registered as numeric values in the data table, string values are converted to floats.

In [59]:
# Load the translating-time event scores.
translating_time = pd.read_csv("Translating_time_event_scores.csv")
# Series.map(float) runs float() on every entry, converting any string values in
# the Event_Score column into numeric values.
translating_time["Event_Score"] = translating_time["Event_Score"].map(float)

#### Step 5. Combine the event score data table and the translating time data table in order to add these new events into the final table. The two tables are concatenated row-wise and then sorted by the Event_Score column.

In [60]:
# Preview only (nothing is assigned): the new events merged into the
# translating-time events, ordered by event score.
pd.concat((event_Nscore, translating_time)).sort_values(by='Event_Score')

Unnamed: 0,Event,Event_Score
0,cranial motor nuclei-peak,0.903000
0,Superior colliuli appear,0.978673
1,retinal ganglion cell generation-start,1.023000
2,subplate-start,1.038000
3,locus coeruleus - peak,1.070000
...,...,...
91,retinal bipolar cells - peak,2.215000
92,ipsi/contra segregation in LGN and SC,2.298000
93,rapid axon loss in optic nerve ends,2.331000
11,Parasubiculum appears,2.408810


#### Step 6. The following was added because, when the inflection points were written into the excel file via a loop in R, the numeric values were not registered as actual numbers and were in a funky format. All inflection-point PCD values and white matter region names are therefore defined by hand in two separate lists: `WM_PCD` (numeric values) and `WM_regions` (strings).

In [62]:
# Inflection-point PCDs, position-matched with WM_regions below.
WM_PCD = [
    # entries 0-17: pig regions
    109.934825594658,
    92.4455604954579,
    75.2307162057536,
    85.4023938153514,
    101.795553219836,
    100.479580757356,
    71.4106369023424,
    69.2489796355694,
    106.639673487131,
    95.6471206379403,
    95.5556861403676,
    103.431531567396,
    100.099215705644,
    81.1425152161125,
    47.4038888085314,
    107.966092630953,
    83.3547102751999,
    104.151799950896,
    # entries 18-33: infant regions
    576.877722110884,
    600.602591950829,
    595.08909716705,
    547.914097449628,
    415.971017105917,
    814.091448125334,
    909.057944402252,
    906.250836401558,
    940.786794712443,
    769.15083432719,
    770.466135863592,
    618.706921593067,
    650.605307032628,
    634.145501416384,
    510.257778156122,
    510.803619803993,
]

# Region labels: a 'p' prefix marks pig regions, an 'i' prefix marks infant regions.
WM_regions = [
    # entries 0-17: pig regions
    'pCC',
    'pHypo',
    'pLV',
    'pLCaud',
    'pLCort',
    'pLHippo',
    'pLIC',
    'pLPutGP',
    'pLSupCol',
    'pMid',
    'pRCaud',
    'pRCort',
    'pRHippo',
    'pRIC',
    'pRPutGP',
    'pRSupCol',
    'pThal',
    'pWB',
    # entries 18-33: infant regions
    'iCC',
    'ibodyCC',
    'igenuCC',
    'isCC',
    'iCere',
    'iLCaud',
    'iRCaud',
    'iLHippo',
    'iRHippo',
    'iLPut',
    'iRPut',
    'iLThal',
    # NOTE(review): lower-case 'r', unlike the other labels such as 'iRCaud';
    # kept exactly as it was so the event name does not change.
    'irThal',
    'iThal',
    'iLIC',
    'iRIC',
]

# Every label becomes '<region>_inflection'.
WM_regions = [region + '_inflection' for region in WM_regions]

#### Step 7. Create a new data table called 'igomp_MWF_data' that includes all of the white matter regions and inflection points for infants. Rows 0-17 (the pig regions) are dropped so that we are left with just the infant data. The white matter regions are added as the Event column and the MWF PCD days are added as the PCD_lr column.

In [63]:
# Pair each region label with its PCD, then keep only the infant rows: positions
# 0-17 hold the pig regions, so slicing from position 18 onward removes them.
igomp_MWF_data = (
    pd.DataFrame({'Event': WM_regions, 'PCD_lr': WM_PCD})
    .iloc[18:]
    .reset_index(drop=True)
)

#### Step 8. Using the Human species score of 2.5 and the PCD calculated via the gompertz for the infant data, we calculate the event scores for the inflection points of the white matter regions. ***Using the infant data we are getting event scores that are very high (3.5-4.9) which doesn't make sense for the pig data.

In [64]:
# Event scores for the infant inflection points, using the human species score
# (2.500). calcEscore is NumPy-based, so the PCD_lr column is passed to it directly
# instead of going through DataFrame.apply, which returned a one-column DataFrame
# that then had to be assigned to a single column.
igomp_MWF_data["Event_Score"] = calcEscore(igomp_MWF_data["PCD_lr"], 2.500)
# Keep only the event name and its score; the PCD_lr column is dropped here.
igomp_MWF_data = igomp_MWF_data[['Event', 'Event_Score']]
igomp_MWF_data

Unnamed: 0,Event,Event_Score
0,iCC_inflection,3.849939
1,ibodyCC_inflection,3.890547
2,igenuCC_inflection,3.881256
3,isCC_inflection,3.798019
4,iCere_inflection,3.519933
5,iLCaud_inflection,4.196629
6,iRCaud_inflection,4.307535
7,iLHippo_inflection,4.304427
8,iRHippo_inflection,4.342007
9,iLPut_inflection,4.139524


#### Step 9. Now we are creating a new data table for the pig MWF data called 'pgomp_MWF_data'. Similar to Step 7 except with this version you are specifying a range that starts at 18 and drops all values until the end of the list 'len(WM_regions)'.

In [65]:
# Pair each region label with its PCD, then keep only the pig rows: they occupy
# the first 18 positions, so everything from position 18 onward is left out.
pgomp_MWF_data = (
    pd.DataFrame({'Event': WM_regions, 'PCD_lr': WM_PCD})
    .iloc[:18]
    .reset_index(drop=True)
)
pgomp_MWF_data

Unnamed: 0,Event,PCD_lr
0,pCC_inflection,109.934826
1,pHypo_inflection,92.44556
2,pLV_inflection,75.230716
3,pLCaud_inflection,85.402394
4,pLCort_inflection,101.795553
5,pLHippo_inflection,100.479581
6,pLIC_inflection,71.410637
7,pLPutGP_inflection,69.24898
8,pLSupCol_inflection,106.639673
9,pMid_inflection,95.647121


#### Step 10. Similar to Step 8 except now we are using the calculating e score equation which already uses the pig species score of 2.15. Calculating the inflection point event scores given the pig PCD days.

In [66]:
# Event scores for the pig inflection points, using calcEscore's default (pig)
# species score. calcEscore is NumPy-based, so the PCD_lr column is passed to it
# directly instead of going through DataFrame.apply, which returned a one-column
# DataFrame that then had to be assigned to a single column.
pgomp_MWF_data["Event_Score"] = calcEscore(pgomp_MWF_data["PCD_lr"])
# Keep only the event name and its score; the PCD_lr column is dropped here.
pgomp_MWF_data = pgomp_MWF_data[['Event', 'Event_Score']]
pgomp_MWF_data

Unnamed: 0,Event,Event_Score
0,pCC_inflection,2.507698
1,pHypo_inflection,2.326474
2,pLV_inflection,2.108857
3,pLCaud_inflection,2.243079
4,pLCort_inflection,2.427422
5,pLHippo_inflection,2.413815
6,pLIC_inflection,2.0534
7,pLPutGP_inflection,2.0206
8,pLSupCol_inflection,2.475971
9,pMid_inflection,2.362199


#### Step 11. Now we are defining the translating time function for predicting PCD days of events.

In [67]:
def predPCD(E_scale, S_scale):
    """Translating-time prediction of the PCD at which an event occurs.

    Computes ``exp(E_scale + S_scale) + 4.42``.

    Parameters
    ----------
    E_scale : float or array-like
        Event score(s).
    S_scale : float
        Species score of the species to predict for.

    Returns
    -------
    Same shape as ``E_scale``
        Predicted PCD(s).
    """
    combined_score = E_scale + S_scale
    predicted = np.exp(combined_score) + 4.42
    return predicted

#### Step 12. Now we are creating our final data set with all of the events from translating time and the MWF inflection points. The table is sorted by event score, and the event scores for the inflection points are the ones calculated with the pig species score. Predicted PCDs for rats, pigs, and humans are calculated with the equation defined above, using each species' own species score.

In [69]:
# Species scores passed to predPCD, in the order the prediction columns are added.
species_scores = (
    ('rat', 0.897),
    ('pig', 2.15115318237061),
    ('infant', 2.5),
)

# Stack the literature events, the translating-time events and the pig inflection
# events, ordered by event score. The row labels of the source tables are kept as
# they are.
final_data = pd.concat([event_Nscore, translating_time, pgomp_MWF_data]).sort_values(by=['Event_Score'])

# One '<species>_PCD_pred' column per species. predPCD is NumPy-based, so it is
# called once on the whole Event_Score column rather than via DataFrame.apply.
for species, species_score in species_scores:
    final_data[species + '_PCD_pred'] = predPCD(final_data['Event_Score'], species_score)

final_data.to_csv('Final_Translating_Time_DataSet.csv')
final_data

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,infant_PCD_pred
0,cranial motor nuclei-peak,0.903000,10.469647,25.623223,34.474127
0,Superior colliuli appear,0.978673,10.945208,27.290000,36.836671
1,retinal ganglion cell generation-start,1.023000,11.240958,28.326567,38.305934
2,subplate-start,1.038000,11.344044,28.687868,38.818054
3,locus coeruleus - peak,1.070000,11.569197,29.476999,39.936593
...,...,...,...,...,...
17,pWB_inflection,2.451331,32.875214,104.151800,145.783049
8,pLSupCol_inflection,2.475971,33.585047,106.639673,149.309441
15,pRSupCol_inflection,2.488864,33.963498,107.966093,151.189550
0,pCC_inflection,2.507698,34.525211,109.934826,153.980095


#### Step 13. Defining the limbic region equation based on Clancy et al. (2000), "The course of human events: predicting the timing of primate neural development".

In [70]:
def limbic_predPCD(E_scale, S_scale):
    """Predict the PCD of an event using the limbic-region equation.

    Computes ``exp((E_scale + S_scale) - 0.09031) + 4.42``: the same form as
    ``predPCD`` with 0.09031 subtracted inside the exponent.

    Parameters
    ----------
    E_scale : float or array-like
        Event score(s).
    S_scale : float
        Species score of the species to predict for.

    Returns
    -------
    Same shape as ``E_scale``
        Predicted PCD(s).
    """
    exponent = (E_scale + S_scale) - 0.09031
    return np.exp(exponent) + 4.42

In [71]:
# Pig inflection events whose infant prediction is recalculated with limbic_predPCD.
row_of_interest = [
    'pHypo_inflection',
    'pLHippo_inflection',
    'pRHippo_inflection',
    'pLPutGP_inflection',
    'pRPutGP_inflection',
    'pThal_inflection',
]

# Show the selected rows as they stand at this point.
final_data.loc[final_data['Event'].isin(row_of_interest)]

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,infant_PCD_pred
14,pRPutGP_inflection,1.609672,16.68405,47.403889,65.346742
7,pLPutGP_inflection,2.0206,22.916833,69.24898,96.310673
16,pThal_inflection,2.217468,26.941443,83.35471,116.304588
1,pHypo_inflection,2.326474,29.53522,92.44556,129.19025
12,pRHippo_inflection,2.409848,31.718941,100.099216,140.038787
5,pLHippo_inflection,2.413815,31.827466,100.479581,140.577928


In [72]:
# Select the rows listed in row_of_interest once and reuse the selector on both
# sides of the assignment.
limbic_rows = final_data['Event'].isin(row_of_interest)
# Overwrite the infant prediction of those rows with the limbic equation, using the
# human species score (2.5). limbic_predPCD is NumPy-based, so it takes the whole
# selected Event_Score column in one call.
final_data.loc[limbic_rows, 'infant_PCD_pred'] = limbic_predPCD(final_data.loc[limbic_rows, 'Event_Score'], 2.5)

In [73]:
# Show the rows currently listed in row_of_interest.
final_data.loc[final_data['Event'].isin(row_of_interest)]

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,infant_PCD_pred
14,pRPutGP_inflection,1.609672,16.68405,47.403889,60.08559
7,pLPutGP_inflection,2.0206,22.916833,69.24898,88.375722
16,pThal_inflection,2.217468,26.941443,83.35471,106.64312
1,pHypo_inflection,2.326474,29.53522,92.44556,118.416079
12,pRHippo_inflection,2.409848,31.718941,100.099216,128.327821
5,pLHippo_inflection,2.413815,31.827466,100.479581,128.820407


In [74]:
def cortical_predPCD(E_scale, S_scale):
    """Predict the PCD of an event with a +0.21722 term added inside the exponent.

    Computes ``exp((E_scale + S_scale) + 0.21722) + 4.42``.

    Parameters
    ----------
    E_scale : float or array-like
        Event score(s).
    S_scale : float
        Species score of the species to predict for.

    Returns
    -------
    Same shape as ``E_scale``
        Predicted PCD(s).
    """
    exponent = (E_scale + S_scale) + 0.21722
    return np.exp(exponent) + 4.42

In [75]:
# Pig inflection events whose infant prediction is recalculated with cortical_predPCD.
# This replaces the previous contents of row_of_interest.
row_of_interest = [
    'pCC_inflection',
    'pLCort_inflection',
    'pRCort_inflection',
    'pLSupCol_inflection',
    'pRSupCol_inflection',
    'pLIC_inflection',
    'pRIC_inflection',
]

# Show the selected rows as they stand at this point.
final_data.loc[final_data['Event'].isin(row_of_interest)]

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,infant_PCD_pred
6,pLIC_inflection,2.0534,23.533592,71.410637,99.374676
13,pRIC_inflection,2.189042,26.310265,81.142515,113.168952
4,pLCort_inflection,2.427422,32.202936,101.795553,142.44323
11,pRCort_inflection,2.444083,32.669709,103.431532,144.762118
8,pLSupCol_inflection,2.475971,33.585047,106.639673,149.309441
15,pRSupCol_inflection,2.488864,33.963498,107.966093,151.18955
0,pCC_inflection,2.507698,34.525211,109.934826,153.980095


In [76]:
# Select the rows listed in row_of_interest once and reuse the selector on both
# sides of the assignment.
cortical_rows = final_data['Event'].isin(row_of_interest)
# Overwrite the infant prediction of those rows with the cortical equation, using
# the human species score (2.5). cortical_predPCD is NumPy-based, so it takes the
# whole selected Event_Score column in one call.
final_data.loc[cortical_rows, 'infant_PCD_pred'] = cortical_predPCD(final_data.loc[cortical_rows, 'Event_Score'], 2.5)

In [77]:
# Show the rows currently listed in row_of_interest.
final_data.loc[final_data['Event'].isin(row_of_interest)]

Unnamed: 0,Event,Event_Score,rat_PCD_pred,pig_PCD_pred,infant_PCD_pred
6,pLIC_inflection,2.0534,23.533592,71.410637,122.412337
13,pRIC_inflection,2.189042,26.310265,81.142515,139.553345
4,pLCort_inflection,2.427422,32.202936,101.795553,175.930074
11,pRCort_inflection,2.444083,32.669709,103.431532,178.811564
8,pLSupCol_inflection,2.475971,33.585047,106.639673,184.462148
15,pRSupCol_inflection,2.488864,33.963498,107.966093,186.798404
0,pCC_inflection,2.507698,34.525211,109.934826,190.265984


In [78]:
# Write the table with the adjusted infant predictions to disk, then display it.
# The frame has to be the last expression of the cell to be rendered; placed before
# to_csv (which returns None) it was a no-op and the cell showed nothing.
final_data.to_csv('Adjusted_Final_Translating_Time_DataSet.csv')
final_data