In [56]:
import pandas as pd 
import numpy as np 

#### Step 1: Load Data (the original data excel sheet)

In [57]:
df = pd.read_csv("trans_time_pig.csv")
df

Unnamed: 0,E_scale,S_scale,Event,PCD_lr,CI
0,0.903,2.226826,Cranial motor nuclei peak,27.29,2
1,1.128,2.343966,Red nucleus peak,36.62,2
2,1.163,2.006812,External capsule appears,28.223,3
3,1.244,1.997811,Cortical layer VI start,30.0,3
4,1.248,2.316166,Mammillo-thalamic tract appears,39.73,3
5,1.25,2.398578,Axons in optic stalk,42.84,2
6,1.272,2.969614,Globus pallidus peak,73.94,3
7,1.302,1.827826,Medial forebrain bundle appears,27.29,1
8,1.305,1.768896,Internal Capsule Appears - starts,26.046,3
9,1.401,2.325416,Superior colliculi peak,45.95,1


#### Step 2: Define PCD prediction function

In [58]:
def predPCD(E_scale, S_scale):
    pcd = np.exp(E_scale + S_scale) + 4.42 
    return pcd

#### Step 3: Define species scale range

In [59]:
s_range = np.arange(1.5, 3.201, 0.001)
s_range

array([1.5  , 1.501, 1.502, ..., 3.198, 3.199, 3.2  ])

#### Step 4: Initialize dictionary for species scale SSR

In [60]:
s_scale_ssr = {}

#### Step 5: Make a loop - iteratively calculating SSR based on the species scale range and event score

In [61]:
for s_scale in s_range:
    print(s_scale)
    predicted_pcd = df['E_scale'].apply(lambda e: predPCD(e, s_scale))
    square_diffs = (predicted_pcd - df['PCD_lr']) ** 2
    ssr = square_diffs.sum()
    s_scale_ssr[str(s_scale)] = ssr

1.5
1.501
1.5019999999999998
1.5029999999999997
1.5039999999999996
1.5049999999999994
1.5059999999999993
1.5069999999999992
1.5079999999999991
1.508999999999999
1.509999999999999
1.5109999999999988
1.5119999999999987
1.5129999999999986
1.5139999999999985
1.5149999999999983
1.5159999999999982
1.5169999999999981
1.517999999999998
1.518999999999998
1.5199999999999978
1.5209999999999977
1.5219999999999976
1.5229999999999975
1.5239999999999974
1.5249999999999972
1.5259999999999971
1.526999999999997
1.527999999999997
1.5289999999999968
1.5299999999999967
1.5309999999999966
1.5319999999999965
1.5329999999999964
1.5339999999999963
1.5349999999999961
1.535999999999996
1.536999999999996
1.5379999999999958
1.5389999999999957
1.5399999999999956
1.5409999999999955
1.5419999999999954
1.5429999999999953
1.5439999999999952
1.544999999999995
1.545999999999995
1.5469999999999948
1.5479999999999947
1.5489999999999946
1.5499999999999945
1.5509999999999944
1.5519999999999943
1.5529999999999942
1.5539999999

1.835999999999963
1.8369999999999629
1.8379999999999628
1.8389999999999627
1.8399999999999626
1.8409999999999624
1.8419999999999623
1.8429999999999622
1.8439999999999621
1.844999999999962
1.845999999999962
1.8469999999999618
1.8479999999999617
1.8489999999999616
1.8499999999999615
1.8509999999999613
1.8519999999999612
1.8529999999999611
1.853999999999961
1.854999999999961
1.8559999999999608
1.8569999999999607
1.8579999999999606
1.8589999999999605
1.8599999999999604
1.8609999999999602
1.8619999999999601
1.86299999999996
1.86399999999996
1.8649999999999598
1.8659999999999597
1.8669999999999596
1.8679999999999595
1.8689999999999594
1.8699999999999593
1.8709999999999591
1.871999999999959
1.872999999999959
1.8739999999999588
1.8749999999999587
1.8759999999999586
1.8769999999999585
1.8779999999999584
1.8789999999999583
1.8799999999999581
1.880999999999958
1.881999999999958
1.8829999999999578
1.8839999999999577
1.8849999999999576
1.8859999999999575
1.8869999999999574
1.8879999999999573
1.8889

#### Step 6: Visualize the s_scale_ssr dictionary with the calculated SSR values for each s_range value 

In [62]:
s_scale_ssr

{'1.5': 20385.79135414144,
 '1.501': 20357.889965800343,
 '1.5019999999999998': 20329.98901141362,
 '1.5029999999999997': 20302.088548170785,
 '1.5039999999999996': 20274.188633432157,
 '1.5049999999999994': 20246.289324729347,
 '1.5059999999999993': 20218.39067976557,
 '1.5069999999999992': 20190.492756416104,
 '1.5079999999999991': 20162.595612728648,
 '1.508999999999999': 20134.69930692375,
 '1.509999999999999': 20106.803897395206,
 '1.5109999999999988': 20078.90944271049,
 '1.5119999999999987': 20051.016001611068,
 '1.5129999999999986': 20023.12363301291,
 '1.5139999999999985': 19995.232396006835,
 '1.5149999999999983': 19967.34234985896,
 '1.5159999999999982': 19939.453554011034,
 '1.5169999999999981': 19911.56606808093,
 '1.517999999999998': 19883.679951863014,
 '1.518999999999998': 19855.795265328565,
 '1.5199999999999978': 19827.912068626163,
 '1.5209999999999977': 19800.03042208213,
 '1.5219999999999976': 19772.15038620094,
 '1.5229999999999975': 19744.272021665634,
 '1.523999

#### Step 7: Find the species scale value with the smallest SSR and print it out

In [63]:
best_s_scale = min(s_scale_ssr, key=s_scale_ssr.get)
print(f'best s scale at: {best_s_scale}, with ssr of {s_scale_ssr[best_s_scale]}')

best s scale at: 2.1859999999999244, with ssr of 6620.958631151548


#### Step 8: Repeat the same steps, but taking into account the confidence level for event scores greater than 1

In [64]:
df2 = df[df['CI'] > 1]
s_scale_ssr_g1 = {}
for s_scale in s_range:
    print(s_scale)
    predicted_pcd = df2['E_scale'].apply(lambda e: predPCD(e, s_scale))
    square_diffs = (predicted_pcd - df['PCD_lr']) ** 2
    ssr = square_diffs.sum()
    s_scale_ssr_g1[str(s_scale)] = ssr

print(s_scale_ssr_g1)
best_s_scale_g1 = min(s_scale_ssr_g1, key=s_scale_ssr_g1.get)
print(f'best s scale at: {best_s_scale_g1}, with ssr of {s_scale_ssr_g1[best_s_scale_g1]}')



1.5
1.501
1.5019999999999998
1.5029999999999997
1.5039999999999996
1.5049999999999994
1.5059999999999993
1.5069999999999992
1.5079999999999991
1.508999999999999
1.509999999999999
1.5109999999999988
1.5119999999999987
1.5129999999999986
1.5139999999999985
1.5149999999999983
1.5159999999999982
1.5169999999999981
1.517999999999998
1.518999999999998
1.5199999999999978
1.5209999999999977
1.5219999999999976
1.5229999999999975
1.5239999999999974
1.5249999999999972
1.5259999999999971
1.526999999999997
1.527999999999997
1.5289999999999968
1.5299999999999967
1.5309999999999966
1.5319999999999965
1.5329999999999964
1.5339999999999963
1.5349999999999961
1.535999999999996
1.536999999999996
1.5379999999999958
1.5389999999999957
1.5399999999999956
1.5409999999999955
1.5419999999999954
1.5429999999999953
1.5439999999999952
1.544999999999995
1.545999999999995
1.5469999999999948
1.5479999999999947
1.5489999999999946
1.5499999999999945
1.5509999999999944
1.5519999999999943
1.5529999999999942
1.5539999999

#### Step 9: Repeat the same steps, but taking into account the confidence interval for event scores greater than 2

In [65]:
df3 = df[df['CI'] > 2]
df3.head()
s_scale_ssr_g2 = {}
for s_scale in s_range:
    print(s_scale)
    predicted_pcd = df3['E_scale'].apply(lambda e: predPCD(e, s_scale))
    square_diffs = (predicted_pcd - df['PCD_lr']) ** 2
    ssr = square_diffs.sum()
    s_scale_ssr_g2[str(s_scale)] = ssr

print(s_scale_ssr_g2)
best_s_scale_g2 = min(s_scale_ssr_g2, key=s_scale_ssr_g2.get)
print(f'best s scale at: {best_s_scale_g2}, with ssr of {s_scale_ssr_g2[best_s_scale_g2]}')


1.5
1.501
1.5019999999999998
1.5029999999999997
1.5039999999999996
1.5049999999999994
1.5059999999999993
1.5069999999999992
1.5079999999999991
1.508999999999999
1.509999999999999
1.5109999999999988
1.5119999999999987
1.5129999999999986
1.5139999999999985
1.5149999999999983
1.5159999999999982
1.5169999999999981
1.517999999999998
1.518999999999998
1.5199999999999978
1.5209999999999977
1.5219999999999976
1.5229999999999975
1.5239999999999974
1.5249999999999972
1.5259999999999971
1.526999999999997
1.527999999999997
1.5289999999999968
1.5299999999999967
1.5309999999999966
1.5319999999999965
1.5329999999999964
1.5339999999999963
1.5349999999999961
1.535999999999996
1.536999999999996
1.5379999999999958
1.5389999999999957
1.5399999999999956
1.5409999999999955
1.5419999999999954
1.5429999999999953
1.5439999999999952
1.544999999999995
1.545999999999995
1.5469999999999948
1.5479999999999947
1.5489999999999946
1.5499999999999945
1.5509999999999944
1.5519999999999943
1.5529999999999942
1.5539999999

In [66]:
df3['S_scale'].mean()

2.1029573573830005

In [67]:
(s_scale_ssr_g2[best_s_scale_g2]) ** (1/2)

50.65294444701791

#### Step 10: Using the best determined species scales to calculate all event dates

In [68]:
df["PCD_S_scale_2.151_predicted"] = df['E_scale'].apply(lambda e: predPCD(e, float(best_s_scale_g2)))
df["PCD_differences_2.151"] = np.abs(df["PCD_S_scale_2.151_predicted"] - df["PCD_lr"])

df["PCD_S_scale_2.208_predicted"] = df['E_scale'].apply(lambda e: predPCD(e, float(best_s_scale_g1)))
df["PCD_differences_2.208"] = np.abs(df["PCD_S_scale_2.208_predicted"] - df["PCD_lr"])

df["PCD_S_scale_2.186_predicted"] = df['E_scale'].apply(lambda e: predPCD(e, float(best_s_scale)))
df["PCD_differences_2.186"] = np.abs(df["PCD_S_scale_2.186_predicted"] - df["PCD_lr"])


#### Step 11: Creating new csv file with calculated PCD days and differences between predicted and ground truth

In [69]:
df.to_csv('Final_Translating_Time_Pig_S_Scales.csv')