In [2]:
#importing libraries
import pandas as pd
import math
import numpy as np
import sys
from sklearn.linear_model import LinearRegression
import copy
import xlsxwriter
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from sklearn.svm import SVR
from sklearn.utils import shuffle

## Calculating Spearman and Pearson Coefficients, first set of data

In [3]:
#Load the PV and Lhx6 tables; replace the file names below with the paths to your own files.
#index_col=0 uses the first CSV column as the row index of each frame.
PVGPData, Lhx6GPData = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ("PVData_wGPpredictions.csv", "Lhx6Data_wGPpredictions.csv")
)

### "singleGPpreds" mean predictions with degree = 1, "doubleGPpreds" mean predictions with degree = 2

In [4]:
#Show the PV frame as the cell output to inspect its columns
PVGPData

Unnamed: 0_level_0,Cell.Type,Duration,Frequency,AmpMult,ModIn,Averages,singleGPpreds,doubleGPpreds
X,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,PV,5000,100,0.5,0.118943,0.118943,0.182607,0.150808
1,PV,250,100,1.0,0.170732,0.195103,0.244864,0.344956
2,PV,500,200,2.0,0.076923,0.228159,0.342078,0.327572
3,PV,100,150,1.5,0.558442,0.558442,0.310078,0.400686
4,PV,500,80,1.5,0.282051,0.261553,0.206164,0.330101
...,...,...,...,...,...,...,...,...
200,PV,250,100,2.5,0.285141,0.285141,0.240064,0.176670
201,PV,1000,10,2.0,0.333333,0.155039,0.117686,0.143814
202,PV,250,100,2.0,0.138340,0.241831,0.241656,0.309618
203,PV,50,150,2.0,0.739130,0.703801,0.300475,0.234157


In [5]:
#Remove repeated parameter settings from both tables so they do not inflate the correlation coefficients.
#Rows are treated as duplicates only when Duration, Frequency, AmpMult and Averages all match;
#the row index is then renumbered from 0.
PVDataNoDupes, Lhx6DataNoDupes = (
    table.drop_duplicates(subset=["Duration","Frequency","AmpMult","Averages"]).reset_index(drop=True)
    for table in (PVGPData, Lhx6GPData)
)

In [6]:
#Show the de-duplicated PV frame as the cell output
PVDataNoDupes

Unnamed: 0,Cell.Type,Duration,Frequency,AmpMult,ModIn,Averages,singleGPpreds,doubleGPpreds
0,PV,5000,100,0.50,0.118943,0.118943,0.182607,0.150808
1,PV,250,100,1.00,0.170732,0.195103,0.244864,0.344956
2,PV,500,200,2.00,0.076923,0.228159,0.342078,0.327572
3,PV,100,150,1.50,0.558442,0.558442,0.310078,0.400686
4,PV,500,80,1.50,0.282051,0.261553,0.206164,0.330101
...,...,...,...,...,...,...,...,...
124,PV,5000,200,1.25,0.084065,0.084065,0.289489,0.255625
125,PV,500,100,0.50,0.648590,0.648590,0.222819,0.219501
126,PV,100,80,1.00,0.219512,0.219512,0.241564,0.281874
127,PV,500,20,1.50,0.037037,0.037037,0.145733,0.187725


In [7]:
#PV, degree = 1: correlate the singleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV PV data, SINGLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(PVDataNoDupes["singleGPpreds"], PVDataNoDupes["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV PV data, SINGLE GP:
(0.2674025650632229, 0.0021873760597825686)
SpearmanrResult(correlation=0.36711762075134174, pvalue=1.8733267460472225e-05)




In [8]:
#PV, degree = 2: correlate the doubleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV PV data, DOUBLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(PVDataNoDupes["doubleGPpreds"], PVDataNoDupes["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV PV data, DOUBLE GP:
(0.4177010661354104, 8.433670464495497e-07)
SpearmanrResult(correlation=0.523831618962433, pvalue=1.8870868254210278e-10)




In [9]:
#Lhx6, degree = 1: correlate the singleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV Lhx6 data, SINGLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(Lhx6DataNoDupes["singleGPpreds"], Lhx6DataNoDupes["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV Lhx6 data, SINGLE GP:
(0.33199438906546036, 0.00022549736965593383)
SpearmanrResult(correlation=0.3241823941180376, pvalue=0.0003222497991296858)




In [10]:
#Lhx6, degree = 2: correlate the doubleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV Lhx6 data, DOUBLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(Lhx6DataNoDupes["doubleGPpreds"], Lhx6DataNoDupes["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV Lhx6 data, DOUBLE GP:
(0.4189023112101944, 2.122585588715539e-06)
SpearmanrResult(correlation=0.40720554503568424, pvalue=4.306795725042189e-06)




## Calculating Pearson and Spearman coefficients from GPRs, all data

In [11]:
#Load the PV and Lhx6 tables from the "allThreeDataSets" CSV files.
#index_col=0 uses the first CSV column as the row index of each frame.
PVGPData_allThree, Lhx6GPData_allThree = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in (
        "PVData_wGPpredictions_allThreeDataSets.csv",
        "Lhx6Data_wGPpredictions_allThreeDataSets.csv",
    )
)

In [12]:
#Show the PV frame loaded from the "allThreeDataSets" file as the cell output
PVGPData_allThree

Unnamed: 0_level_0,Cell.Type,Duration,Frequency,AmpMult,ModIn,Averages,singleGPpreds,doubleGPpreds
X,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,PV,10000,50,1.50,0.210174,0.253153,0.105272,0.220556
1,PV,250,50,0.50,-0.022222,-0.022222,0.167193,0.129412
2,PV,500,10,2.50,-0.200000,0.184689,0.192949,-0.001599
3,PV,100,150,2.00,0.563218,0.563218,0.383001,0.414219
4,PV,30000,80,1.50,-0.013270,0.127842,0.122077,0.206339
...,...,...,...,...,...,...,...,...
256,PV,250,100,1.25,0.214521,0.198810,0.262915,0.370287
257,PV,1000,10,2.00,0.333333,0.155039,0.149729,0.119344
258,PV,250,100,2.00,0.138340,0.241831,0.300183,0.365623
259,PV,50,150,2.00,0.739130,0.703801,0.389575,0.357058


In [13]:
#Remove repeated rows from both "allThree" tables before computing correlations.
#Rows are treated as duplicates only when Duration, Frequency, AmpMult and Averages all match;
#the row index is then renumbered from 0.
PVDataNoDupes_allThree, Lhx6DataNoDupes_allThree = (
    table.drop_duplicates(subset=["Duration","Frequency","AmpMult","Averages"]).reset_index(drop=True)
    for table in (PVGPData_allThree, Lhx6GPData_allThree)
)

In [14]:
#PV (allThree table), degree = 1: correlate the singleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV PV data, SINGLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(PVDataNoDupes_allThree["singleGPpreds"], PVDataNoDupes_allThree["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV PV data, SINGLE GP:
(0.3440832754343627, 2.7560600741741234e-05)
SpearmanrResult(correlation=0.4447087225031487, pvalue=2.947607851442741e-08)




In [15]:
#PV (allThree table), degree = 2: correlate the doubleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV PV data, DOUBLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(PVDataNoDupes_allThree["doubleGPpreds"], PVDataNoDupes_allThree["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV PV data, DOUBLE GP:
(0.4750566676518307, 2.3222108571995417e-09)
SpearmanrResult(correlation=0.57219645802205, pvalue=1.0164010737851423e-13)




In [19]:
#Lhx6 (allThree table), degree = 1: correlate the singleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV Lhx6 data, SINGLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(Lhx6DataNoDupes_allThree["singleGPpreds"], Lhx6DataNoDupes_allThree["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV Lhx6 data, SINGLE GP:
(0.12405647215930926, 0.14419571645269075)
SpearmanrResult(correlation=0.3710146461818764, pvalue=6.401362509932855e-06)




In [20]:
#Lhx6 (allThree table), degree = 2: correlate the doubleGPpreds column with the Averages column.
#Pearson is printed first, then Spearman.
print("Pearson's correlation value and p-value for LOOXV Lhx6 data, DOUBLE GP:")
for correlation_test in (pearsonr, spearmanr):
    print(correlation_test(Lhx6DataNoDupes_allThree["doubleGPpreds"], Lhx6DataNoDupes_allThree["Averages"]))
print("\n")

Pearson's correlation value and p-value for LOOXV Lhx6 data, DOUBLE GP:
(0.0907315168759153, 0.2863668574162317)
SpearmanrResult(correlation=0.5083880774493731, pvalue=1.4317827656601703e-10)


