In [289]:
import numpy as np
import pandas as pd
import seaborn as sns
import json
import warnings
import math
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.diagnostic import kstest_normal

import torch
import gpytorch
from gpytorch.kernels import Kernel
from gpytorch.functions import RBFCovariance
from gpytorch.settings import trace_mode
from gpytorch.utils.warnings import GPInputWarning

In [290]:
with open("rNoOuputNoReg.json", "r") as final:
    rNoOuputNoReg=json.load(final)
with open("rOuputNoReg.json", "r") as final:
    rOuputNoReg=json.load(final)
with open("rNoOuputNoReg2.json", "r") as final:
    rNoOuputNoReg2=json.load(final)
with open("rOuputNoReg2.json", "r") as final:
    rOuputNoReg2=json.load(final)
with open("rNoOuputNoSqrt.json", "r") as final:
    rNoOuputNoSqrt=json.load(final)
with open("rOuputNoRegSqrt.json", "r") as final:
    rOuputNoRegSqrt=json.load(final)

In [291]:
def getPredIdx(data):
    idx=[]
    for i in data['respVar']:
        if len(i['index'])>0:
            idx.append(i['index'])
    return idx

def detScore(data, threshold):
    r=[]
    for i in data:
        scores=[]
        sum_=np.array(i['lengthscales']).sum().item()
        for j in i['lengthscales']:
            scores.append(j/sum_)
            thresh=np.array(scores)-threshold
        flag=True
        for j in i['nonpredIdx'][0]:
            if thresh[j]>0:
                for k in getPredIdx(i):
                    if thresh[k]<0:
                        continue
                    else:
                        flag=False
            else:
                flag=False
        if flag==True:
            r.append(1)
        else:
            r.append(0)

    return np.array(r).sum().item()/len(r),r


def thresh(data,min_,max_,n1,n2):
    bestSc=[0,0,0]
    d={}
    for i in range(n1):
        t=np.random.uniform(min_,max_,1).item()
        s=detScore(data,t)[0]
        if s>bestSc[0]:
            bestSc[0]=s
            d[str(s)]=t
            bestSc.sort()
        else:
            continue
    ts=[]
    for i in bestSc:
        ts.append(d[str(i)])
    m=np.array(ts).mean().item()
    diff=np.array(ts)-m
    bestS=0
    bestT=m
    for i in range(n2):
        t=np.random.normal(loc=m, scale=0.02, size=1).item()
        s,r=detScore(data,t)
        if s>bestS:
            bestS=s
            bestT=t
            rr=r
    return t, bestS, rr

In [292]:
thresh(rNoOuputNoReg,0.05,0.25,10000,5000)[:2]

(0.07152569265510335, 0.05333333333333334)

In [293]:
thresh(rOuputNoReg,0.05,0.25,10000,5000)[:2]

(0.10410596038673212, 0.04666666666666667)

In [294]:
thresh(rNoOuputNoReg2,0.05,0.25,10000,5000)[:2]

(0.1690345617497134, 0.04666666666666667)

In [295]:
thresh(rOuputNoReg2,0.05,0.25,10000,5000)[:2]

(0.12000226807804013, 0.04)

In [296]:
thresh(rNoOuputNoSqrt,0.05,0.25,10000,5000)[:2]

(0.12783477574021987, 0.04)

In [297]:
thresh(rOuputNoRegSqrt,0.05,0.25,10000,5000)[:2]

(0.1339029946320876, 0.04)

In [298]:
def findOptimTs(data):
    rt=[]
    for i in data:
        scores=[]
        sum_=np.array(i['lengthscales']).sum().item()
        for j in i['lengthscales']:
            scores.append(j/sum_)
        #min non-pred
        nps=1000
        for j in i['nonpredIdx'][0]:
            if scores[j]<nps:
                nps=scores[j]
        ps=0
        for k in getPredIdx(i):
            if scores[k[0]]>nps:
                ps=scores[k[0]]
        rt.append(0.1*ps+0.9*nps)
    return rt
        

In [336]:
def makeDatasets(data):
    df=pd.DataFrame(columns=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss','binaryScore', 'threshold'])
    df['binaryScore']=thresh(data,0.05,0.25,10000,5000)[-1]
    df['threshold']=findOptimTs(data)
    df['WeightedNonLinear']=0
    df['VarWeightedNonLinear']=0
    df['WeightedCovs']=0
    df['VarWeightedCovs']=0
    df['Nfeatures']=0
    df['MeanCovs']=0
    df['VarCov']=0
    df['VarY']=0
    df['loss']=0

    for i in range(len(data)):
        df.iloc[i,5]=data[i]['nk'][0]
        df.iloc[i,4]=len(data[i]['nonpredIdx'])/data[i]['nk'][0]
        df.iloc[i,-4]=data[i]['sigmay'][0]
        df.iloc[i,-3]=data[i]['ExactMarginalLogLikelihood'][0]
        covs=[]
        for j in range(1,data[i]['nk'][0]):
            for k in range(j):
                covs.append(np.array(data[i]['covs'])[j,k])
        df.iloc[i,6]=np.array(covs).mean().item()
        df.iloc[i,7]=np.array(covs).std().item()
        weightedR=[]
        for j in getPredIdx(data[i]):
            weightedR.append([data[i]['respVar'][j[0]]['respVar'][0], data[i]['respVar'][j[0]]['R^2'][0]])
        df.iloc[i,0]=((np.array(weightedR)[:,0]/np.array(weightedR)[:,0].sum())*np.array(weightedR)[:,1]).mean().item()
        df.iloc[i,1]=((np.array(weightedR)[:,0]/np.array(weightedR)[:,0].sum())*np.array(weightedR)[:,1]).std().item()
        weightedCov=[]
        for j in data[i]['nonpredIdx'][0]:
            for k in getPredIdx(data[i]):
                weightedCov.append([data[i]['respVar'][k[0]]['respVar'][0], np.array(data[i]['covs'])[j,k[0]]])
        df.iloc[i,2]=((np.array(weightedCov)[:,0]/np.array(weightedCov)[:,0].sum())*np.array(weightedCov)[:,1]).mean().item()
        df.iloc[i,3]=((np.array(weightedCov)[:,0]/np.array(weightedCov)[:,0].sum())*np.array(weightedCov)[:,1]).std().item()
    return df


In [337]:
df_rNoOuputNoReg=makeDatasets(rNoOuputNoReg)
df_rOuputNoReg=makeDatasets(rOuputNoReg)
df_rNoOuputReg2=makeDatasets(rNoOuputNoReg2)
df_rOuputReg2=makeDatasets(rOuputNoReg2)
df_rNoOuputSqrt=makeDatasets(rNoOuputNoSqrt)
df_rOuputRegSqrt=makeDatasets(rOuputNoRegSqrt)

In [383]:
print(df_rNoOuputNoReg['threshold'].std())
print(df_rOuputNoReg['threshold'].std())
print(df_rNoOuputReg2['threshold'].std())
print(df_rOuputReg2['threshold'].std())
print(df_rNoOuputSqrt['threshold'].std())
print(df_rOuputRegSqrt['threshold'].std())

0.034532315964089615
0.031990020971476026
0.032198152800986715
0.032742258665612534
0.03344462685880441
0.03128933406001542


In [375]:
from sklearn.linear_model import LinearRegression 
df=df_rNoOuputNoReg.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])
r['intercept']=reg.intercept_

r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.07493254,0.015874
VarWeightedNonLinear,0.04990788,0.015874
WeightedCovs,-0.3889495,0.015874
VarWeightedCovs,-0.3645283,0.015874
RatioPred,1.076827,0.015874
Nfeatures,-0.001890832,0.015874
MeanCovs,-0.08209339,0.015874
VarCov,0.02148331,0.015874
VarY,0.001053679,0.015874
loss,-6.679882e-15,0.015874


In [376]:
df=df_rOuputNoReg.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])

r['intercept']=reg.intercept_
r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.01603417,-0.001068
VarWeightedNonLinear,0.01719866,-0.001068
WeightedCovs,-0.06031309,-0.001068
VarWeightedCovs,-0.08861441,-0.001068
RatioPred,0.9920574,-0.001068
Nfeatures,0.0005515059,-0.001068
MeanCovs,-0.01095043,-0.001068
VarCov,0.001878712,-0.001068
VarY,-0.001584028,-0.001068
loss,2.559709e-15,-0.001068


In [377]:
df=df_rNoOuputReg2.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])
r['intercept']=reg.intercept_

r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.025357,-0.031961
VarWeightedNonLinear,0.03563,-0.031961
WeightedCovs,-0.034333,-0.031961
VarWeightedCovs,0.007067,-0.031961
RatioPred,1.103858,-0.031961
Nfeatures,0.002128,-0.031961
MeanCovs,-0.028843,-0.031961
VarCov,-0.009308,-0.031961
VarY,-0.000313,-0.031961
loss,8e-06,-0.031961


In [378]:
df=df_rOuputReg2.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])

r['intercept']=reg.intercept_
r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.02590452,-0.007152
VarWeightedNonLinear,0.04824983,-0.007152
WeightedCovs,0.248332,-0.007152
VarWeightedCovs,0.23176,-0.007152
RatioPred,0.9923079,-0.007152
Nfeatures,0.0005772521,-0.007152
MeanCovs,-0.07642682,-0.007152
VarCov,-0.0106427,-0.007152
VarY,-0.0003981565,-0.007152
loss,2.009537e-14,-0.007152


In [379]:
df=df_rNoOuputSqrt.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])

r['intercept']=reg.intercept_
r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.02960381,-0.018543
VarWeightedNonLinear,0.01345713,-0.018543
WeightedCovs,-0.06038963,-0.018543
VarWeightedCovs,-0.01735477,-0.018543
RatioPred,1.100257,-0.018543
Nfeatures,0.001180792,-0.018543
MeanCovs,-0.03834294,-0.018543
VarCov,0.01118446,-0.018543
VarY,-0.001669467,-0.018543
loss,6.588796e-14,-0.018543


In [380]:
df=df_rOuputRegSqrt.copy()
X=df.iloc[:,:-2]
y=df['threshold'].values
reg=LinearRegression().fit(X, y)

r=pd.DataFrame(reg.coef_,index=['WeightedNonLinear','VarWeightedNonLinear','WeightedCovs','VarWeightedCovs', 'RatioPred','Nfeatures','MeanCovs','VarCov','VarY','loss'], columns=['linear coeficient'])
r['intercept']=reg.intercept_

r

Unnamed: 0,linear coeficient,intercept
WeightedNonLinear,-0.005609844,0.018837
VarWeightedNonLinear,0.02362954,0.018837
WeightedCovs,0.1800409,0.018837
VarWeightedCovs,-0.03766964,0.018837
RatioPred,0.8636507,0.018837
Nfeatures,-0.0008933043,0.018837
MeanCovs,0.02870715,0.018837
VarCov,0.00737993,0.018837
VarY,-0.0005028082,0.018837
loss,2.889198e-07,0.018837
