In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 26 2021
@author: nooryoussef
"""
import numpy as np
import pandas as pd
import pickle
import misc_functions as mf
import stability_functions as sf
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import linear_model

In [2]:
# Settings that identify the empirical data set
main_dir = "../output/"
protein  = '1qhw'
Ne = ('Ne2', 1e2)

# Load the empirical results dataframes
qhw_rst = pd.read_csv("../data/conc_data_1qhw_Ne2_nsubs_5e2_0_500.csv")
data = pd.read_csv("../data/" + protein + "_paired_pinew_pires_0_500.csv")

# Empirical distributions: initial propensities, residence times, and the
# per-row propensity column (vr is kept as a pandas Series, the others as lists)
vi  = qhw_rst["pi_initial"].tolist()
vrt = qhw_rst["res_time"].tolist()
vr  = data["propensity"]

# Sampling pool: every propensity value followed by each distinct pi_initial value
PI_R = vr.tolist() + list(set(data["pi_initial"]))


## Null model where pi_res, pi_initial are drawn from uniform

1. For i in 1...10000
    1. Draw one value from Uniform[0,1] at random. Call it Init_i. 
    2. Select an element of vrt (the empirical residence times) at random. Call it T. 
    3. Draw T values from Uniform[0,1] at random. Set Ave_i = mean of these values.
    4. Calculate MAMI_i = Ave_i - Init_i
    5. Calculate MSLR_i = slope of the least-squares line fitted to the T values against their index (0...T-1)


In [3]:
# Null model: Init_i and the T "resident" values are i.i.d. Uniform(0, 1).
# A dedicated generator with a fixed (arbitrary) seed makes the files written
# at the end of this cell reproducible across kernel restarts.
rng    = np.random.default_rng(2021012601)
n_reps = 10000

# One residence time per replicate, resampled (with replacement) from vrt
windows = rng.choice(vrt, n_reps)

# Column 0 of each row is Init_i and columns 1..T are the resident draws, so a
# row needs at least max(T) + 1 entries. With a fixed width of 500 the slice
# below would be silently truncated for T >= 500 and the regression would fail
# because X and y would have different lengths.
n_cols      = max(500, int(np.max(windows)) + 1)
rndm_pi_res = rng.uniform(0, 1, (n_reps, n_cols))

# The estimator is stateless between fits, so a single instance is reused
lm = linear_model.LinearRegression()

Init= []; Ave = []; M_AMI = []; M_SLR = []; pi_r = []
for i in range(n_reps):
    Init_i  = rndm_pi_res[i, 0]
    T       = int(windows[i])          # plain int so it is a valid slice bound
    pi_r_i  = rndm_pi_res[i, 1:T+1]
    Ave_i   = np.mean(pi_r_i)
    M_AMI_i = Ave_i - Init_i

    # slope of the T sampled values regressed on their index 0..T-1
    X       = np.arange(T).reshape(-1,1)
    y       = pi_r_i.reshape(-1,1)
    M_SLR_i = lm.fit(X,y).coef_[0][0]

    #append to lists
    Init.append(Init_i)
    pi_r.extend(list(pi_r_i))
    Ave.append(Ave_i)
    M_AMI.append(M_AMI_i)
    M_SLR.append(M_SLR_i)

# One row per replicate
null_df = pd.DataFrame({"M_AMI":M_AMI, "M_SLR":M_SLR, "AVG_pi":Ave, "pi_initial":Init })
null_df.to_csv("../data/null_model_uniform_distribution.csv", index = False)

# Flat array of every sampled resident value, concatenated over replicates
np.save("../data/null_model_uniform_pi_new_and_pi_res_PI_RES.npy", np.array(pi_r))

## Null model where pi_res, pi_initial are drawn from normal

1. For i in 1...10000
    1. Draw one value from Normal(mean = 0, sd = 0.1) at random. Call it Init_i. 
    2. Select an element of vrt (the empirical residence times) at random. Call it T. 
    3. Draw T values from Normal(mean = 0, sd = 0.1) at random. Set Ave_i = mean of these values.
    4. Calculate MAMI_i = Ave_i - Init_i
    5. Calculate MSLR_i = slope of the least-squares line fitted to the T values against their index (0...T-1)

In [4]:
# Null model: Init_i and the T "resident" values are i.i.d. Normal(mean 0, sd 0.1).
# A dedicated generator with a fixed (arbitrary) seed makes the files written
# at the end of this cell reproducible across kernel restarts.
rng    = np.random.default_rng(2021012602)
n_reps = 10000

# One residence time per replicate, resampled (with replacement) from vrt
windows = rng.choice(vrt, n_reps)

# Column 0 of each row is Init_i and columns 1..T are the resident draws, so a
# row needs at least max(T) + 1 entries. With a fixed width of 500 the slice
# below would be silently truncated for T >= 500 and the regression would fail
# because X and y would have different lengths.
n_cols      = max(500, int(np.max(windows)) + 1)
rndm_pi_res = rng.normal(0, 0.1, (n_reps, n_cols))

# The estimator is stateless between fits, so a single instance is reused
lm = linear_model.LinearRegression()

Init= []; Ave = []; M_AMI = []; M_SLR = []; pi_r = []
for i in range(n_reps):
    Init_i  = rndm_pi_res[i, 0]
    T       = int(windows[i])          # plain int so it is a valid slice bound
    pi_r_i  = rndm_pi_res[i, 1:T+1]
    Ave_i   = np.mean(pi_r_i)
    M_AMI_i = Ave_i - Init_i

    # slope of the T sampled values regressed on their index 0..T-1
    X       = np.arange(T).reshape(-1,1)
    y       = pi_r_i.reshape(-1,1)
    M_SLR_i = lm.fit(X,y).coef_[0][0]

    #append to lists
    Init.append(Init_i)
    pi_r.extend(list(pi_r_i))
    Ave.append(Ave_i)
    M_AMI.append(M_AMI_i)
    M_SLR.append(M_SLR_i)

# One row per replicate
null_df = pd.DataFrame({"M_AMI":M_AMI, "M_SLR":M_SLR, "AVG_pi":Ave, "pi_initial":Init })
null_df.to_csv("../data/null_model_normal_distribution.csv", index = False)

# Flat array of every sampled resident value, concatenated over replicates
np.save("../data/null_model_normal_pi_new_and_pi_res_PI_RES.npy", np.array(pi_r))

## Null model where pi_res, pi_initial are sampled from empirical pi_res 

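1. For i in 1...10000
    1. Select an element of PI_R (the pooled empirical propensity values) at random. Call it Init_i. 
    2. Select an element of vrt (the empirical residence times) at random. Call it T. 
    3. Select T elements of PI_R at random, with replacement. Set Ave_i = mean of these elements.
    4. Calculate MAMI_i = Ave_i - Init_i
    5. Calculate MSLR_i = slope of the least-squares line fitted to the T elements against their index (0...T-1)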


In [6]:
# Null model: Init_i and the T "resident" values are resampled (with
# replacement) from the empirical pool PI_R.
# A dedicated generator with a fixed (arbitrary) seed makes the files written
# at the end of this cell reproducible across kernel restarts.
rng    = np.random.default_rng(2021012603)
n_reps = 10000

# One residence time per replicate, resampled (with replacement) from vrt
windows = rng.choice(vrt, n_reps)

# Column 0 of each row is Init_i and columns 1..T are the resident draws, so a
# row needs at least max(T) + 1 entries. With a fixed width of 500 the slice
# below would be silently truncated for T >= 500 and the regression would fail
# because X and y would have different lengths.
n_cols      = max(500, int(np.max(windows)) + 1)
rndm_pi_res = rng.choice(PI_R, (n_reps, n_cols))

# The estimator is stateless between fits, so a single instance is reused
lm = linear_model.LinearRegression()

Init= []; Ave = []; M_AMI = []; M_SLR = []; pi_r = []; pi_initial = []
for i in range(n_reps):
    Init_i  = rndm_pi_res[i, 0]
    T       = int(windows[i])          # plain int so it is a valid slice bound
    pi_r_i  = rndm_pi_res[i, 1:T+1]
    Ave_i   = np.mean(pi_r_i)
    M_AMI_i = Ave_i - Init_i

    # slope of the T sampled values regressed on their index 0..T-1
    X       = np.arange(T).reshape(-1,1)
    y       = pi_r_i.reshape(-1,1)
    M_SLR_i = lm.fit(X,y).coef_[0][0]

    #append to lists
    Init.append(Init_i)
    # repeat Init_i once per sampled value so pi_initial lines up with pi_r
    pi_initial.extend([Init_i]*len(pi_r_i))
    pi_r.extend(list(pi_r_i))
    Ave.append(Ave_i)
    M_AMI.append(M_AMI_i)
    M_SLR.append(M_SLR_i)

# One row per replicate, including the residence time that was drawn for it
null_df = pd.DataFrame({"M_AMI":M_AMI, "M_SLR":M_SLR, "AVG_pi":Ave, "pi_initial":Init, "res_time": list(windows)})
null_df.to_csv("../data/null_model_empirical_distribution.csv", index = False)

# Long format: one row per sampled value, paired with its replicate's Init_i
formated_data = pd.DataFrame({"pi_initial": pi_initial, "pi_res": pi_r})
formated_data.to_csv("../data/null_model_empirical_pi_new_and_pi_res.csv", index = False)

# Flat array of every sampled resident value, concatenated over replicates
np.save("../data/null_model_pi_new_and_pi_res_sampled_from_empirical_PI_RES.npy", np.array(pi_r))