### Purpose

This Python example shows how the estimated effect of a SNP differs depending on how the covariates are residualised.

In [9]:
from miscSupports import terminal_time
import statsmodels.api as sm
from pathlib import Path
import pandas as pd

# Setup working directory: the data folder is looked up relative to the current
# working directory, which is expected to contain this notebook.
working_directory = Path().resolve()
if not Path(working_directory, "example_python.ipynb").exists():
    # Warn but keep the current directory, so the data path built below is still a
    # directory path rather than a path nested inside a non-existent notebook file.
    print("WARNING: PATH TO WORKING DIRECTORY COULD NOT BE ESTABLISHED")

# Covariate list; "Constant" is the intercept column added to the database below
covariant_list = ["Gender", "Age", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10", "Constant"]

# Setup the database
database = pd.read_csv(Path(working_directory, "Data", "CovariantSnp.csv"))
# Scalar assignment broadcasts the intercept value to every row
database["Constant"] = 1
print(database)
print(f"Loaded Environment {terminal_time()}")

            IID        BMI  Gender   Age       PC1       PC2       PC3  \
0      sample_0  19.717868       1  1948 -0.486175 -0.244768  1.264372   
1      sample_1  27.719200       1  1947  0.039375 -0.563646 -0.086521   
2      sample_2  26.721164       0  1966  0.063110  0.123168  0.123920   
3      sample_3  17.668229       1  1945  1.151507 -0.826457 -0.761967   
4      sample_4  16.813296       1  1939  0.419127  0.522641  0.205381   
..          ...        ...     ...   ...       ...       ...       ...   
478  sample_478  29.449249       1  1947  1.683839 -0.230752  1.138786   
479  sample_479  25.634332       1  1942  0.134313 -0.335876  1.233824   
480  sample_480  23.240923       0  1952 -0.882881  0.901089  1.342438   
481  sample_481  19.545741       0  1966 -1.652558  1.426712  1.182911   
482  sample_482  24.200931       0  1936  0.632738 -1.544808 -1.423836   

          PC4       PC5       PC6  ...  rs123  rs234  rs345  rs456  rs567  \
0   -0.272320 -1.454021 -0.998124 

### Model 1

Regress BMI on G (the SNP), sex, YoB (year of birth), and the PCs (principal components) in a single model.

In [10]:

# Model 1: a single regression of BMI on the SNP together with every covariate
# (the covariate list already carries the intercept column).
predictors = ["rs012", *covariant_list]
model = sm.OLS(endog=database["BMI"], exog=database[predictors], missing="drop").fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                    BMI   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                 -0.005
Method:                 Least Squares   F-statistic:                    0.7981
Date:                Mon, 17 May 2021   Prob (F-statistic):              0.662
Time:                        10:18:42   Log-Likelihood:                -1428.5
No. Observations:                 483   AIC:                             2885.
Df Residuals:                     469   BIC:                             2944.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
rs012          0.2277      0.269      0.845      0.3

### Model 2

Residualise BMI on the covariates, then regress the residualised BMI on G.

In [14]:
# Stage 1: residualise BMI on the covariates; rows with missing values are dropped.
res = sm.OLS(database["BMI"], database[covariant_list], missing="drop").fit()
print(res.resid)

# Stage 2: regress the residualised BMI on the SNP plus an intercept.
# The regressors are selected by the residuals' index so both inputs cover the same
# rows even when stage 1 dropped some (assumes the residual Series keeps the row
# labels of the fitted rows, as the printed residuals suggest).
snp_design = database.loc[res.resid.index, ["rs012", "Constant"]]
model = sm.OLS(res.resid, snp_design, missing="drop").fit()
print(model.summary())

0     -2.210700
1      4.913190
2      4.238063
3     -4.548452
4     -5.640518
         ...   
478    8.275527
479    4.045467
480    1.342997
481   -3.569356
482    2.283283
Length: 483, dtype: float64
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.7106
Date:                Mon, 17 May 2021   Prob (F-statistic):              0.400
Time:                        10:21:41   Log-Likelihood:                -1428.6
No. Observations:                 483   AIC:                             2861.
Df Residuals:                     481   BIC:                             2869.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
      

### Model 3
Residualise G on the covariates, then regress BMI on the residualised G.

In [24]:
# Stage 1: residualise the SNP on the covariates. covariant_list already ends with
# "Constant", so it is used as-is; appending "Constant" again would duplicate the
# intercept column and make the design matrix rank-deficient.
g_fit = sm.OLS(database["rs012"], database[covariant_list], missing="drop").fit()

# Stage 2 design matrix: the residualised SNP (named explicitly so the column label
# does not depend on how an unnamed Series is converted) plus the intercept column.
g_res = pd.concat([g_fit.resid.rename("rs012"), database["Constant"]], axis=1)

print(g_res)

# Regress the raw BMI on the residualised SNP
model = sm.OLS(database["BMI"], g_res, missing="drop").fit()
print(model.summary())


        rs012  Constant
0    1.175758         1
1    0.100444         1
2    0.020096         1
3    0.058069         1
4    0.117982         1
..        ...       ...
478  0.939127         1
479 -0.783326         1
480 -0.738677         1
481 -0.866035         1
482  0.981525         1

[483 rows x 2 columns]
                            OLS Regression Results                            
Dep. Variable:                    BMI   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.7180
Date:                Mon, 17 May 2021   Prob (F-statistic):              0.397
Time:                        14:35:23   Log-Likelihood:                -1433.5
No. Observations:                 483   AIC:                             2871.
Df Residuals:                     481   BIC:                             2879.
Df Model:                           1                   