**Input:** 
- UK Biobank cancer-related fields
- ACCEL dataset (Katori et al. 2022)


**Process:**
 - Extract the subset of the population for which the ACCEL dataset is available
 - Test whether cancer makes any difference to the ACCEL features

**Output:**


# Preparation (Execute all in this section!)

## Import libraries & set environment variables

In [1]:
import collections
import csv
from datetime import datetime
import os
import numpy as np
import pandas as pd
from pathlib import Path
import polars as pl
import re
from matplotlib import pyplot as plt

from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn import linear_model
import statsmodels.api as sm

from contextlib import redirect_stdout
from io import StringIO

import warnings
# suppress DeprecationWarning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)


# Draw flowcharts using Mermaid
import base64
from IPython.display import Image, display

def mm(graph):
  """Display a Mermaid diagram inline, rendered by the mermaid.ink service.

  Args:
    graph: Mermaid diagram source text.

  The source is encoded as UTF-8 and then as URL-safe base64, so labels may
  contain non-ASCII characters and the encoded payload never contains "+"
  or "/", which are not safe inside a URL path. The image is referenced by
  URL, so showing it requires network access to mermaid.ink.
  """
  graphbytes = graph.encode("utf8")
  base64_bytes = base64.urlsafe_b64encode(graphbytes)
  # Base64 output is plain ASCII by construction
  base64_string = base64_bytes.decode("ascii")
  display(Image(url="https://mermaid.ink/img/" + base64_string))


# Move the working directory three levels up from the current directory
# (presumably the notebook's own folder) so that the relative paths used by
# the later cells resolve from there.
# NOTE(review): the target is computed from the *current* directory, so
# re-running this cell climbs three more levels each time. Run it once per
# kernel session and check the printed path.
dir_home = Path(os.getcwd()).parent.parent.parent
os.chdir(dir_home)
print("Current directory (check that it's your home directory):", os.getcwd())

Current directory (check that it's your home directory): J:\sugai\UKBiobank


In [6]:
# Flowchart of the analysis: UKBB population -> participants with ACCEL data
# -> ACCEL-derived sleep features and questionnaire -> statistical tests
mm("""
graph TD;
    classDef dark fill:#DDDDDD, stroke:#DDDDDD, font-size:12px
    UKBB[UKBB population: 502,386] --> ACCEL_pop[ACCEL data available: 81,496];
    ACCEL_pop --> ACCEL_data[ACCEL data];
    ACCEL_pop ---> Params[Questionnaire];
    ACCEL_data --> ACCEL_index[Sleep indexes];
    ACCEL_data --> ACCEL_dim[Sleep dimensions];
    ACCEL_data --> ACCEL_cluster[Sleep clusters];
    ACCEL_index --> Stat[Stat tests];
    ACCEL_dim --> Stat[Stat tests];
    ACCEL_cluster --> Stat[Stat tests];
    Params --> Stat[Stat tests];
""")


## Functions

In [2]:
def merge_files(list_files, dir):
    """Outer-join several tab-separated files on "eid" into one DataFrame.

    Args:
        list_files: File names, relative to ``dir``.
        dir: Folder that contains the files.

    Returns:
        A Polars DataFrame with one column per file. The second column of
        each file is renamed to that file's base name without extension.
        An empty DataFrame is returned when ``list_files`` is empty.
    """
    paths = [os.path.join(dir, name) for name in list_files]
    print(datetime.now(), "Start processing", len(paths), "files")

    df_merged = pl.DataFrame()
    for count, path in enumerate(paths, start=1):
        func_print(count)

        # The value column takes the name of the file it came from
        stem, _ = os.path.splitext(os.path.basename(path))
        df_current = pl.read_csv(path, separator="\t")
        df_current = df_current.rename({df_current.columns[1]: stem})

        # The first file seeds the result; later files are joined onto it
        if df_merged.shape == (0, 0):
            df_merged = df_current
        else:
            df_merged = df_merged.join(df_current, on="eid", how="outer")

    print(datetime.now(), "Done processing.")
    return df_merged
    
    
def func_list_to_polars(l):
    """Build a Polars DataFrame from a list of rows with a header row first.

    Args:
        l: List of equally long rows; ``l[0]`` holds the column names.

    Returns:
        A Polars DataFrame with one column per header entry.

    All rows are packed into a single NumPy array first, so NumPy picks one
    common dtype for every cell.
    """
    table = np.array(l)
    header, data = table[0], table[1:]
    # Slice the table column by column, keyed by the matching header cell
    columns = {name: data[:, idx] for idx, name in enumerate(header)}
    return pl.DataFrame(columns)
    

def func_print(i, suffix=""):
    """Print a timestamped progress line, but only for selected counts.

    A line is printed when ``i`` is below 10, or when every character of
    ``str(i)`` after the first is "0" (10, 20, ..., 100, 200, ...), which
    thins the output out as the counter grows.

    Args:
        i: Current counter value.
        suffix: Extra text printed after the counter.
    """
    digits = str(i)
    trailing = digits[1:]
    if i < 10 or trailing == "0" * len(trailing):
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"{stamp}: {i}", suffix)


# Express a date as a day count relative to a fixed reference date
def date_string_to_int(date_str):
    """Return the number of days from 1970-10-10 to ``date_str``.

    Args:
        date_str: Despite the name, a non-empty value must support
            subtraction of a ``datetime.date`` (for example a ``date``).

    Returns:
        The signed day difference as an int, or the string "NA" when
        ``date_str`` is falsy (None, empty string, ...).
    """
    if not date_str:
        return "NA"
    # NOTE(review): the reference is 10 October 1970, not the Unix epoch
    # (1970-01-01) - confirm that this is intended.
    date_start = datetime.strptime("1970-10-10", '%Y-%m-%d').date()
    return (date_str - date_start).days

        
# Reduce a timestamp string to its calendar date
def convert_date_format(date_str: str) -> str:
    """Convert "YYYY-MM-DDTHH:MM:SS" to "YYYY-MM-DD".

    The placeholder "NA" is passed through unchanged. Any other string that
    does not match the timestamp format makes ``datetime.strptime`` raise
    ``ValueError``.
    """
    if date_str == "NA":
        return "NA"
    parsed = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
    return parsed.strftime("%Y-%m-%d")


# Convert a column to a float array, or to epoch seconds for ISO date strings
def pl_to_float(df):
    """Convert a column to a numeric NumPy array where possible.

    Args:
        df: Object with a ``to_numpy()`` method (e.g. a Polars Series).

    Returns:
        * A float64 array when every value can be cast to float.
        * An integer array of POSIX timestamps (seconds) when the cast fails
          and the first value is a string containing "-", which is taken to
          mean ISO-formatted dates. Naive datetimes are interpreted in the
          local time zone by ``datetime.timestamp``.
        * The unconverted array otherwise (best effort).

    Raises:
        ValueError / TypeError: If the first value looks like a date but a
            later value cannot be parsed by ``datetime.fromisoformat``.
    """
    result = df.to_numpy()
    try:
        return result.astype(float)
    except (ValueError, TypeError):
        # Not directly numeric: try the date-string interpretation below
        pass

    # Only inspect the first value if there is one and it is text; a leading
    # null previously raised TypeError on the "-" membership test.
    if len(result) > 0 and isinstance(result[0], str) and "-" in result[0]:
        # Convert the date strings to datetime objects
        parsed = [datetime.fromisoformat(date_str) for date_str in result]
        # Express each datetime as whole seconds since the epoch
        result = np.array([int(date_obj.timestamp()) for date_obj in parsed])
    return result


# Check normality
def test_normality(pheno_list_continuous, df):
    """Run a Kolmogorov-Smirnov test per phenotype and print a joint verdict.

    Each column is converted with ``pl_to_float`` and compared against the
    "norm" distribution with ``scipy.stats.kstest``. The smallest valid
    p-value is compared with a Bonferroni threshold of
    ``0.05 / len(pheno_list_continuous)``; results are only printed.

    Args:
        pheno_list_continuous: Column names to test.
        df: Table indexable by column name (e.g. a Polars DataFrame).

    NOTE(review): ``kstest(x, "norm")`` is called without distribution
    parameters, i.e. against the standard normal (mean 0, SD 1). Data that
    is not standardized will be rejected regardless of its shape - confirm
    that this is intended.
    """
    # Without phenotypes the Bonferroni divisor would be zero
    if not pheno_list_continuous:
        print("No phenotypes given, so normality cannot be assessed")
        return

    pval_min = 1
    for pheno in pheno_list_continuous:
        print(pheno)
        try:
            np_df = pl_to_float(df[pheno])
            pval = stats.kstest(np_df, "norm")[1]
        except Exception:
            # Best effort: a column that cannot be converted or tested is
            # skipped, but KeyboardInterrupt/SystemExit still propagate.
            print("Skip because the data is not valid ")
            continue
        print("P-value =", pval)
        if not np.isnan(pval):
            pval_min = min(pval, pval_min)

    # Normality is rejected if any test passes the Bonferroni threshold
    is_normal = not (pval_min < 0.05 / len(pheno_list_continuous))
    print()
    print("Minimum p-value = ", pval_min, ", therefore normality =", is_normal)


def zscore(x, axis = None):
    """Standardize ``x`` along ``axis``, mapping undefined results to 1.

    Args:
        x: NumPy array.
        axis: Axis along which mean and standard deviation are taken;
            ``None`` uses the whole array.

    Returns:
        ``(x - mean) / std`` with the shape of ``x``. Entries that come out
        as NaN (zero standard deviation, or NaN in the input) are set to 1.
    """
    center = x.mean(axis=axis, keepdims=True)
    spread = np.std(x, axis=axis, keepdims=True)
    # A zero spread becomes NaN so the division yields NaN instead of inf
    spread = np.where(spread == 0, np.nan, spread)
    standardized = (x - center) / spread
    # Every NaN in the quotient is replaced by the constant 1
    return np.where(np.isnan(standardized), 1, standardized)


def func_linreg(df_onehot, pheno_objective, pheno_list_confounding, pheno_list_explanatory):
    """Regress one objective phenotype on each explanatory phenotype in turn.

    For every explanatory phenotype an OLS model
    ``objective ~ intercept + explanatory + confounders`` is fitted on the
    rows that have no null in any of these columns. The X columns are
    z-scored and Y is min-max scaled to [0, 1] before fitting.

    Args:
        df_onehot: Polars DataFrame that holds all referenced columns.
        pheno_objective: Name of the dependent-variable column.
        pheno_list_confounding: Column names added as covariates to every
            model.
        pheno_list_explanatory: Column names tested one at a time.

    Returns:
        A list of rows whose first row is the header. Each further row
        describes one explanatory phenotype: sample size ``N``, the number
        of rows with objective == 0 (``N_zero``), intercept and
        explanatory-variable coefficient with their p-values, r^2 and the
        condition number. When no model is fitted, the six statistics are
        replaced by a string that names the reason.
    """
    result_list = [["Objective_phenotype", "Explanatory_phenotype_main",
                    "Explanatory_phenotype_confounding",
                    "N", "N_zero",
                    "params_intercept", "params_X",
                    "pval_intercept", "pval_X",
                    "r^2", "Condition_number"]]
    confounding_label = ",".join(pheno_list_confounding)

    # Test explanatory variables one by one, adding confounding factors each time
    for pheno in pheno_list_explanatory:
        pheno_list_X = [pheno] + pheno_list_confounding

        # Dataset without empty fields for linear regression
        df_temp = df_onehot[pheno_list_X + [pheno_objective]].drop_nulls()
        N = df_temp.shape[0]
        N0 = df_temp.filter(pl.col(pheno_objective) == 0).shape[0]
        row_prefix = [pheno_objective, pheno, confounding_label, N, N0]

        # Stop if too few instances
        if N < 10:
            result_list.append(row_prefix + ["NA_instances_fewerThan_10"] * 6)
            continue

        X = df_temp[pheno_list_X].to_numpy().astype(np.float64)
        Y = df_temp[pheno_objective].to_numpy().astype(np.float64)

        # A regression is meaningless if either side is constant
        range_explanatory = np.max(X[:, 0]) - np.min(X[:, 0])
        range_objective = np.max(Y) - np.min(Y)
        if range_explanatory == 0:
            result_list.append(row_prefix + ["NA_X_variance_is_zero"] * 6)
            continue
        if range_objective == 0:
            result_list.append(row_prefix + ["NA_Y_variance_is_zero"] * 6)
            continue

        # Normalize X & scale Y
        X = zscore(X, axis=0)
        scaler = MinMaxScaler(feature_range=(0, 1))
        Y = scaler.fit_transform(Y.reshape(-1, 1)).flatten()

        # Always prepend the intercept column. zscore() turns a
        # zero-variance confounder into a column of ones, which the default
        # has_constant="skip" would mistake for an existing intercept and
        # return X unchanged, so params[0] / params[1] would no longer be
        # the intercept and the explanatory coefficient.
        X = sm.add_constant(X, has_constant="add")

        # Perform the linear regression analysis
        results = sm.OLS(Y, X).fit()

        result_list.append(row_prefix + [
            results.params[0], results.params[1],
            results.pvalues[0], results.pvalues[1],
            results.rsquared, results.condition_number,
        ])

    return result_list

## Variables

In [15]:
# Source files to load; each one is read and joined on "eid" by the loading
# cell further below.
list_files = [
    "12530_20118-0.0.txt", # Urban-rural
    # ACCEL-UKBB
    "26796_90010-0.0.txt", # 90010 Start time of wear
    "26803_90018-0.0.txt", # 90018 Daylight savings crossover
    "26976_90192-0.0.txt", # Temperature average
    "26977_90193-0.0.txt", # Temperature standard deviation
    "26978_90194-0.0.txt", # Temperature minimum
    "26979_90195-0.0.txt", # Temperature maximum
    "28404_110006-0.0.txt", # Invitation to physical activity study, date sent
    # ACCEL-KATORI
    '30816_ST_long_mean.txt',
    '30817_ST_long_sd.txt',
    '30818_WT_long_mean.txt',
    '30819_WT_long_sd.txt',
    '30820_ST_short_mean.txt',
    '30821_ST_short_sd.txt',
    '30822_WT_short_mean.txt',
    '30823_WT_short_sd.txt',
    '30824_long_window_len_mean.txt',
    '30825_long_window_len_sd.txt',
    '30826_long_window_num_mean.txt',
    '30827_long_window_num_sd.txt',
    '30828_short_window_len_mean.txt',
    '30829_short_window_len_sd.txt',
    '30830_short_window_num_mean.txt',
    '30831_short_window_num_sd.txt',
    '30832_phase_mean.txt',
    '30833_phase_sd.txt',
    '30834_max_period.txt',
    '30835_amplitude.txt',
    '30836_sleep_percentage.txt',
    '30837_continuous_day.txt',
    '30838_age.txt',
    '30839_sex.txt',
    '30841_group_eid_old.txt',
    '30842_abnormal_group_eid_old.txt',
    '30846_group_five.txt',]

# Readable names for the columns that are labelled by UK Biobank field ID
columns_rename = {"20118-0.0": "urban_rural", 
                  "90010-0.0": "date_start", 
                  "90018-0.0": "daylight_savings_crossover",
                  "90192-0.0": "temperature_mean",
                  "90193-0.0": "temperature_SD", 
                  "90194-0.0": "temperature_min",
                  "90195-0.0": "temperature_max",
                  "110006-0.0": "date_sent",
                 }


# Columns that the loading cell casts from string to Float32
columns_float32 = [
 'temperature_mean',
 'temperature_SD', 
 'temperature_min',
 'temperature_max',
 'ST_long_mean',
 'ST_long_sd',
 'WT_long_mean',
 'WT_long_sd',
 'ST_short_mean',
 'ST_short_sd',
 'WT_short_mean',
 'WT_short_sd',
 'long_window_len_mean',
 'long_window_len_sd',
 'long_window_num_mean',
 'long_window_num_sd',
 'short_window_len_mean',
 'short_window_len_sd',
 'short_window_num_mean',
 'short_window_num_sd',
 'phase_mean',
 'phase_sd',
 'max_period',
 'amplitude',
 'sleep_percentage',
 'continuous_day',
 'age',
 'sex',]

# Columns that are replaced by one-hot (dummy) columns before the regression
columns_categorical = [
    "urban_rural",
    "group_eid_old", "abnormal_group_eid_old", "group_five"
]

## Directories

In [3]:
# Input folder with one file per field, and the output folders of this
# analysis (all relative to the working directory set at the top)
dir_source = os.path.join("data", "ukbb", "4047708_673112_all", 
                          "accel_ukbb", "split", "accel_only")
dir_out = os.path.join("analysis", "specific", "analysis_20230713_01_ACCEL_season")
dir_out_plot = os.path.join(dir_out, "plot")
dir_out_hist = os.path.join(dir_out, "plot", "histogram")
dir_out_pairplot = os.path.join(dir_out, "plot", "pair_plot")

# Create the output folders; exist_ok makes re-running the cell harmless
for dir_path in (dir_out, dir_out_plot, dir_out_hist, dir_out_pairplot):
    os.makedirs(dir_path, exist_ok=True)

# Show the last 34 files (by sorted name) that exist in the source folder.
# The listing is kept in its own variable: rebinding `list_files` here would
# overwrite the hand-picked list of the "Variables" section when the notebook
# is run top to bottom, and the loading cell would then read the wrong files.
list_files_available = sorted(os.listdir(dir_source))[-34:]
list_files_available

['30815_day_num.txt',
 '30816_ST_long_mean.txt',
 '30817_ST_long_sd.txt',
 '30818_WT_long_mean.txt',
 '30819_WT_long_sd.txt',
 '30820_ST_short_mean.txt',
 '30821_ST_short_sd.txt',
 '30822_WT_short_mean.txt',
 '30823_WT_short_sd.txt',
 '30824_long_window_len_mean.txt',
 '30825_long_window_len_sd.txt',
 '30826_long_window_num_mean.txt',
 '30827_long_window_num_sd.txt',
 '30828_short_window_len_mean.txt',
 '30829_short_window_len_sd.txt',
 '30830_short_window_num_mean.txt',
 '30831_short_window_num_sd.txt',
 '30832_phase_mean.txt',
 '30833_phase_sd.txt',
 '30834_max_period.txt',
 '30835_amplitude.txt',
 '30836_sleep_percentage.txt',
 '30837_continuous_day.txt',
 '30838_age.txt',
 '30839_sex.txt',
 '30840_cluster.txt',
 '30841_group_eid_old.txt',
 '30842_abnormal_group_eid_old.txt',
 '30843_axivity_age.txt',
 '30844_axivity_month.txt',
 '30845_cluster_alphabet.txt',
 '30846_group_five.txt',
 '30847_group_eight.txt',
 '30848_abnormal_group_eight.txt']

In [16]:
# Read every source file with all columns as strings and inner-join on "eid"
df_all = None
for file_source in list_files:
    path_source = os.path.join(dir_source, file_source)
    df = pl.read_csv(path_source, separator="\t", infer_schema_length=0)
    df_all = df if df_all is None else df_all.join(df, on="eid")
if df_all is None:
    # Fail clearly instead of silently reusing a stale df_all from an earlier run
    raise ValueError("list_files is empty, so there is nothing to load")

# Drop rows whose ST_long_mean is "NA"
df_all = df_all.filter(pl.col("ST_long_mean") != "NA")

# Rename column names
df_all = df_all.rename(columns_rename)

# Convert string "NA" to Null, for all columns in a single pass
df_all = df_all.with_columns([
    pl.when(pl.col(col_name) == "NA")
    .then(pl.lit(None))
    .otherwise(pl.col(col_name))
    .alias(col_name)
    for col_name in df_all.columns
])

# Convert to numbers where applicable ("NA" is already Null at this point)
df_all = df_all.with_columns(pl.col(columns_float32).cast(pl.Float32))

# Convert date
df_all = df_all.with_columns(
    pl.col("date_sent")
    .str.strptime(pl.Date, fmt="%Y-%m-%d", strict=False)
    .alias("date_sent")
) 

# date_start carries a time of day; reduce it to the date before parsing
df_all = df_all.with_columns(
    pl.col("date_start")
    .apply(convert_date_format, return_dtype=pl.Utf8)
    .str.strptime(pl.Date, fmt="%Y-%m-%d", strict=False)
    .alias("date_start")
)

# Extract months directly from the parsed dates (Int64, Null for missing dates)
df_all = df_all.with_columns([
    pl.col("date_start").dt.month().cast(pl.Int64).alias("month_start"),
    pl.col("date_sent").dt.month().cast(pl.Int64).alias("month_sent"),
])

df_all

eid,urban_rural,date_start,daylight_savings_crossover,temperature_mean,temperature_SD,temperature_min,temperature_max,date_sent,ST_long_mean,ST_long_sd,WT_long_mean,WT_long_sd,ST_short_mean,ST_short_sd,WT_short_mean,WT_short_sd,long_window_len_mean,long_window_len_sd,long_window_num_mean,long_window_num_sd,short_window_len_mean,short_window_len_sd,short_window_num_mean,short_window_num_sd,phase_mean,phase_sd,max_period,amplitude,sleep_percentage,continuous_day,age,sex,group_eid_old,abnormal_group_eid_old,group_five,month_start,month_sent
str,str,date,str,f32,f32,f32,f32,date,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str,str,str,i64,i64
"""1000076""","""5""",2014-03-17,"""0""",20.9,1.8,9.8,24.200001,2014-02-20,408.083344,83.084503,53.916668,22.49892,34.666668,55.541775,2.833333,4.624812,462.0,93.65184,1.0,0.0,56.25,49.820301,0.666667,0.745356,960.75,35.796589,1440.0,0.641584,0.307755,6.0,69.0,0.0,"""4b""",,"""4""",3,2
"""1000091""","""5""",2015-06-27,"""0""",21.1,0.8,9.9,25.4,2015-06-18,215.333328,98.566246,23.0,14.309088,73.333336,83.993385,22.916666,27.380676,286.0,29.759031,0.833333,0.372678,36.09375,38.210037,2.666667,1.699673,960.416687,58.078552,1422.0,0.477603,0.20735,6.0,67.0,1.0,"""3b""",,"""3""",6,6
"""1000162""","""8""",2015-01-28,"""0""",19.9,2.1,9.4,23.299999,2015-01-08,512.375,45.022736,110.625,56.505947,39.125,38.664219,18.75,21.185196,623.0,99.460419,1.0,0.0,46.299999,35.205967,1.25,1.089725,926.75,6.675515,1440.0,0.740725,0.392535,4.0,51.0,1.0,"""4b""",,"""4""",1,1
"""1000171""","""5""",2014-09-26,"""0""",20.200001,1.1,9.7,22.700001,2014-09-12,461.299988,56.675037,64.599998,33.898968,4.0,8.0,0.8,1.6,525.900024,49.819073,1.0,0.0,24.0,0.0,0.2,0.4,895.299988,10.42401,1440.0,0.670662,0.325625,5.0,59.0,0.0,"""4b""",,"""4""",9,9
"""1000196""","""6""",2015-10-22,"""1""",21.1,1.3,7.3,23.4,2015-10-08,425.75,47.03257,27.916666,7.683406,24.5,32.601124,10.666667,12.171232,453.666656,46.807823,1.0,0.0,52.75,43.499283,0.666667,0.471405,992.75,42.702213,1440.0,0.655325,0.318171,6.0,54.0,0.0,"""4b""",,"""4""",10,10
"""1000209""","""5""",2014-11-15,"""0""",20.4,1.6,9.9,23.4,2014-11-07,412.100006,99.947189,43.400002,6.952697,16.5,27.828043,14.0,26.284977,455.5,102.475853,1.0,0.0,76.25,61.75,0.4,0.489898,899.099976,15.618579,1440.0,0.619383,0.29875,5.0,53.0,0.0,"""4b""",,"""4""",11,11
"""1000235""","""5""",2014-12-20,"""0""",20.799999,1.8,5.9,24.200001,2014-12-05,449.25,14.082347,50.75,22.071381,7.5,9.34523,7.166667,5.763872,500.0,26.821943,1.0,0.0,22.0,11.07926,0.666667,0.471405,866.666687,43.436417,1440.0,0.641835,0.32037,6.0,53.0,0.0,"""4b""",,"""4""",12,12
"""1000254""","""5""",2013-06-10,"""0""",20.4,1.6,7.9,25.5,2013-05-10,239.916672,125.897255,68.5,41.241161,80.5,97.910675,21.833334,19.766695,370.100006,87.373566,0.833333,0.372678,76.75,77.558044,1.333333,0.745356,962.25,13.091824,1440.0,0.49903,0.224016,6.0,58.0,1.0,"""3b""",,"""3""",6,5
"""1000260""","""5""",2013-07-17,"""0""",20.6,1.2,9.9,23.4,2013-07-09,327.166656,43.713715,21.583334,13.507457,15.416667,15.360709,0.916667,1.096079,348.75,42.5742,1.0,0.0,24.5,13.052777,0.666667,0.471405,872.0,32.770668,1422.0,0.5417,0.239525,6.0,56.0,0.0,"""4b""",,"""4""",7,7
"""1000337""","""5""",2014-11-15,"""0""",20.200001,1.4,8.4,25.200001,2014-10-31,399.166656,56.688965,71.0,19.45936,28.0,57.202126,3.0,6.708204,470.166656,73.326706,1.0,0.0,93.0,80.5,0.333333,0.471405,848.833313,26.497904,1440.0,0.599609,0.297569,6.0,56.0,1.0,"""4b""",,"""4""",11,10


In [50]:
# One-hot encode every categorical column: the original column is removed and
# its dummy columns are appended at the right edge of the table
df_onehot = df_all.clone()

for pheno in columns_categorical:
    onehot = df_onehot.get_column(pheno).to_dummies()
    df_onehot = pl.concat([df_onehot.drop(pheno), onehot], how="horizontal")

df_onehot

eid,date_start,daylight_savings_crossover,temperature_mean,temperature_SD,temperature_min,temperature_max,date_sent,ST_long_mean,ST_long_sd,WT_long_mean,WT_long_sd,ST_short_mean,ST_short_sd,WT_short_mean,WT_short_sd,long_window_len_mean,long_window_len_sd,long_window_num_mean,long_window_num_sd,short_window_len_mean,short_window_len_sd,short_window_num_mean,short_window_num_sd,phase_mean,phase_sd,max_period,amplitude,sleep_percentage,continuous_day,age,sex,month_start,month_sent,urban_rural_1,urban_rural_11,urban_rural_12,urban_rural_13,urban_rural_16,urban_rural_17,urban_rural_2,urban_rural_3,urban_rural_5,urban_rural_6,urban_rural_7,urban_rural_8,urban_rural_9,urban_rural_null,group_eid_old_1,group_eid_old_2a,group_eid_old_2b,group_eid_old_3a,group_eid_old_3b,group_eid_old_4a,group_eid_old_4b,group_eid_old_5,group_eid_old_null,abnormal_group_eid_old_3b-1,abnormal_group_eid_old_3b-2,abnormal_group_eid_old_4b-1,abnormal_group_eid_old_4b-2,abnormal_group_eid_old_4b-3,abnormal_group_eid_old_4b-4,abnormal_group_eid_old_4b-5,abnormal_group_eid_old_4b-6,abnormal_group_eid_old_null,group_five_1,group_five_2,group_five_3,group_five_4,group_five_5,group_five_null
str,date,str,f32,f32,f32,f32,date,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,i64,i64,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8,u8
"""1000076""",2014-03-17,"""0""",20.9,1.8,9.8,24.200001,2014-02-20,408.083344,83.084503,53.916668,22.49892,34.666668,55.541775,2.833333,4.624812,462.0,93.65184,1.0,0.0,56.25,49.820301,0.666667,0.745356,960.75,35.796589,1440.0,0.641584,0.307755,6.0,69.0,0.0,3,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000091""",2015-06-27,"""0""",21.1,0.8,9.9,25.4,2015-06-18,215.333328,98.566246,23.0,14.309088,73.333336,83.993385,22.916666,27.380676,286.0,29.759031,0.833333,0.372678,36.09375,38.210037,2.666667,1.699673,960.416687,58.078552,1422.0,0.477603,0.20735,6.0,67.0,1.0,6,6,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
"""1000162""",2015-01-28,"""0""",19.9,2.1,9.4,23.299999,2015-01-08,512.375,45.022736,110.625,56.505947,39.125,38.664219,18.75,21.185196,623.0,99.460419,1.0,0.0,46.299999,35.205967,1.25,1.089725,926.75,6.675515,1440.0,0.740725,0.392535,4.0,51.0,1.0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000171""",2014-09-26,"""0""",20.200001,1.1,9.7,22.700001,2014-09-12,461.299988,56.675037,64.599998,33.898968,4.0,8.0,0.8,1.6,525.900024,49.819073,1.0,0.0,24.0,0.0,0.2,0.4,895.299988,10.42401,1440.0,0.670662,0.325625,5.0,59.0,0.0,9,9,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000196""",2015-10-22,"""1""",21.1,1.3,7.3,23.4,2015-10-08,425.75,47.03257,27.916666,7.683406,24.5,32.601124,10.666667,12.171232,453.666656,46.807823,1.0,0.0,52.75,43.499283,0.666667,0.471405,992.75,42.702213,1440.0,0.655325,0.318171,6.0,54.0,0.0,10,10,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000209""",2014-11-15,"""0""",20.4,1.6,9.9,23.4,2014-11-07,412.100006,99.947189,43.400002,6.952697,16.5,27.828043,14.0,26.284977,455.5,102.475853,1.0,0.0,76.25,61.75,0.4,0.489898,899.099976,15.618579,1440.0,0.619383,0.29875,5.0,53.0,0.0,11,11,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000235""",2014-12-20,"""0""",20.799999,1.8,5.9,24.200001,2014-12-05,449.25,14.082347,50.75,22.071381,7.5,9.34523,7.166667,5.763872,500.0,26.821943,1.0,0.0,22.0,11.07926,0.666667,0.471405,866.666687,43.436417,1440.0,0.641835,0.32037,6.0,53.0,0.0,12,12,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
"""1000254""",2013-06-10,"""0""",20.4,1.6,7.9,25.5,2013-05-10,239.916672,125.897255,68.5,41.241161,80.5,97.910675,21.833334,19.766695,370.100006,87.373566,0.833333,0.372678,76.75,77.558044,1.333333,0.745356,962.25,13.091824,1440.0,0.49903,0.224016,6.0,58.0,1.0,6,5,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,


In [51]:
# Save the one-hot encoded table as CSV in the analysis output folder
file_out = os.path.join(dir_out, "one_hot.csv")
df_onehot.write_csv(file_out)

# Linear regression

In [8]:
# Load the one-hot encoded table from the CSV in the output folder;
# try_parse_dates asks Polars to parse date-like columns automatically.
file_onehot = os.path.join(dir_out, "one_hot.csv")
df_onehot = pl.read_csv(file_onehot, try_parse_dates=True)
df_onehot

eid,date_start,daylight_savings_crossover,temperature_mean,temperature_SD,temperature_min,temperature_max,date_sent,ST_long_mean,ST_long_sd,WT_long_mean,WT_long_sd,ST_short_mean,ST_short_sd,WT_short_mean,WT_short_sd,long_window_len_mean,long_window_len_sd,long_window_num_mean,long_window_num_sd,short_window_len_mean,short_window_len_sd,short_window_num_mean,short_window_num_sd,phase_mean,phase_sd,max_period,amplitude,sleep_percentage,continuous_day,age,sex,month_start,month_sent,urban_rural_1,urban_rural_11,urban_rural_12,urban_rural_13,urban_rural_16,urban_rural_17,urban_rural_2,urban_rural_3,urban_rural_5,urban_rural_6,urban_rural_7,urban_rural_8,urban_rural_9,urban_rural_null,group_eid_old_1,group_eid_old_2a,group_eid_old_2b,group_eid_old_3a,group_eid_old_3b,group_eid_old_4a,group_eid_old_4b,group_eid_old_5,group_eid_old_null,abnormal_group_eid_old_3b-1,abnormal_group_eid_old_3b-2,abnormal_group_eid_old_4b-1,abnormal_group_eid_old_4b-2,abnormal_group_eid_old_4b-3,abnormal_group_eid_old_4b-4,abnormal_group_eid_old_4b-5,abnormal_group_eid_old_4b-6,abnormal_group_eid_old_null,group_five_1,group_five_2,group_five_3,group_five_4,group_five_5,group_five_null
i64,date,i64,f64,f64,f64,f64,date,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
1000076,2014-03-17,0.0,20.9,1.8,9.8,24.2,2014-02-20,408.08334,83.0845,53.916668,22.49892,34.666668,55.541775,2.8333333,4.624812,462.0,93.65184,1.0,0.0,56.25,49.8203,0.6666667,0.745356,960.75,35.79659,1440.0,0.6415839,0.307755,6.0,69.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000091,2015-06-27,0.0,21.1,0.8,9.9,25.4,2015-06-18,215.33333,98.566246,23.0,14.309088,73.333336,83.993385,22.916666,27.380676,286.0,29.759031,0.8333333,0.372678,36.09375,38.210037,2.6666667,1.6996732,960.4167,58.078552,1422.0,0.477603,0.20735,6.0,67.0,1.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
1000162,2015-01-28,0.0,19.9,2.1,9.4,23.3,2015-01-08,512.375,45.022736,110.625,56.505947,39.125,38.66422,18.75,21.185196,623.0,99.46042,1.0,0.0,46.3,35.205967,1.25,1.0897248,926.75,6.675515,1440.0,0.7407248,0.392535,4.0,51.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000171,2014-09-26,0.0,20.2,1.1,9.7,22.7,2014-09-12,461.3,56.675037,64.6,33.898968,4.0,8.0,0.8,1.6,525.9,49.819073,1.0,0.0,24.0,0.0,0.2,0.4,895.3,10.42401,1440.0,0.670662,0.325625,5.0,59.0,0.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000196,2015-10-22,1.0,21.1,1.3,7.3,23.4,2015-10-08,425.75,47.03257,27.916666,7.683406,24.5,32.601124,10.666667,12.171232,453.66666,46.807823,1.0,0.0,52.75,43.499283,0.6666667,0.471405,992.75,42.702213,1440.0,0.6553255,0.3181713,6.0,54.0,0.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000209,2014-11-15,0.0,20.4,1.6,9.9,23.4,2014-11-07,412.1,99.94719,43.4,6.9526973,16.5,27.828043,14.0,26.284977,455.5,102.47585,1.0,0.0,76.25,61.75,0.4,0.489898,899.1,15.618579,1440.0,0.619383,0.29875,5.0,53.0,0.0,11.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000235,2014-12-20,0.0,20.8,1.8,5.9,24.2,2014-12-05,449.25,14.082347,50.75,22.07138,7.5,9.34523,7.1666665,5.763872,500.0,26.821943,1.0,0.0,22.0,11.07926,0.6666667,0.471405,866.6667,43.436417,1440.0,0.641835,0.32037,6.0,53.0,0.0,12.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1000254,2013-06-10,0.0,20.4,1.6,7.9,25.5,2013-05-10,239.91667,125.897255,68.5,41.24116,80.5,97.910675,21.833334,19.766695,370.1,87.373566,0.8333333,0.372678,76.75,77.558044,1.3333334,0.745356,962.25,13.091824,1440.0,0.49903,0.2240162,6.0,58.0,1.0,6.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
100,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# List the available column names (used to pick phenotypes for the regression)
df_onehot.columns

['eid',
 'date_start',
 'daylight_savings_crossover',
 'temperature_mean',
 'temperature_SD',
 'temperature_min',
 'temperature_max',
 'date_sent',
 'ST_long_mean',
 'ST_long_sd',
 'WT_long_mean',
 'WT_long_sd',
 'ST_short_mean',
 'ST_short_sd',
 'WT_short_mean',
 'WT_short_sd',
 'long_window_len_mean',
 'long_window_len_sd',
 'long_window_num_mean',
 'long_window_num_sd',
 'short_window_len_mean',
 'short_window_len_sd',
 'short_window_num_mean',
 'short_window_num_sd',
 'phase_mean',
 'phase_sd',
 'max_period',
 'amplitude',
 'sleep_percentage',
 'continuous_day',
 'age',
 'sex',
 'month_start',
 'month_sent',
 'urban_rural_1',
 'urban_rural_11',
 'urban_rural_12',
 'urban_rural_13',
 'urban_rural_16',
 'urban_rural_17',
 'urban_rural_2',
 'urban_rural_3',
 'urban_rural_5',
 'urban_rural_6',
 'urban_rural_7',
 'urban_rural_8',
 'urban_rural_9',
 'urban_rural_null',
 'group_eid_old_1',
 'group_eid_old_2a',
 'group_eid_old_2b',
 'group_eid_old_3a',
 'group_eid_old_3b',
 'group_eid_old_

In [20]:
def func_linreg_wrap(df_onehot,
                     pheno_list_confounding,
                     pheno_list_explanatory,
                     pheno_list_objective,
                     dir_out, filename_suffix):
    """Run func_linreg for each objective phenotype and save one combined table.

    The per-phenotype results are concatenated, converted to a Polars
    DataFrame with func_list_to_polars, extended with multiple-testing columns
    derived from ``pval_X`` and written as CSV. Nothing is returned.

    Columns added to the result table:
        pval_X_Bonf            pval_X multiplied by the number of result rows
                               (not clipped to 1).
        pval_X_log10           log10 of pval_X.
        pval_X_qval            pval_X * (number of result rows) / rank of pval_X.
                               No monotonicity step is applied afterwards.
        pval_X_Bonf_positive   True where pval_X_Bonf < 0.05.
        pval_X_qval_positive   True where pval_X_qval < 0.05.
        pval_X_Bonf_log10      log10 of pval_X_Bonf.
        pval_X_qval_log10      log10 of pval_X_qval.

    Args:
        df_onehot: Data table handed unchanged to func_linreg.
        pheno_list_confounding: Confounder column names, passed to func_linreg.
        pheno_list_explanatory: Explanatory column names, passed to func_linreg.
        pheno_list_objective: Objective column names; func_linreg is called
            once per entry. Must not be empty.
        dir_out: Directory in which the CSV file is written.
        filename_suffix: Despite the name, the complete file name of the CSV
            (joined to ``dir_out`` as is).

    Raises:
        ValueError: If ``pheno_list_objective`` is empty.
    """
    if len(pheno_list_objective) == 0:
        # Without this guard the function failed later with an unbound local.
        raise ValueError("pheno_list_objective must contain at least one phenotype")

    print(datetime.now(),
          "Start processing", len(pheno_list_objective), "objective phenotypes")

    result_list_all = []
    for i, pheno_objective in enumerate(pheno_list_objective, 1):
        print(datetime.now(), "Processing objective phenotype", i, ":", pheno_objective)
        result_list_single = func_linreg(df_onehot, pheno_objective,
                                         pheno_list_confounding, pheno_list_explanatory)
        # The first result is kept whole; later results drop their first
        # element (presumably a repeated header row -- confirm in func_linreg).
        result_list_all.extend(result_list_single if i == 1 else result_list_single[1:])

    # Convert to Polars DataFrame
    data_result_linreg = func_list_to_polars(result_list_all)

    columns_float64 = ['params_intercept',
                       'params_X',
                       'pval_intercept',
                       'pval_X',
                       'r^2',
                       'Condition_number']

    # Convert to numbers where applicable: the literal string "NA" becomes
    # null, everything else is cast to Float64.
    data_result_linreg = data_result_linreg.with_columns([
        pl.when(pl.col(col_name) == "NA")
        .then(pl.lit(None))
        .otherwise(pl.col(col_name))
        .cast(pl.Float64)
        .alias(col_name)
        for col_name in columns_float64
    ])

    print("Add P-bonf, Q-values, and extract statistically significant variables")
    # Counts every result row, including rows whose pval_X is null.
    n_bonferroni = data_result_linreg.shape[0]
    print(n_bonferroni, "tests are found")

    data_result_linreg = (data_result_linreg
          # Seed the derived columns with copies of the raw P value
          .with_columns(pl.col("pval_X").alias("pval_X_Bonf"))
          .with_columns(pl.col("pval_X").alias("pval_X_log10"))
          .with_columns(pl.col("pval_X").alias("pval_X_qval"))
          .with_columns(np.log10(pl.col("pval_X_log10")))
          .sort(by='pval_X_qval'))

    data_result_linreg = (data_result_linreg
          # Calculate Bonferroni-corrected P values
          .with_columns(pl.col('pval_X_Bonf') * n_bonferroni)
          # create a new column which is true if "P_Bonf" is under threshold
          .with_columns((pl.col("pval_X_Bonf") < 0.05).alias("pval_X_Bonf_positive"))
          # Calculate Q values
          # Multiply Q column by the number of result rows
          .with_columns(pl.col('pval_X_qval') * n_bonferroni)
          # Divide the Q column by the rank of the raw P value; computed inside
          # the frame so it cannot get out of step with the row order
          .with_columns(pl.col('pval_X_qval') / pl.col('pval_X').rank())
          # create a new column which is true if "q" is under threshold
          .with_columns((pl.col("pval_X_qval") < 0.05).alias("pval_X_qval_positive"))
          # Add log10 values
          .with_columns(pl.col("pval_X_Bonf").alias("pval_X_Bonf_log10"))
          .with_columns(np.log10(pl.col("pval_X_Bonf_log10")))
          .with_columns(pl.col("pval_X_qval").alias("pval_X_qval_log10"))
          .with_columns(np.log10(pl.col("pval_X_qval_log10")))
          # Sort so that Bonferroni-significant rows come first
          .sort(by='pval_X_Bonf_positive', descending=True)
         )

    # Preview the first rows (head must be called, not just referenced)
    print(data_result_linreg.head())
    file_out = os.path.join(dir_out, filename_suffix)
    data_result_linreg.write_csv(file_out)

## Season-related

In [22]:
# Season-related run: regress every objective column on the season/weather
# variables below, with age and sex as confounders.
pheno_list_confounding = ["age", "sex"]

pheno_list_explanatory = [
    "daylight_savings_crossover",
    "temperature_mean", "temperature_SD", "temperature_min", "temperature_max",
    "continuous_day",
    "month_start", "month_sent",
]

pheno_list_objective = [
    # Continuous ACCEL features (mean/sd pairs plus three single values)
    "ST_long_mean", "ST_long_sd", "WT_long_mean", "WT_long_sd",
    "ST_short_mean", "ST_short_sd", "WT_short_mean", "WT_short_sd",
    "long_window_len_mean", "long_window_len_sd",
    "long_window_num_mean", "long_window_num_sd",
    "short_window_len_mean", "short_window_len_sd",
    "short_window_num_mean", "short_window_num_sd",
    "phase_mean", "phase_sd",
    "max_period", "amplitude", "sleep_percentage",
    # group_eid_old_* indicator columns
    "group_eid_old_1", "group_eid_old_2a", "group_eid_old_2b",
    "group_eid_old_3a", "group_eid_old_3b",
    "group_eid_old_4a", "group_eid_old_4b",
    "group_eid_old_5", "group_eid_old_null",
    # abnormal_group_eid_old_* indicator columns
    "abnormal_group_eid_old_3b-1", "abnormal_group_eid_old_3b-2",
    "abnormal_group_eid_old_4b-1", "abnormal_group_eid_old_4b-2",
    "abnormal_group_eid_old_4b-3", "abnormal_group_eid_old_4b-4",
    "abnormal_group_eid_old_4b-5", "abnormal_group_eid_old_4b-6",
    "abnormal_group_eid_old_null",
    # group_five_* indicator columns
    "group_five_1", "group_five_2", "group_five_3",
    "group_five_4", "group_five_5", "group_five_null",
]

func_linreg_wrap(
    df_onehot=df_onehot,
    pheno_list_confounding=pheno_list_confounding,
    pheno_list_explanatory=pheno_list_explanatory,
    pheno_list_objective=pheno_list_objective,
    dir_out=dir_out,
    filename_suffix="result_linreg_season.csv",
)

2023-07-20 18:12:46.138189 Start processing 45 objective phenotypes
2023-07-20 18:12:46.138189 Processing objective phenotype 1 : ST_long_mean
2023-07-20 18:12:46.491219 Processing objective phenotype 2 : ST_long_sd
2023-07-20 18:12:46.848301 Processing objective phenotype 3 : WT_long_mean
2023-07-20 18:12:47.164195 Processing objective phenotype 4 : WT_long_sd
2023-07-20 18:12:47.506139 Processing objective phenotype 5 : ST_short_mean
2023-07-20 18:12:47.855719 Processing objective phenotype 6 : ST_short_sd
2023-07-20 18:12:48.185259 Processing objective phenotype 7 : WT_short_mean
2023-07-20 18:12:48.520322 Processing objective phenotype 8 : WT_short_sd
2023-07-20 18:12:48.846244 Processing objective phenotype 9 : long_window_len_mean
2023-07-20 18:12:49.205806 Processing objective phenotype 10 : long_window_len_sd
2023-07-20 18:12:49.668316 Processing objective phenotype 11 : long_window_num_mean
2023-07-20 18:12:50.011853 Processing objective phenotype 12 : long_window_num_sd
2023-

In [24]:
# Reduced season-related run: fewer explanatory variables and no "*_null"
# objective columns; written to its own CSV file.
pheno_list_confounding = ["age", "sex"]

pheno_list_explanatory = [
    "temperature_mean", "temperature_min", "temperature_max",
    "month_start",
]

pheno_list_objective = [
    # Continuous ACCEL features (mean/sd pairs plus three single values)
    "ST_long_mean", "ST_long_sd", "WT_long_mean", "WT_long_sd",
    "ST_short_mean", "ST_short_sd", "WT_short_mean", "WT_short_sd",
    "long_window_len_mean", "long_window_len_sd",
    "long_window_num_mean", "long_window_num_sd",
    "short_window_len_mean", "short_window_len_sd",
    "short_window_num_mean", "short_window_num_sd",
    "phase_mean", "phase_sd",
    "max_period", "amplitude", "sleep_percentage",
    # group_eid_old_* indicator columns
    "group_eid_old_1", "group_eid_old_2a", "group_eid_old_2b",
    "group_eid_old_3a", "group_eid_old_3b",
    "group_eid_old_4a", "group_eid_old_4b",
    "group_eid_old_5",
    # abnormal_group_eid_old_* indicator columns
    "abnormal_group_eid_old_3b-1", "abnormal_group_eid_old_3b-2",
    "abnormal_group_eid_old_4b-1", "abnormal_group_eid_old_4b-2",
    "abnormal_group_eid_old_4b-3", "abnormal_group_eid_old_4b-4",
    "abnormal_group_eid_old_4b-5", "abnormal_group_eid_old_4b-6",
    # group_five_* indicator columns
    "group_five_1", "group_five_2", "group_five_3",
    "group_five_4", "group_five_5",
]

func_linreg_wrap(
    df_onehot=df_onehot,
    pheno_list_confounding=pheno_list_confounding,
    pheno_list_explanatory=pheno_list_explanatory,
    pheno_list_objective=pheno_list_objective,
    dir_out=dir_out,
    filename_suffix="result_linreg_season_part.csv",
)

2023-07-20 18:14:54.056237 Start processing 42 objective phenotypes
2023-07-20 18:14:54.056237 Processing objective phenotype 1 : ST_long_mean
2023-07-20 18:14:54.236773 Processing objective phenotype 2 : ST_long_sd
2023-07-20 18:14:54.430285 Processing objective phenotype 3 : WT_long_mean
2023-07-20 18:14:54.602683 Processing objective phenotype 4 : WT_long_sd
2023-07-20 18:14:54.768159 Processing objective phenotype 5 : ST_short_mean
2023-07-20 18:14:54.917056 Processing objective phenotype 6 : ST_short_sd
2023-07-20 18:14:55.063951 Processing objective phenotype 7 : WT_short_mean
2023-07-20 18:14:55.212355 Processing objective phenotype 8 : WT_short_sd
2023-07-20 18:14:55.373778 Processing objective phenotype 9 : long_window_len_mean
2023-07-20 18:14:55.535205 Processing objective phenotype 10 : long_window_len_sd
2023-07-20 18:14:55.746506 Processing objective phenotype 11 : long_window_num_mean
2023-07-20 18:14:56.042427 Processing objective phenotype 12 : long_window_num_sd
2023-

## Urban vs. rural

In [23]:
# Urban vs. rural run: regress every objective column on the urban_rural_*
# indicator columns, with age and sex as confounders.
pheno_list_confounding = ["age", "sex"]

pheno_list_explanatory = [
    "urban_rural_1",
    "urban_rural_11", "urban_rural_12", "urban_rural_13",
    "urban_rural_16", "urban_rural_17",
    "urban_rural_2", "urban_rural_3",
    "urban_rural_5", "urban_rural_6", "urban_rural_7",
    "urban_rural_8", "urban_rural_9",
    "urban_rural_null",
]

pheno_list_objective = [
    # Continuous ACCEL features (mean/sd pairs plus three single values)
    "ST_long_mean", "ST_long_sd", "WT_long_mean", "WT_long_sd",
    "ST_short_mean", "ST_short_sd", "WT_short_mean", "WT_short_sd",
    "long_window_len_mean", "long_window_len_sd",
    "long_window_num_mean", "long_window_num_sd",
    "short_window_len_mean", "short_window_len_sd",
    "short_window_num_mean", "short_window_num_sd",
    "phase_mean", "phase_sd",
    "max_period", "amplitude", "sleep_percentage",
    # group_eid_old_* indicator columns
    "group_eid_old_1", "group_eid_old_2a", "group_eid_old_2b",
    "group_eid_old_3a", "group_eid_old_3b",
    "group_eid_old_4a", "group_eid_old_4b",
    "group_eid_old_5", "group_eid_old_null",
    # abnormal_group_eid_old_* indicator columns
    "abnormal_group_eid_old_3b-1", "abnormal_group_eid_old_3b-2",
    "abnormal_group_eid_old_4b-1", "abnormal_group_eid_old_4b-2",
    "abnormal_group_eid_old_4b-3", "abnormal_group_eid_old_4b-4",
    "abnormal_group_eid_old_4b-5", "abnormal_group_eid_old_4b-6",
    "abnormal_group_eid_old_null",
    # group_five_* indicator columns
    "group_five_1", "group_five_2", "group_five_3",
    "group_five_4", "group_five_5", "group_five_null",
]

func_linreg_wrap(
    df_onehot=df_onehot,
    pheno_list_confounding=pheno_list_confounding,
    pheno_list_explanatory=pheno_list_explanatory,
    pheno_list_objective=pheno_list_objective,
    dir_out=dir_out,
    filename_suffix="result_linreg_urbanrural.csv",
)

2023-07-20 18:13:22.148805 Start processing 45 objective phenotypes
2023-07-20 18:13:22.148805 Processing objective phenotype 1 : ST_long_mean
2023-07-20 18:13:22.679937 Processing objective phenotype 2 : ST_long_sd
2023-07-20 18:13:23.226178 Processing objective phenotype 3 : WT_long_mean
2023-07-20 18:13:23.805430 Processing objective phenotype 4 : WT_long_sd
2023-07-20 18:13:24.476613 Processing objective phenotype 5 : ST_short_mean
2023-07-20 18:13:25.074640 Processing objective phenotype 6 : ST_short_sd
2023-07-20 18:13:25.657583 Processing objective phenotype 7 : WT_short_mean
2023-07-20 18:13:26.242612 Processing objective phenotype 8 : WT_short_sd
2023-07-20 18:13:26.826882 Processing objective phenotype 9 : long_window_len_mean
2023-07-20 18:13:27.380508 Processing objective phenotype 10 : long_window_len_sd
2023-07-20 18:13:27.959726 Processing objective phenotype 11 : long_window_num_mean
2023-07-20 18:13:28.526443 Processing objective phenotype 12 : long_window_num_sd
2023-