# Frailty Risk Modeling using QLattice
### Author: Shehbeel Arif
### Purpose: To develop an ML or statistical model to determine a patient's frailty risk status based on the muscle gene expression data.

---

## Load libraries

In [1]:
# Library for data handling
import numpy as np
import pandas as pd

# Library for splitting data into Training and Testing datasets
from sklearn.model_selection import train_test_split

# QLattice library
import feyn

# Visualization library
import seaborn as sns

This version of feyn and the QLattice is available for academic, personal, and non-commercial use. By using the community version of this software you agree to the terms and conditions which can be found at `https://abzu.ai/eula`.


## Data Preprocessing

In [23]:
# Load data
data_dir = '/Users/shehbeel/Documents/frailty-clinical-model/data/'
data_dir2 = '/Users/shehbeel/Documents/frailty-clinical-model/analyses/01-convert-ensembl-ids/results/'
meta = pd.read_csv(data_dir + 'GSE144304_meta.txt', delimiter='\t')
counts = pd.read_csv(data_dir2 + 'GSE144304_raw_counts_with_genes.csv', index_col='gene_symbol')

# Transpose counts
counts = counts.T
counts = counts.reset_index()

# Merge counts and meta data
data = pd.merge(meta, counts, left_on='sample_name', right_on='index').drop(['index'], axis=1)
data = data.set_index('sample_name')

# Convert target variable to boolean
#data['treatment'] = data['treatment'].map({'frail': True, 'fit': False, 'young':False}) 
data["frailty"] = data["frailty"].replace({"1":True, "0":False}).astype(bool)

# Drop gender for now
data = data.drop([#'gender', 
                  'condition'], axis=1)

# Sanity check
data

Unnamed: 0_level_0,gender,frailty,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,...,MIR6787,MIR4793,MIR935,MIR5006,MIR4722,MIR92B,MIR943,MIR7847,MIR6785,MIR4467
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
s301,male,True,99,20,284,101,19,15,252,120,...,0,0,0,0,0,0,0,0,0,0
s302,male,True,54,3,381,127,7,39,113,145,...,0,0,0,0,0,0,0,0,0,0
s303,male,True,44,2,303,123,9,16,147,82,...,0,0,0,0,0,0,0,0,0,0
s304,male,True,59,3,306,146,1,6,92,83,...,0,0,0,1,0,0,0,0,0,0
s305,male,True,52,0,402,103,10,31,115,137,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
s530,female,False,54,0,349,139,5,4,51,70,...,0,0,0,0,0,0,0,0,0,0
s531,male,False,73,0,395,115,16,13,103,69,...,0,0,0,0,0,0,0,0,0,0
s401,male,False,55,2,252,97,4,17,48,62,...,0,0,0,1,0,0,0,0,0,0
s404,male,False,72,4,204,99,21,31,196,115,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Split the data into training and testing set
seed = 42
train, test = train_test_split(data, test_size = 0.2, stratify=data["frailty"], random_state=seed)

---

## Perform QLattice

In [16]:
# Connect to QLattice
ql = feyn.QLattice()

In [18]:
ql.reset(random_seed=1)

Deprecation: The reset() function is deprecated. Instantiating a new feyn.QLattice() now achieves the same result.


In [19]:
# Set the variable to predict as output
models = ql.auto_run(train, 
                     output_name='frailty', 
                     kind='categorical', 
                     stypes=stypes,
                     criterion='bic', 
                     max_complexity=4
                     )


TypeError: stypes should be of type Optional[Dict[str, str]].

In [10]:
data.dtypes

frailty    int64
TSPAN6     int64
TNMD       int64
DPM1       int64
SCYL3      int64
           ...  
MIR92B     int64
MIR943     int64
MIR7847    int64
MIR6785    int64
MIR4467    int64
Length: 35836, dtype: object