In [220]:
# For Random Tree Regression Model
#! pip install scikit-learn

In [221]:
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import matplotlib as plt

def build_xgb_model(data, title):
    # Get the X and Y variables from the given dataset
    # X is the features used to predict Y
    X = data.drop(labels = ["DX_encoded"], axis = 1)
    Y = data["DX_encoded"]

    eval_metric = ["auc","error"]

    print("XGB Model for ", title)

    # Split the X and Y data into training and testing data 
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.15, random_state=42)
    print("Shape of training data is: ", X_train.shape)
    print("Shape of testing data is: ", X_test.shape)

    # Create the XGBoost Model for multi-class classification
    xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=len(set(Y)), use_label_encoder=False, eval_metric='mlogloss')

    # Fit the model to the X and Y data
    xgb_model.fit(X_train, y_train, verbose = True)

    # Calculation predictions based on trained model
    predictions = xgb_model.predict(X_test)

    # Give an accuracy score 
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy:.2f}")
    print(classification_report(y_test, predictions))

In [222]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

def build_randTree_model(data, title):
    # Get the X and Y variables from the given dataset
    # X is the features used to predict Y
    X = data.drop(labels = ["DX_encoded"], axis = 1)
    Y = data["DX_encoded"]

    print("RandomTree Model for ", title)

    # Split the X and Y data into training and testing data 
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
    print("Shape of training data is: ", X_train.shape)
    print("Shape of testing data is: ", X_test.shape)

    num_cols = X_train._get_numeric_data().columns
    print("Number of numeric features:",num_cols.size)

    # Create the XGBoost Model for multi-class classification
    randTreemodel = RandomForestRegressor(n_estimators=100)

    # Fit the model to the X and Y data
    randTreemodel.fit(X_train, y_train)

    # Give an accuracy score 
    r2 = randTreemodel.score(X_test, y_test)
    print(r2)

In [223]:
data_AB_ALPS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/AB/data_AB_ALPS_demo_preprocessed.csv")

build_xgb_model(data_AB_ALPS_demo, "AB ALPS + demo")

XGB Model for  AB ALPS + demo
Shape of training data is:  (235, 16)
Shape of testing data is:  (42, 16)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.74
              precision    recall  f1-score   support

           0       0.82      0.86      0.84        21
           1       0.40      0.50      0.44         4
           2       0.73      0.65      0.69        17

    accuracy                           0.74        42
   macro avg       0.65      0.67      0.66        42
weighted avg       0.74      0.74      0.74        42



In [224]:
build_randTree_model(data_AB_ALPS_demo, "AB ALPS + demo")

RandomTree Model for  AB ALPS + demo
Shape of training data is:  (221, 16)
Shape of testing data is:  (56, 16)
Number of numeric features: 16
0.3435582260371959


In [225]:
data_AB_EPVS_ALPS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/AB/data_AB_EPVS_ALPS_demo_preprocessed.csv")

build_xgb_model(data_AB_EPVS_ALPS_demo, "AB EPVS + ALPS + demo")

XGB Model for  AB EPVS + ALPS + demo
Shape of training data is:  (235, 51)
Shape of testing data is:  (42, 51)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.79
              precision    recall  f1-score   support

           0       0.81      1.00      0.89        21
           1       0.50      0.50      0.50         4
           2       0.83      0.59      0.69        17

    accuracy                           0.79        42
   macro avg       0.71      0.70      0.69        42
weighted avg       0.79      0.79      0.77        42



In [226]:
build_randTree_model(data_AB_EPVS_ALPS_demo, "AB EPVS + ALPS + demo")

RandomTree Model for  AB EPVS + ALPS + demo
Shape of training data is:  (221, 51)
Shape of testing data is:  (56, 51)
Number of numeric features: 51
0.3045625178826896


In [227]:
data_AB_EPVS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/AB/data_AB_EPVS_demo_preprocessed.csv")

build_xgb_model(data_AB_EPVS_demo, "AB EPVS + demo")

XGB Model for  AB EPVS + demo
Shape of training data is:  (634, 41)
Shape of testing data is:  (113, 41)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.69
              precision    recall  f1-score   support

           0       0.68      0.62      0.65        37
           1       0.73      0.80      0.76        20
           2       0.68      0.70      0.69        56

    accuracy                           0.69       113
   macro avg       0.70      0.71      0.70       113
weighted avg       0.69      0.69      0.69       113



In [228]:
build_randTree_model(data_AB_EPVS_demo, "AB EPVS + ALPS + demo")

RandomTree Model for  AB EPVS + ALPS + demo
Shape of training data is:  (597, 41)
Shape of testing data is:  (150, 41)
Number of numeric features: 41
0.2633732236350037


## Tau Models

In [229]:
data_Tau_EPVS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/Tau/data_Tau_EPVS_demo_preprocessed.csv")

build_xgb_model(data_Tau_EPVS_demo, "Tau EPVS + demo")

XGB Model for  Tau EPVS + demo
Shape of training data is:  (215, 39)
Shape of testing data is:  (39, 39)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.69
              precision    recall  f1-score   support

           0       0.81      0.68      0.74        25
           2       0.56      0.71      0.62        14

    accuracy                           0.69        39
   macro avg       0.68      0.70      0.68        39
weighted avg       0.72      0.69      0.70        39



In [230]:
data_Tau_ALPS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/Tau/data_Tau_ALPS_demo_preprocessed.csv")

build_xgb_model(data_Tau_ALPS_demo, "Tau ALPS + demo")

XGB Model for  Tau ALPS + demo
Shape of training data is:  (186, 9)
Shape of testing data is:  (33, 9)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.67      0.64      0.65        22
           1       0.00      0.00      0.00         2
           2       0.33      0.44      0.38         9

    accuracy                           0.55        33
   macro avg       0.33      0.36      0.34        33
weighted avg       0.54      0.55      0.54        33



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [231]:
data_Tau_EPVS_ALPS_demo = pd.read_csv("/Users/dasmaster/BIL Lab Projects/project1/Tau/data_Tau_EPVS_ALPS_demo_preprocessed.csv")

build_xgb_model(data_Tau_EPVS_ALPS_demo, "Tau EPVS + ALPS + demo")

XGB Model for  Tau EPVS + ALPS + demo
Shape of training data is:  (186, 44)
Shape of testing data is:  (33, 44)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.67
              precision    recall  f1-score   support

           0       0.77      0.77      0.77        22
           1       0.67      1.00      0.80         2
           2       0.38      0.33      0.35         9

    accuracy                           0.67        33
   macro avg       0.60      0.70      0.64        33
weighted avg       0.66      0.67      0.66        33

