In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.pipeline import make_pipeline

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from collections import defaultdict
from tqdm.notebook import tqdm


# Read the two space-separated input files in one pass: `data` comes from
# TCGAdata.txt and `labels` from TCGAlabels.
# NOTE(review): presumably one row per sample in both files - confirm.
data, labels = (
    pd.read_csv(path, sep=" ")
    for path in ("data/TCGAdata.txt", "data/TCGAlabels")
)

In [12]:
# One seed for the split and for every forest, so that Restart & Run All
# reproduces the scores printed below.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=RANDOM_STATE
)

# The estimator and the metrics are given 1-D targets taken from column "x".
# `.to_numpy()` replaces `Series.ravel()`, which pandas 2.2 deprecated.
y_train_1d = y_train["x"].to_numpy()
y_test_1d = y_test["x"].to_numpy()

# Random-forest settings to sweep: 5 x 5 x 2 = 50 combinations.
hyperparams = {
    'n_estimators': [10, 50, 100, 200, 1000],
    'max_depth': [3, 5, 10, 15, 25],
    "class_weight": ["balanced", None],
}

# NOTE(review): every combination is scored on X_test, so choosing the best
# row from this printout tunes on the test set and the winning score is
# optimistic. Consider cross-validating on X_train (cross_val_score is already
# imported) and touching X_test only once, for the final model. A stratified
# split may also help if some classes are rare - confirm against class counts.
for n_estimators in hyperparams['n_estimators']:
    for max_depth in hyperparams['max_depth']:
        for class_weight in hyperparams['class_weight']:
            model = RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                class_weight=class_weight,
                random_state=RANDOM_STATE,  # unseeded forests give different scores on every run
            )
            model.fit(X_train, y_train_1d)
            y_pred = model.predict(X_test)
            print(f"n_estimators: {n_estimators}, max_depth: {max_depth}, class_weight: {class_weight}")
            print(f"Accuracy: {accuracy_score(y_test_1d, y_pred)}")
            # average=None reports one F1 value per class; zero_division=0 keeps
            # the 0.0 score for a class that is never predicted, without the
            # UndefinedMetricWarning the default setting emits.
            print(f"F1 score: {f1_score(y_test_1d, y_pred, average=None, zero_division=0)}")
            print()

n_estimators: 10, max_depth: 3, class_weight: balanced
Accuracy: 0.8685121107266436
F1 score: [0.87555556 0.98412698 0.95901639 0.78599222 0.91089109 0.48780488]

n_estimators: 10, max_depth: 3, class_weight: None
Accuracy: 0.9411764705882353
F1 score: [0.94296578 0.91803279 0.9626556  0.97247706 0.92       0.        ]

n_estimators: 10, max_depth: 5, class_weight: balanced
Accuracy: 0.967128027681661
F1 score: [0.97177419 0.98412698 0.97942387 0.95614035 0.97087379 0.7826087 ]

n_estimators: 10, max_depth: 5, class_weight: None
Accuracy: 0.9740484429065744
F1 score: [0.98418972 0.98412698 0.97942387 0.96428571 0.97142857 0.66666667]

n_estimators: 10, max_depth: 10, class_weight: balanced
Accuracy: 0.9809688581314879
F1 score: [0.98412698 0.98412698 0.99186992 0.96832579 1.         0.77777778]

n_estimators: 10, max_depth: 10, class_weight: None
Accuracy: 0.9757785467128027
F1 score: [0.9860835  0.98412698 0.98360656 0.96428571 0.96153846 0.77777778]

n_estimators: 10, max_depth: 15, 