In [None]:
import sys
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC


# Put the parent of the current working directory on the module search path
# so the two project-local imports below can be resolved.
# NOTE(review): this depends on the directory the kernel was started from --
# presumably the notebook's own folder; confirm against the repo layout.
sys.path.append(os.path.dirname(os.getcwd()))
from utils import calculate_score, precision_calculate_score
from IterativeSelector import IterativeSelector

# NOTE(review): this filter hides *every* warning for the rest of the session,
# including convergence and deprecation warnings from the libraries used here.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the raw inputs: both files are space-separated with no header row.
x_train = pd.read_csv("../../../data/x_train.txt", header=None, sep=" ")
# The raw labels get their own name so that `y_train` only ever refers to the
# training labels produced by the split below (no rebinding to another type).
y_raw = pd.read_csv("../../../data/y_train.txt", header=None, sep=" ")

# Put features and labels side by side. Every column except the last is named
# x1..xN and the last one is named "y" (assumes the label file holds a single
# column -- TODO confirm).
df_train = pd.concat([x_train, y_raw], axis=1)
df_train.columns = ["x" + str(i) for i in range(1, df_train.shape[1])] + ["y"]

# A fixed seed makes the stratified 80/20 split identical on every
# Restart & Run All, so scores from different runs are comparable.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    df_train.drop(columns="y"),
    df_train["y"],
    test_size=0.2,
    stratify=df_train["y"],
    random_state=SPLIT_SEED,
)

In [None]:
# Starting feature subset, written as the numeric suffixes of the "x<N>"
# column names. The ids are kept in the original list order (the
# lexicographic order of the resulting names), so the list is unchanged.
initial_features = [
    f"x{column_id}"
    for column_id in (
        1, 10, 101, 102, 103, 104, 105, 106, 132, 140,
        149, 153, 156, 176, 191, 2, 22, 221, 229, 253,
        286, 3, 304, 322, 323, 324, 329, 336, 35, 352,
        36, 4, 40, 404, 413, 423, 459, 463, 499, 5,
        58, 6, 65, 7, 74, 8, 81, 9, 99,
    )
]

# Seed for the stochastic estimator below. Without it the forest is rebuilt
# with different randomness on every run and the selection scores drift.
MODEL_SEED = 42

# Estimators handed to the selector.
models = [
    RandomForestClassifier(n_estimators=100, random_state=MODEL_SEED),
    # No seed here: SVC only consumes random_state when probability=True.
    SVC(kernel="rbf", C=0.8),
]

# Backward-direction selector configured with two project-specific scoring
# helpers plus sklearn's precision and accuracy metrics.
# NOTE(review): how IterativeSelector invokes each scoring function is not
# visible from this notebook -- confirm the expected call signature.
backward_selector = IterativeSelector(
    models=models,
    scoring_functions=[
        precision_calculate_score,
        calculate_score,
        precision_score,
        accuracy_score,
    ],
    direction="backward",
)

# Run the selection starting from `initial_features`, passing the train split
# and the held-out split.
# NOTE(review): steps_tolerance=20 is presumably an early-stopping patience;
# verify against IterativeSelector.run.
backward_selector.run(
    X_train,
    y_train,
    X_test,
    y_test,
    steps_tolerance=20,
    initial_features=initial_features,
)

In [None]:
# Create the output directory first: to_csv does not create missing parent
# folders, and failing here would throw away the result of the long selection
# run above.
os.makedirs("results", exist_ok=True)
# Persist the selector's score table without the index column.
backward_selector.scores.to_csv("results/iterative.csv", index=False)