In [6]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, plot_confusion_matrix

from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

import utilities.graph as graph

# Allow up to 200 characters per cell before pandas truncates the repr of a column value.
pd.set_option("display.max_colwidth", 200)


In [2]:
# Read the CSV stored under ./data into a DataFrame (path is relative to the notebook's working directory).
final_df = pd.read_csv(filepath_or_buffer="./data/final_processed.csv")

In [4]:
# Split the frame into the label column (y) and every remaining column (X).
y = final_df["||__target__||"]
X = final_df.drop("||__target__||", axis="columns")

In [5]:
# Hold out a test set using scikit-learn's default split size.
# Stratifying on y keeps the label proportions the same in both splits;
# the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [8]:
# Seed shared by every stochastic estimator below so that repeated runs of the
# grid search produce the same scores. Uses the same value (42) as the
# train/test split.
RANDOM_STATE = 42

# The SVC is wrapped in a pipeline so its inputs are standardised first; the
# scaler is fitted inside each CV fold rather than on the full training set.
svc_pipe = Pipeline([
    ("svc_ss", StandardScaler()),
    ("svc", SVC()),
])

# Ensemble of five candidate models. The names given here are the prefixes
# used to address each member's hyperparameters in `params`.
# NOTE(review): `base_estimator` is the pre-1.2 scikit-learn spelling (renamed
# to `estimator` in 1.2). It is kept because this notebook's imports
# (`plot_confusion_matrix`) only exist in those older versions. If the
# environment is upgraded, rename it here AND in the matching `params` key.
vote = VotingClassifier([
    ("rtree", RandomForestClassifier(random_state=RANDOM_STATE)),
    ("etree", ExtraTreesClassifier(random_state=RANDOM_STATE)),
    ("ada", AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(),
        random_state=RANDOM_STATE,
    )),
    ("gradbc", GradientBoostingClassifier(random_state=RANDOM_STATE)),
    ("svm", svc_pipe),
])

params = {
    # One-hot weight vectors: with the default hard voting, each candidate
    # lets exactly one member decide the prediction, so the grid search
    # effectively compares the five members against each other.
    # Order matches the estimator list above: rtree, etree, ada, gradbc, svm.
    "weights": [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ],
    "rtree__max_depth": [1],
    "etree__max_depth": [1],
    "ada__base_estimator__max_depth": [1],
    # TODO: add SVC hyperparameters here (addressed as "svm__svc__<param>").
}

# Exhaustive search over `params` using GridSearchCV's default CV and scoring.
gs = GridSearchCV(vote, params)

In [None]:
# Run the cross-validated grid search on the training split only; the test split stays untouched.
gs.fit(X_train, y=y_train)