**Test Environment Notebook**
------

In [None]:
### Tool imports
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### sklearn imports
from sklearn.cluster import KMeans, MiniBatchKMeans

### Local imports
import utils.data_handling_lib as dhl
import utils.graphics_lib as gl

from utils.data_handling_lib import RANDOM_STATE

Create raw test dataset

In [None]:
# Relative path to the combined wine-quality CSV (resolved against the
# notebook's working directory).
dataset_file_path = "datasets/winequality-combined.csv"
# Load the file through the project's data-handling helper.
# NOTE(review): the return type is not visible here; later cells call
# .copy()/.drop() on it, so it is presumably a pandas DataFrame — confirm.
test_raw_data = dhl.load_data(dataset_file_path)

Reduced features test

In [None]:
# Build the reduced-feature frame used by the rest of this notebook.
#
# DataFrame.drop returns a NEW frame by default (it is not in-place), so the
# result has to be assigned; previously it was discarded and the "reduced"
# frame still contained every column. Because drop already returns a new
# object, test_raw_data is left untouched and no explicit .copy() is needed.
test_reduced_raw_data = test_raw_data.drop(
    columns=[
        "free sulfur dioxide",
        "total sulfur dioxide",
        "pH",
        "residual sugar",
        "sulphates",
    ]
)

Split dataset

In [None]:
# Split the reduced frame into four pieces via the project's split helper.
# Presumably "quality" is the label column the split is stratified on and
# separated out into *_labels — verify against dhl.strat_split_dataset.
# NOTE(review): the meaning of the third positional argument (1) is not
# visible from this notebook — confirm and consider passing it by keyword.
train, train_labels, test, test_labels = dhl.strat_split_dataset(test_reduced_raw_data, "quality", 1)

In [None]:
# Preview the first rows of the test split. Ending the cell with the bare
# expression lets Jupyter use the DataFrame's rich (HTML table) display
# instead of the plain-text dump that print() produces.
test.head()

**Random Forest w/ KMeans Clustering Test**

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
import utils.clustering_tools_lib as ctl

In [None]:

def fit_test_pipeline_1(data, labels):
    """Build and fit a KMeans -> RandomForestRegressor pipeline.

    The pipeline has two named steps:

    * ``"kmeans"`` - ``KMeans(n_clusters=5)``; as a non-final pipeline step
      its transformed output is what the regressor is trained on.
    * ``"rf_reg"`` - ``RandomForestRegressor(n_estimators=100, max_features=4)``.

    Both steps are seeded with the project-wide ``RANDOM_STATE`` so that
    repeated fits (and any grid search built on this pipeline) are
    reproducible. Previously only the KMeans step was seeded.

    Parameters
    ----------
    data : array-like
        Training features passed to ``Pipeline.fit``.
    labels : array-like
        Regression targets passed to ``Pipeline.fit``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline.
    """
    rfkc_test_pipeline = Pipeline([
        ("kmeans", KMeans(n_clusters=5, random_state=RANDOM_STATE)),
        # Seed the forest as well: bootstrap sampling and feature sub-sampling
        # are random, so an unseeded forest gives different scores per run.
        ("rf_reg", RandomForestRegressor(
            n_estimators=100,
            max_features=4,
            random_state=RANDOM_STATE,
        )),
    ])

    rfkc_test_pipeline.fit(data, labels)

    return rfkc_test_pipeline

In [None]:
# Hyper-parameter grid for the search: candidate cluster counts for the
# pipeline step named "kmeans" (<step>__<param> addressing).
param_grid = {
    "kmeans__n_clusters": [10, 100, 1000],
}

In [None]:
# Build the KMeans + random-forest pipeline and fit it on the training split.
# NOTE(review): this object is handed to GridSearchCV below, which fits its
# own copies of the estimator for each candidate — so this up-front fit is
# likely redundant for the search; confirm whether a fitted test_1 is needed.
test_1 = fit_test_pipeline_1(train, train_labels)

In [None]:
# Wrap the pipeline in a grid search over param_grid. No `scoring` or `cv`
# is passed, so scikit-learn's defaults for those settings apply.
grid_search = GridSearchCV(test_1, param_grid=param_grid)

In [None]:
# Run the search on the training split.
# NOTE(review): the grid includes n_clusters=1000, which may be slow and
# requires each CV training fold to have at least that many rows — confirm.
grid_search.fit(train, train_labels)

In [None]:
# Report the search outcome: best score, the parameters that produced it,
# and the best estimator, one per line.
print(f"\n{grid_search.best_score_}\n{grid_search.best_params_}\n{grid_search.best_estimator_}")

In [None]:
# Display the best score from the search as the cell's output value.
grid_search.best_score_

In [None]:
# Hand the grid-search object to the project's save helper under a fixed
# file name.
dhl.save_model(
    grid_search,
    filename="GridSearchRFRegTest1",
)

In [None]:
# Standalone 100-cluster KMeans used for the decision-boundary plot.
# Seeded with the project-wide RANDOM_STATE (as the pipeline's KMeans step
# is) so the centroid initialisation — and hence the plot — is reproducible.
kmeans = KMeans(n_clusters=100, random_state=RANDOM_STATE)

In [None]:
# Plot the clusterer's decision boundaries over the training split.
# NOTE(review): `kmeans` is never fitted in the cells visible here — confirm
# that gl.plot_decision_boundaries fits it, or call kmeans.fit(train) first.
# NOTE(review): decision-boundary plots are usually 2-D; `train` presumably
# has more than two feature columns — verify what the helper expects.
gl.plot_decision_boundaries(kmeans, train)