In [None]:
# https://towardsdatascience.com/quickly-test-multiple-models-a98477476f0

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge, LinearRegression
from sklearn.svm import SVR
from catboost import CatBoostRegressor
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
from sklearn.model_selection import GridSearchCV
import numpy as np

In [None]:
# Notebook-wide settings read by the cells below.

# Half-width of the acceptance band used by is_in_threshold(): a prediction counts as a
# hit when it lies within +/- threshold of the actual value (same units as the target column).
threshold = 1
# Seed passed to train_test_split and to the seeded estimators so runs are repeatable.
random_state = 42

In [None]:
# Load the S11 data CSV from disk.
df = pd.read_csv("../test_data/Grounded CPW Leaky Wave antenna/S11 Data.csv")

# Discard every row whose S11 value is strictly positive.
target_column = 'dB(S(1,1)) []'
positive_s11_rows = df.index[df[target_column] > 0]
df = df.drop(index=positive_s11_rows)

# Features are all remaining columns; the target is kept as a single-column DataFrame.
input_x = df.drop(columns=[target_column])
input_y = df[[target_column]]

# Hold out a test split (train_test_split's default proportions), seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(input_x, input_y, random_state=random_state)

In [None]:
def generate_pipeline(scaler, model):
    """Wrap ``model`` behind ``scaler`` in a two-step pipeline.

    Args:
        scaler: Transformer placed in the ``'normalize'`` step.
        model: Estimator placed in the final ``'model'`` step.

    Returns:
        A ``(label, pipeline)`` tuple, where ``label`` is the class name of
        ``model`` and ``pipeline`` is the assembled ``Pipeline``.
    """
    label = model.__class__.__name__
    stages = [('normalize', scaler), ('model', model)]
    return (label, Pipeline(steps=stages))

In [None]:
# Check whether a predicted value lies within a tolerance band around the actual value.
def is_in_threshold(actual, pred, tolerance=None):
    """Return whether ``pred`` is within ``tolerance`` of ``actual`` (bounds inclusive).

    Args:
        actual: The reference (true) value.
        pred: The predicted value to check.
        tolerance: Half-width of the acceptance band. When omitted, the
            notebook-level ``threshold`` setting is used, which is the
            behaviour existing callers rely on.

    Returns:
        True when ``actual - tolerance <= pred <= actual + tolerance``,
        otherwise False. A NaN on either side yields False.
    """
    if tolerance is None:
        # Resolved at call time so later changes to the notebook setting are honoured.
        tolerance = threshold
    return actual - tolerance <= pred <= actual + tolerance

def create_tf_column(results, tolerance=None):
    """Flag, row by row, whether each prediction is within tolerance of its actual value.

    Applies the same inclusive band test as ``is_in_threshold`` but to whole
    columns at once instead of calling a Python function for every row.

    Args:
        results: DataFrame with a ``'y_test'`` column (actual values) and a
            ``'predictions'`` column (predicted values).
        tolerance: Half-width of the acceptance band. When omitted, the
            notebook-level ``threshold`` setting is used.

    Returns:
        A boolean Series aligned with ``results``: True where
        ``y_test - tolerance <= predictions <= y_test + tolerance``.
    """
    if tolerance is None:
        tolerance = threshold
    actual = results['y_test']
    predicted = results['predictions']
    # Series.between is inclusive on both ends by default, matching the <= / >= test.
    return predicted.between(actual - tolerance, actual + tolerance)

def get_score(y_test, y_pred):
    """Return the fraction of predictions that fall within the threshold band.

    Args:
        y_test: Actual target values; a pandas object exposing ``.values``.
        y_pred: Predicted values, one per row of ``y_test``.

    Returns:
        The share of rows flagged True by ``create_tf_column`` (between 0 and 1).
        Returns NaN when there are no rows to score.
    """
    dataframe = pd.DataFrame(y_test.values, columns=['y_test'])
    dataframe['predictions'] = y_pred

    n_samples = dataframe.shape[0]
    if n_samples == 0:
        # A hit rate over zero samples is undefined; report NaN instead of failing.
        return float('nan')

    # value_counts() has no True entry when every prediction misses, so default to 0
    # rather than letting .get() return None (which would make the division fail).
    n_hits = create_tf_column(dataframe).value_counts().get(True, 0)
    return n_hits / n_samples

def get_rsme(y_test, y_pred):
    """Return the root-mean-squared error (RMSE) between actual and predicted values.

    The function name keeps its original spelling because other cells call it
    and use ``'rsme'`` as a result key.

    The ``squared=False`` argument of ``mean_squared_error`` is deprecated in
    recent scikit-learn releases and later removed, so the root is taken
    explicitly here. Taking the square root per output column and then averaging
    reproduces what ``squared=False`` computed, including for multi-column targets.

    Args:
        y_test: Actual target values.
        y_pred: Predicted target values.

    Returns:
        The RMSE as a float.
    """
    per_output_mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')
    return np.mean(np.sqrt(per_output_mse))

def get_r_squared(y_test, y_pred):
    """Return the R^2 score of the predictions, as computed by ``r2_score``.

    Args:
        y_test: Actual target values.
        y_pred: Predicted target values.
    """
    r_squared = r2_score(y_true=y_test, y_pred=y_pred)
    return r_squared

In [None]:
# Tune the random forest with an exhaustive grid search to find its best parameters.
# The search is fitted on the training split only, so the held-out test rows used for the
# final comparison never influence hyperparameter selection.

random_forest_model = RandomForestRegressor(random_state=random_state)
random_grid = {
    'n_estimators': [200, 400, 600, 800, 1000],
    'max_depth': [10, 35, 60, 85, 110],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}
# 225 candidates x 10 folds is expensive, so spread the fits across all available cores.
random_forest_search = GridSearchCV(
    random_forest_model, random_grid, scoring='r2', cv=10, verbose=10, n_jobs=-1
)
random_forest_search.fit(X_train, y_train.values.ravel())
print(random_forest_search.best_params_)

In [None]:
# Tune ElasticNet's regularisation strength and L1/L2 mix with an exhaustive grid search.
# NOTE(review): the original remark here read "Doesnt make sense for LinearRegression" --
# presumably because plain LinearRegression has no such hyperparameters to tune; confirm.
# The search is fitted on the training split only, so the held-out test rows used for the
# final comparison never influence hyperparameter selection.
elastic_net_model = ElasticNet(random_state=random_state)
random_grid = {
    "max_iter": [1, 5, 10],  # NOTE(review): very low iteration caps; may trigger convergence warnings
    "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    "l1_ratio": np.arange(0.0, 1.0, 0.1),  # 0.0, 0.1, ..., 0.9 (1.0 itself is excluded)
}
elastic_net_search = GridSearchCV(
    elastic_net_model, random_grid, scoring='r2', cv=10, verbose=10, n_jobs=-1
)
elastic_net_search.fit(X_train, y_train.values.ravel())
print(elastic_net_search.best_params_)

In [None]:
# Tune SGDRegressor with an exhaustive grid search. Its best estimator is reused in the
# final model comparison, so the search is fitted on the training split only: the held-out
# test rows must not influence which hyperparameters are selected.
# NOTE(review): the search sees unscaled features, while the comparison later wraps the
# estimator behind a MinMaxScaler -- confirm that mismatch is intended.
sgd_regressor_model = SGDRegressor(random_state=random_state)
random_grid = {
    'max_iter': [100000, 1000000],
    'tol': [1e-10, 1e-3],
    'eta0': [0.001, 0.01],
    "alpha": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
}
sgd_regressor_search = GridSearchCV(
    sgd_regressor_model, random_grid, scoring='r2', cv=10, verbose=10, n_jobs=-1
)
sgd_regressor_search.fit(X_train, y_train.values.ravel())
print(sgd_regressor_search.best_params_)

In [None]:
# Tune SVR with an exhaustive grid search, fitted on the training split only so the
# held-out test rows never influence hyperparameter selection.
svr_model = SVR()
# One sub-grid per kernel, listing only the parameters that kernel actually uses:
# 'degree' applies to 'poly' only, 'coef0' to 'poly' and 'sigmoid', and 'gamma' to every
# kernel except 'linear'. A single Cartesian grid would refit identical models many times.
random_grid = [
    {'kernel': ['linear'], 'C': [1, 5, 10]},
    {'kernel': ['rbf'], 'C': [1, 5, 10], 'gamma': ['auto', 'scale']},
    {'kernel': ['sigmoid'], 'C': [1, 5, 10], 'coef0': [0.01, 10, 0.5], 'gamma': ['auto', 'scale']},
    {
        'kernel': ['poly'],
        'C': [1, 5, 10],
        'degree': [3, 8],
        'coef0': [0.01, 10, 0.5],
        'gamma': ['auto', 'scale'],
    },
]
svr_search = GridSearchCV(svr_model, random_grid, cv=10, verbose=10, scoring='r2', n_jobs=-1)
svr_search.fit(X_train, y_train.values.ravel())
print(svr_search.best_params_)

In [None]:
# Build one (name, pipeline) pair per candidate model; every pipeline is given the same
# MinMaxScaler instance as its 'normalize' step.
scaler = MinMaxScaler(feature_range=(0, 1))

# The order matters: later cells pick pipelines out of `models` by position.
# Estimators that use randomness are seeded with `random_state` so the comparison is repeatable.
candidate_models = [
    # NOTE(review): these values look hand-copied from the ElasticNet grid search -- confirm.
    ElasticNet(alpha=0.1, l1_ratio=0.8, max_iter=1),
    sgd_regressor_search.best_estimator_,
    BayesianRidge(),
    LinearRegression(),
    CatBoostRegressor(random_state=random_state, verbose=0),  # verbose=0 silences per-iteration logs
    KernelRidge(),
    XGBRegressor(random_state=random_state),
    DecisionTreeRegressor(random_state=random_state),
    SVR(),
    RandomForestRegressor(random_state=random_state),
]
models = [generate_pipeline(scaler, model) for model in candidate_models]

In [None]:
# Try a single pipeline end to end before running the full comparison.
# NOTE: index 1 selects the SGDRegressor pipeline, not the random forest; the random
# forest pipeline is the last entry (models[-1]).
model = models[1]
# Each entry is a (name, pipeline) pair. The target is a single-column DataFrame, so
# flatten it to the 1-D shape estimators expect.
clf = model[1].fit(X_train, y_train.values.ravel())
y_pred = clf.predict(X_test)
get_score(y_test, y_pred)

In [None]:
# Show the held-out target values as a flat 1-D array.
y_test.to_numpy().ravel()

In [None]:
# Side-by-side view of the actual test values and the most recent predictions in `y_pred`.
# np.ravel keeps this working when an estimator returns predictions shaped (n, 1).
pd.DataFrame({'y_actual': y_test.values.reshape(-1), 'y_pred': np.ravel(y_pred)})

In [None]:
# Fit every candidate pipeline on the training split and score it on the held-out test split.
results = {}

# The target is a single-column DataFrame; flatten it once to the 1-D shape estimators expect.
y_train_flat = y_train.values.ravel()

for name, model in models:
    clf = model.fit(X_train, y_train_flat)
    y_pred = clf.predict(X_test)

    # One entry per model, keyed by the estimator's class name.
    results[name] = {
        'score': get_score(y_test, y_pred),
        'rsme': get_rsme(y_test, y_pred),
        'r_squared': get_r_squared(y_test, y_pred),
    }

In [None]:
# Tabulate the metrics: one row per model, one column per metric.
# NOTE: this rebinds `df`, which held the input data earlier in the notebook.
df = pd.DataFrame(results).transpose()

In [None]:
# Per-model share of test predictions that landed within the threshold band.
px.bar(
    df, x=df.index, y='score',
    title='Fraction of test predictions within the threshold, per model',
).update_layout(xaxis_title='Model', yaxis_title='Within-threshold fraction')

In [None]:
# Per-model root-mean-squared error on the test split (lower is better).
px.bar(
    df, x=df.index, y='rsme',
    title='Root-mean-squared error on the test split, per model',
).update_layout(xaxis_title='Model', yaxis_title='RMSE')

In [None]:
# Per-model R^2 score on the test split (higher is better).
px.bar(
    df, x=df.index, y='r_squared',
    title='R^2 score on the test split, per model',
).update_layout(xaxis_title='Model', yaxis_title='R^2')