In [1]:
# https://towardsdatascience.com/quickly-test-multiple-models-a98477476f0

In [2]:
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge, LinearRegression
from sklearn.svm import SVR
from catboost import CatBoostRegressor
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer
from sklearn.tree import DecisionTreeRegressor
import re
from sklearn.ensemble import RandomForestRegressor


In [3]:
threshold = 1
random_state = 42

In [4]:
df = pd.read_csv("test_data/Grounded CPW Leaky Wave antenna/S11 Data.csv")

df = df.drop(df[df['dB(S(1,1)) []'] > 0].index)

df = df.rename(columns={'dB(S(1,1)) []': 's11'})
regex = re.compile(r"\[|\]|<", re.IGNORECASE)
df.columns = [regex.sub("_", col) if any(x in str(col) for x in set(('[', ']', '<'))) else col for col in df.columns.values]



input_x = df.drop(columns=['s11'], axis=1)
input_y = df[['s11']]

X_train, X_test, y_train, y_test = train_test_split(input_x, input_y, random_state=random_state)

In [5]:
def generate_pipeline(scaler, model):
    return (str(model), Pipeline(steps=[('normalize', scaler), ('model', model)]))

In [6]:
scaler = MinMaxScaler(feature_range=(0,1))
models = [generate_pipeline(scaler, model) for model in [ElasticNet(), SGDRegressor(), BayesianRidge(), LinearRegression(), CatBoostRegressor(), KernelRidge(), XGBRegressor(), DecisionTreeRegressor(), SVR(), RandomForestRegressor(n_estimators=10, max_features=2, max_leaf_nodes=5,random_state=42)]]

In [7]:
# Check if predicted value is threshold amount above or below actual value
def is_in_threshold(actual, pred):
    return pred <= actual + threshold and pred >= actual - threshold

def create_tf_column(results):
    return results.apply(lambda x: is_in_threshold(x['y_test'], x['predictions']), axis=1)

def get_score(y_test, y_pred):
    dataframe = pd.DataFrame(y_test.values, columns=['y_test'])
    dataframe['predictions'] = y_pred
    return create_tf_column(dataframe).value_counts().get(True) / dataframe.shape[0]

In [8]:
test = models[-1][1].fit(X_train, y_train)
y_pred = pd.DataFrame(test.predict(X_test), columns=y_test.columns)


  return fit_method(estimator, *args, **kwargs)


In [9]:
get_score(y_test, y_pred)

0.17126546146527116

In [10]:
dfs = []
results = []
names = []
scorer = {'custom': make_scorer(get_score, greater_is_better=True)}

for name, model in models:
  kfold = model_selection.KFold(n_splits=5, shuffle=True, random_state=90210)
  cv_results = model_selection.cross_validate(model, X_train, y_train, cv=kfold, scoring=scorer)
  clf = model.fit(X_train, y_train)
  y_pred = clf.predict(X_test)
  print(name)

  # print(get_score(X_test, y_test, y_pred, model))
  results.append(cv_results)
  names.append(name)
  this_df = pd.DataFrame(cv_results)
  this_df['model'] = name
  dfs.append(this_df)
  final = pd.concat(dfs, ignore_index=True)
  print(final)

ElasticNet()
   fit_time  score_time  test_custom         model
0  0.035347    0.045019     0.069731  ElasticNet()
1  0.107336    0.039035     0.093502  ElasticNet()
2  0.029913    0.029031     0.082540  ElasticNet()
3  0.036668    0.020604     0.096825  ElasticNet()
4  0.034177    0.031679     0.082540  ElasticNet()


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SGDRegressor()
   fit_time  score_time  test_custom           model
0  0.035347    0.045019     0.069731    ElasticNet()
1  0.107336    0.039035     0.093502    ElasticNet()
2  0.029913    0.029031     0.082540    ElasticNet()
3  0.036668    0.020604     0.096825    ElasticNet()
4  0.034177    0.031679     0.082540    ElasticNet()
5  0.033461    0.019635     0.201268  SGDRegressor()
6  0.029941    0.021406     0.201268  SGDRegressor()
7  0.016343    0.014398     0.219048  SGDRegressor()
8  0.014336    0.010020     0.180952  SGDRegressor()
9  0.012142    0.009892     0.187302  SGDRegressor()


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


BayesianRidge()
    fit_time  score_time  test_custom            model
0   0.035347    0.045019     0.069731     ElasticNet()
1   0.107336    0.039035     0.093502     ElasticNet()
2   0.029913    0.029031     0.082540     ElasticNet()
3   0.036668    0.020604     0.096825     ElasticNet()
4   0.034177    0.031679     0.082540     ElasticNet()
5   0.033461    0.019635     0.201268   SGDRegressor()
6   0.029941    0.021406     0.201268   SGDRegressor()
7   0.016343    0.014398     0.219048   SGDRegressor()
8   0.014336    0.010020     0.180952   SGDRegressor()
9   0.012142    0.009892     0.187302   SGDRegressor()
10  0.073129    0.013489     0.204437  BayesianRidge()
11  0.047216    0.015262     0.198098  BayesianRidge()
12  0.027666    0.012791     0.231746  BayesianRidge()
13  0.023751    0.019456     0.177778  BayesianRidge()
14  0.028364    0.017676     0.179365  BayesianRidge()
LinearRegression()
    fit_time  score_time  test_custom               model
0   0.035347    0.045019   

  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or 

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...)
    fit_time  score_time  test_custom  \
0   0.035347    0.045019     0.069731   
1   0.107336    0.039035     0.093502   
2   0.029913    0.029031     0.082540   
3   0.036668    0.020604     0.096825   
4   0.034177    0.031679     0.082540   
5   0.0

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


SVR()
    fit_time  score_time  test_custom  \
0   0.035347    0.045019     0.069731   
1   0.107336    0.039035     0.093502   
2   0.029913    0.029031     0.082540   
3   0.036668    0.020604     0.096825   
4   0.034177    0.031679     0.082540   
5   0.033461    0.019635     0.201268   
6   0.029941    0.021406     0.201268   
7   0.016343    0.014398     0.219048   
8   0.014336    0.010020     0.180952   
9   0.012142    0.009892     0.187302   
10  0.073129    0.013489     0.204437   
11  0.047216    0.015262     0.198098   
12  0.027666    0.012791     0.231746   
13  0.023751    0.019456     0.177778   
14  0.028364    0.017676     0.179365   
15  0.014123    0.022479     0.204437   
16  0.019264    0.028448     0.201268   
17  0.010653    0.011387     0.231746   
18  0.007657    0.010607     0.176190   
19  0.008749    0.010657     0.179365   
20  1.869462    0.009498     0.445325   
21  2.292510    0.012990     0.459588   
22  2.593685    0.010941     0.447619   
23  1.9369

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


In [11]:
final.sort_values('test_custom')

Unnamed: 0,fit_time,score_time,test_custom,model
0,0.035347,0.045019,0.069731,ElasticNet()
2,0.029913,0.029031,0.08254,ElasticNet()
4,0.034177,0.031679,0.08254,ElasticNet()
1,0.107336,0.039035,0.093502,ElasticNet()
3,0.036668,0.020604,0.096825,ElasticNet()
45,0.020283,0.007528,0.123613,"RandomForestRegressor(max_features=2, max_leaf..."
47,0.025503,0.00735,0.125397,"RandomForestRegressor(max_features=2, max_leaf..."
48,0.026104,0.007302,0.150794,"RandomForestRegressor(max_features=2, max_leaf..."
18,0.007657,0.010607,0.17619,LinearRegression()
13,0.023751,0.019456,0.177778,BayesianRidge()
