In [None]:
# Adapted from Hands-on Machine Learning https://github.com/ageron/handson-ml2/blob/master/04_training_linear_models.ipynb

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.discriminant_analysis import StandardScaler
from sklearn.pipeline import Pipeline
import pandas as pd
from tqdm import tqdm

def plot_learning_curves(model, X, y, step=100):
    """Plot training and validation RMSE against the training-set size.

    The data is split 70/30 with a fixed ``random_state=10``. The model is
    then refitted on growing prefixes of the training split and scored on
    both the prefix it was fitted on and the whole validation split. Both
    curves are drawn on the current matplotlib axes; the caller decides when
    to call ``plt.show()``.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``
        Refitted at every size, so on return it is fitted on the largest
        prefix that was evaluated.
    X : features, sliceable by rows with ``[:m]`` (ndarray or DataFrame)
    y : targets aligned with ``X``; a single-column 2-D target is flattened
        to 1-D before use.
    step : int, default 100
        Increment between successive training-set sizes (sizes are
        ``1, 1 + step, 1 + 2 * step, ...``).
    """
    # Some scikit-learn estimators (e.g. the forest regressors) warn on every
    # fit when handed an (n, 1) column vector; flatten that case once up front.
    y = np.asarray(y)
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=10)

    # Keep the evaluated sizes so the x-axis shows real sample counts rather
    # than the loop iteration number.
    train_sizes = list(range(1, len(X_train) + 1, step))
    train_errors, val_errors = [], []
    for m in tqdm(train_sizes):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))

    # Errors are collected as MSE; take the square root so the plot is in RMSE.
    plt.plot(train_sizes, np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(train_sizes, np.sqrt(val_errors), "b-", linewidth=3, label="val")

    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("Training set size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)

In [None]:
# Estimator whose learning curve is plotted. The XGBoost configuration below
# is an alternative; uncomment it (and comment out the forest) to compare.
# model = XGBRegressor(n_estimators=1100, min_child_weight=2, max_depth=10, learning_rate=0.2)
# random_state is fixed (same value as the train/validation split uses) so the
# bootstrap sampling of the forest, and therefore the curves, are reproducible.
model = RandomForestRegressor(
    max_depth=60,
    min_samples_leaf=1,
    min_samples_split=2,
    n_estimators=775,
    random_state=10,
)
scaler = StandardScaler()  # Standardizes the features before they reach the model
# Chain scaler and model so both are refitted together on whatever data is passed to fit()
pipeline = Pipeline(steps=[('normalize', scaler), ('model', model)])

In [None]:
# Load the combined S11 table and discard every row whose S11 value is positive.
s11_column = 'dB(S(1,1)) []'
df = pd.read_csv("../test_data/new leaky wave/S11_Data_combined.csv")
positive_s11 = df[s11_column] > 0
df = df.drop(index=df.index[positive_s11])
# Features are all remaining columns; the target is the S11 column,
# kept as a one-column DataFrame.
input_x = df.drop(columns=[s11_column])
input_y = df[[s11_column]]

In [None]:
# Draw the learning curves of the scaler + model pipeline on the loaded data.
plot_learning_curves(model=pipeline, X=input_x, y=input_y)
# Optional fixed view window, left disabled:
# plt.axis([0, 300, 0, 1000])
plt.show()  # Render the figure built by the call above

In [20]:
# NOTE(review): nothing is plotted in this cell and the previous cell already
# calls plt.show(); this looks like a leftover cell — confirm and remove.
plt.show()