In [53]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_squared_error, r2_score

In [54]:
random_state = 20  # Seed passed to both train_test_split and the random forest so reruns are reproducible
threshold = 1 # Accuracy scoring threshold: a prediction counts as a hit when it is within +/- this value of the true target (same units as the 'dB(S(1,1)) []' column)

In [55]:
# Load the raw S11 sweep data, then discard every row whose S11 value is positive.
# Rows are dropped by index label, so the surviving rows keep their original labels.
df = pd.read_csv("../test_data/patch_antenna/Patch Antenna S11 Data.csv")
df = df.drop(index=df.index[df['dB(S(1,1)) []'] > 0])

In [56]:
# Display the filtered frame for a quick visual check (the rendered output is truncated by pandas)
df

Unnamed: 0,inset_dist [mm],L [mm],sub_thick [mm],W [mm],W0 [mm],y0 [mm],Freq [GHz],"dB(S(1,1)) []"
0,0.6,11.5,2,14.0,2.5,3.0,4.00,-0.187827
1,0.6,11.5,2,14.0,2.5,3.0,4.08,-0.195727
2,0.6,11.5,2,14.0,2.5,3.0,4.16,-0.204364
3,0.6,11.5,2,14.0,2.5,3.0,4.24,-0.213811
4,0.6,11.5,2,14.0,2.5,3.0,4.32,-0.224152
...,...,...,...,...,...,...,...,...
40900,1.4,12.5,2,15.6,3.5,5.0,11.68,-17.165484
40901,1.4,12.5,2,15.6,3.5,5.0,11.76,-20.608874
40902,1.4,12.5,2,15.6,3.5,5.0,11.84,-25.518707
40903,1.4,12.5,2,15.6,3.5,5.0,11.92,-29.187829


In [57]:
# Split into features (x) and target (y)
# `columns=` already selects the axis, so the redundant `axis=1` is omitted.
input_x = df.drop(columns=['dB(S(1,1)) []'])
# Keep the target as a 1-D Series rather than a single-column DataFrame: scikit-learn's
# forest expects y of shape (n_samples,) and otherwise emits a column-vector warning at
# fit time and flattens y itself. `.values` and the metric functions used later work the same.
input_y = df['dB(S(1,1)) []']

In [58]:
# Split data into training and testing.
# No test_size/train_size is given, so scikit-learn's default split proportion applies;
# random_state fixes the shuffle so the same rows land in each set on every run.
X_train, X_test, y_train, y_test = train_test_split(input_x, input_y, random_state=random_state)

In [59]:
# Random forest regressor, seeded from the shared random_state for reproducibility
model = RandomForestRegressor(
    n_estimators=1000,
    max_depth=110,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=random_state,
)

# Min-max scaler mapping each feature into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))

# Chain the two steps so scaling is fitted and applied together with the model
pipeline = Pipeline(
    steps=[
        ('normalize', scaler),
        ('model', model),
    ]
)

In [60]:
%%time
# Fit the pipeline (scaler + random forest) on the training split.
# This cell only trains; prediction happens in the next cell.
pipeline_fit = pipeline.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


CPU times: user 31.9 s, sys: 636 ms, total: 32.5 s
Wall time: 32.7 s


In [61]:
%%time
# Predict the target for the held-out test features using the fitted pipeline
predictions = pipeline_fit.predict(X_test)

CPU times: user 2.17 s, sys: 0 ns, total: 2.17 s
Wall time: 2.17 s


In [62]:
# Check if predicted value is threshold amount above or below actual value
def is_in_threshold(actual, pred, tolerance=None):
    """Return True when `pred` lies within +/- `tolerance` of `actual` (bounds inclusive).

    Parameters
    ----------
    actual : scalar
        The true value.
    pred : scalar
        The predicted value.
    tolerance : scalar, optional
        Half-width of the accepted band. When omitted, the notebook-level
        `threshold` constant is used, which matches the original behaviour.

    Returns
    -------
    bool
        True if `actual - tolerance <= pred <= actual + tolerance`.
    """
    if tolerance is None:
        # Fall back to the notebook-wide setting so existing two-argument calls still work
        tolerance = threshold
    return actual - tolerance <= pred <= actual + tolerance

In [63]:
# The helper defined below builds a boolean column that shows whether each prediction is
# within the threshold of its test value (not whether the two values are exactly the same).
# `results` is a working copy of the test features; the helper's own `results` parameter
# shadows this variable, so the helper does not read it.
results = X_test.copy()
def create_tf_column(results, tolerance=None):
    """Flag, row by row, whether the prediction is within the tolerance of the true value.

    Parameters
    ----------
    results : pandas.DataFrame
        Must contain the columns 'y_test' (true values) and 'predictions'.
    tolerance : scalar, optional
        Half-width of the accepted band. When omitted, the notebook-level
        `threshold` constant is used, which matches the original behaviour.

    Returns
    -------
    pandas.Series
        Boolean Series, indexed like `results`, that is True where
        `y_test - tolerance <= predictions <= y_test + tolerance`.
    """
    if tolerance is None:
        # Fall back to the notebook-wide setting so existing one-argument calls still work
        tolerance = threshold
    actual = results['y_test']
    # Vectorised inclusive range check: the same comparison the row-wise apply performed,
    # without a Python-level call per row.
    return results['predictions'].between(actual - tolerance, actual + tolerance)

In [64]:
# Calculate accuracy of model by number of predictions that are within threshold value above or below the test value for each row
def get_score(X_test, y_test, predictions, clf_dt=None):
    """Return the fraction of test rows whose prediction is within the threshold of the true value.

    Parameters
    ----------
    X_test : pandas.DataFrame or array-like
        Test features; used to build the scoring frame and, if needed, to predict.
    y_test : pandas.Series or pandas.DataFrame
        True target values (anything exposing `.values`), in the same row order as X_test.
    predictions : array-like or None
        Predictions for X_test, in row order. When None they are computed with `clf_dt`.
        Previously this argument was ignored and the model was always re-run.
    clf_dt : fitted estimator, optional
        Only needed when `predictions` is None; must expose `predict`.

    Returns
    -------
    float
        Share of rows (0.0 to 1.0) flagged True by `create_tf_column`.

    Raises
    ------
    ValueError
        If neither predictions nor an estimator is supplied, or if there are no rows to score.
    """
    if predictions is None:
        if clf_dt is None:
            raise ValueError("get_score needs either predictions or a fitted estimator (clf_dt)")
        predictions = clf_dt.predict(X_test)

    dataframe = pd.DataFrame(X_test.copy(), columns=input_x.columns)
    if dataframe.shape[0] == 0:
        raise ValueError("get_score cannot score an empty test set")
    dataframe['y_test'] = y_test.values
    # Strip any pandas index so predictions are matched to rows by position, like y_test.values
    dataframe['predictions'] = getattr(predictions, 'values', predictions)

    # Summing the boolean flags counts the hits and yields 0 when there are none;
    # value_counts().get(True) returned None in that case and the division failed.
    return create_tf_column(dataframe).sum() / dataframe.shape[0]

In [65]:
# Display the held-out test features; the row labels are the original labels from df, in shuffled order
X_test

Unnamed: 0,inset_dist [mm],L [mm],sub_thick [mm],W [mm],W0 [mm],y0 [mm],Freq [GHz]
19860,1.4,12.0,2,14.8,3.0,3.5,9.12
29319,0.6,11.5,2,15.6,3.5,4.0,6.32
502,1.0,11.5,2,14.0,3.0,3.0,11.84
4555,0.6,12.0,2,14.0,2.5,3.0,4.80
6113,1.4,12.0,2,14.0,2.5,3.5,8.24
...,...,...,...,...,...,...,...
23493,1.4,12.5,2,14.8,3.0,3.0,8.88
22327,1.0,12.0,2,14.8,3.5,5.0,4.48
3126,1.0,11.5,2,14.0,2.5,4.5,11.68
15226,1.4,11.5,2,14.8,2.5,3.5,10.08


In [66]:
# Report the within-threshold hit rate together with RMSE and R^2 on the held-out test set
print(f"Score within +-{threshold}: {get_score(X_test, y_test, predictions, pipeline)}")
# The `squared=False` flag was deprecated in scikit-learn 1.4 and removed in 1.6, so take
# the square root of the MSE explicitly; this works on every scikit-learn version.
print("RMSE:", mean_squared_error(y_test, predictions) ** 0.5)
print("R^2:", r2_score(y_test, predictions))

Score within +-1: 0.9292070010755842
RMSE: 0.8277450381247843
R^2: 0.9590679549349775


In [67]:
# Persist the fitted pipeline (scaler + model together) to antenna_model.pkl, a path relative to the working directory.
# NOTE(review): joblib files are pickle-based — only load this file from a trusted source.
joblib.dump(pipeline, "antenna_model.pkl")

['antenna_model.pkl']