In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_squared_error, r2_score

In [17]:
# Seed passed to both the train/test split and the decision tree so reruns are reproducible
random_state = 20
# Accuracy scoring threshold: a prediction counts as a hit when it lies within
# +/- this amount of the actual value (same units as the 'dB(S(1,1)) []' target)
threshold = 1

In [18]:
# Load the simulated S11 sweep and keep only the rows whose S11 is not positive
df = (
    pd.read_csv("../test_data/Grounded CPW Leaky Wave antenna/S11 Data.csv")
    .loc[lambda frame: ~(frame['dB(S(1,1)) []'] > 0)]  # mask out positive S11 rows
)

In [19]:
# Inspect the filtered frame (pandas renders a truncated view)
df

Unnamed: 0,Feed_Gap [mm],Feed_Inset [cm],Feed_W [cm],Ground_gap [mm],Pad_L [cm],Pad_W [cm],Freq [GHz],"dB(S(1,1)) []"
0,0.5,0.2,1.2,3,0.70,0.9,2.00,-0.232632
1,0.5,0.2,1.2,3,0.70,0.9,2.09,-0.340011
2,0.5,0.2,1.2,3,0.70,0.9,2.18,-1.501822
3,0.5,0.2,1.2,3,0.70,0.9,2.27,-1.827965
4,0.5,0.2,1.2,3,0.70,0.9,2.36,-0.448171
...,...,...,...,...,...,...,...,...
4216,0.5,0.2,1.2,4,0.75,1.2,19.64,-15.530748
4217,0.5,0.2,1.2,4,0.75,1.2,19.73,-15.681817
4218,0.5,0.2,1.2,4,0.75,1.2,19.82,-15.837634
4219,0.5,0.2,1.2,4,0.75,1.2,19.91,-16.001216


In [20]:
# Separate the S11 target from the geometry/frequency features
input_y = df.loc[:, ['dB(S(1,1)) []']]  # kept 2-D: a single-column DataFrame
input_x = df.drop(columns=input_y.columns)  # everything except the target column

In [21]:
# Split data into training and testing
# (no test_size is passed, so scikit-learn's default split proportion applies;
# random_state makes the shuffle repeatable)
X_train, X_test, y_train, y_test = train_test_split(input_x, input_y, random_state=random_state)

In [22]:
# Scale every feature into [0, 1], then fit a decision-tree regressor on the scaled values
scaler = MinMaxScaler(feature_range=(0, 1))
model = DecisionTreeRegressor(random_state=random_state)
pipeline = Pipeline(
    steps=[
        ('normalize', scaler),
        ('model', model),
    ]
)

In [23]:
%%time
# Train the pipeline (scaler + tree) on the training split; prediction is done in the next cell
# NOTE: scikit-learn's fit() returns the estimator itself, so pipeline_fit is the same object as pipeline
pipeline_fit = pipeline.fit(X_train, y_train)

CPU times: user 8.19 ms, sys: 187 µs, total: 8.38 ms
Wall time: 8.07 ms


In [24]:
%%time
# Predict the target for the held-out test features with the fitted pipeline
predictions = pipeline_fit.predict(X_test)

CPU times: user 690 µs, sys: 987 µs, total: 1.68 ms
Wall time: 1.46 ms


In [25]:
# Check if predicted value is threshold amount above or below actual value
def is_in_threshold(actual, pred, tol=None):
    """Return True when ``pred`` lies within ``tol`` of ``actual`` (bounds inclusive).

    Parameters
    ----------
    actual : number
        Reference value the prediction is compared against.
    pred : number
        Predicted value to check.
    tol : number, optional
        Half-width of the accepted band. Defaults to the notebook-level
        ``threshold`` so existing two-argument calls behave exactly as before.

    Returns
    -------
    bool
        True if ``actual - tol <= pred <= actual + tol``.
    """
    if tol is None:
        tol = threshold  # fall back to the notebook-wide setting
    return actual - tol <= pred <= actual + tol

In [26]:
# Build a boolean column marking rows whose prediction is within `threshold` of the test value
# (`results` here is a working copy of the test features; create_tf_column's own `results`
# parameter shadows this name inside the function)
results = X_test.copy()
def create_tf_column(results, tol=None):
    """Flag each row whose prediction is within ``tol`` of the actual value.

    Parameters
    ----------
    results : pandas.DataFrame
        Must contain the columns ``'y_test'`` and ``'predictions'``.
    tol : number, optional
        Half-width of the accepted band (bounds inclusive). Defaults to the
        notebook-level ``threshold`` so existing one-argument calls are unchanged.

    Returns
    -------
    pandas.Series
        Boolean Series aligned with ``results.index``; an empty boolean Series
        when ``results`` has no rows.
    """
    if tol is None:
        tol = threshold  # fall back to the notebook-wide setting
    actual = results['y_test']
    pred = results['predictions']
    # Vectorised form of the per-row check: no Python-level call per row, and an
    # empty frame yields an empty boolean Series instead of a DataFrame.
    return (pred <= actual + tol) & (pred >= actual - tol)

In [27]:
# Calculate accuracy of model by number of predictions that are within threshold value above or below the test value for each row
def get_score(X_test, y_test, predictions, clf_dt):
    """Return the fraction of test rows predicted within ``threshold`` of the actual value.

    Parameters
    ----------
    X_test : pandas.DataFrame or array-like
        Test features; used for prediction (if needed) and to size the result.
    y_test : pandas.DataFrame or pandas.Series
        Actual target values, one per row of ``X_test`` (read through ``.values``).
    predictions : array-like or None
        Predictions for ``X_test``. Used as given; when ``None`` they are
        computed with ``clf_dt.predict(X_test)``.
    clf_dt : estimator with a ``predict`` method
        Only consulted when ``predictions`` is ``None``.

    Returns
    -------
    float
        Share of rows (0.0 to 1.0) flagged True by ``create_tf_column``.

    Raises
    ------
    ValueError
        If ``X_test`` has no rows (the score would be undefined).
    """
    # Honour the caller's predictions instead of silently overwriting them;
    # only fall back to the model when none were supplied.
    if predictions is None:
        predictions = clf_dt.predict(X_test)

    dataframe = pd.DataFrame(X_test.copy(), columns=input_x.columns)
    n_rows = dataframe.shape[0]
    if n_rows == 0:
        raise ValueError("Cannot compute a score for an empty test set")

    dataframe['y_test'] = y_test.values
    dataframe['predictions'] = predictions

    # Sum the boolean flags rather than value_counts().get(True): the latter
    # returns None (and the division raises TypeError) when no row is a hit.
    return create_tf_column(dataframe).sum() / n_rows

In [28]:
# Inspect the held-out test features (row labels are the original frame's index values, in shuffled order)
X_test

Unnamed: 0,Feed_Gap [mm],Feed_Inset [cm],Feed_W [cm],Ground_gap [mm],Pad_L [cm],Pad_W [cm],Freq [GHz]
3770,0.5,0.2,1.2,4,0.70,1.2,15.68
888,0.2,0.2,1.2,3,0.70,1.0,9.56
1157,0.3,0.2,1.2,3,0.70,1.0,15.68
318,0.5,0.2,1.2,4,0.70,0.9,12.53
2150,0.5,0.2,1.2,4,0.75,1.0,14.60
...,...,...,...,...,...,...,...
3899,0.5,0.2,1.2,3,0.75,1.2,9.20
2613,0.5,0.2,1.2,3,0.70,1.1,2.00
1698,0.5,0.2,1.2,4,0.70,1.0,10.10
92,0.5,0.2,1.2,3,0.70,0.9,10.28


In [29]:
# Persist the pipeline (scaler + model) to antenna_model.pkl in the current working directory
joblib.dump(value=pipeline, filename="antenna_model.pkl")

['antenna_model.pkl']

In [30]:
# Report the within-threshold hit rate alongside standard regression metrics
print(f"Score within +-{threshold}: {get_score(X_test, y_test, predictions, pipeline)}")
# RMSE is taken as sqrt(MSE): the `squared=False` flag was deprecated in scikit-learn 1.4
# and later removed, so this form runs on both old and new versions.
print("RMSE:", mean_squared_error(y_test, predictions) ** 0.5)
print("R^2:", r2_score(y_test, predictions))

Score within +-1: 0.5965746907706946
RMSE: 2.8463567638862535
R^2: 0.8269845428140274
