In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error 

Load the dataset from a CSV file. The file contains five features that determine the PCB trace impedance Zo. Only two of them, H1 and Er1, are swept.

In [9]:
# Read the micro-strip training table; the path is resolved relative to the
# kernel's current working directory.
data = pd.read_csv('MicroStrip_Training_H_ER.csv')
# Bare last expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,H1,Er1,W1,W2,T1,Zo
0,2.0,3.0,4,3.5,0.7,54.1491
1,2.1,3.0,4,3.5,0.7,55.7520
2,2.2,3.0,4,3.5,0.7,57.2776
3,2.3,3.0,4,3.5,0.7,58.7739
4,2.4,3.0,4,3.5,0.7,60.2150
...,...,...,...,...,...,...
1436,14.6,5.0,4,3.5,0.7,106.1021
1437,14.7,5.0,4,3.5,0.7,106.3336
1438,14.8,5.0,4,3.5,0.7,106.5678
1439,14.9,5.0,4,3.5,0.7,106.7964


Because only H1 and Er1 are swept, use just H1 and Er1 as the training features.

In [10]:
# W1, W2 and T1 are not swept in this dataset, so remove them before modelling.
# `data` itself is left untouched; `df` is a new frame.
df = data.drop(labels=['W1', 'W2', 'T1'], axis='columns')
df

Unnamed: 0,H1,Er1,Zo
0,2.0,3.0,54.1491
1,2.1,3.0,55.7520
2,2.2,3.0,57.2776
3,2.3,3.0,58.7739
4,2.4,3.0,60.2150
...,...,...,...
1436,14.6,5.0,106.1021
1437,14.7,5.0,106.3336
1438,14.8,5.0,106.5678
1439,14.9,5.0,106.7964


Create the training data and test data

In [11]:
# Target: the trace impedance column.
y = df.loc[:, 'Zo']
# Predictors: the two swept parameters, kept as a two-column frame.
X = df.loc[:, ['H1', 'Er1']]

In [12]:
# Hold out 10% of the rows for testing; the fixed seed makes the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

Build a pipeline with a degree-3 polynomial feature transformer followed by linear regression, then train the model.

In [16]:
# Expand (H1, Er1) into polynomial terms up to degree 3, then fit a linear
# regression on the expanded features.
pipe = Pipeline(
    steps=[
        ('transformer', PolynomialFeatures(degree=3)),
        ('regression', LinearRegression()),
    ]
)
# fit() returns the pipeline, so the fitted estimator is shown as the cell output.
pipe.fit(X_train, y_train)

Pipeline(steps=[('transformer', PolynomialFeatures(degree=3)),
                ('regression', LinearRegression())])

This simple model achieves a very high score (R²), and the mean squared error is also very small.

In [17]:
# Score the fitted pipeline on the held-out split; with LinearRegression as
# the final step this is the coefficient of determination (R^2).
pipe.score(X_test, y_test)

0.9996570604177024

In [21]:
# Mean squared error of the fitted pipeline on the held-out split.
# scikit-learn's signature is mean_squared_error(y_true, y_pred), so the ground
# truth goes first. MSE is symmetric, so the value is unchanged, but keeping
# the documented order avoids wrong results if this line is adapted to an
# asymmetric metric such as r2_score.
mean_squared_error(y_test, pipe.predict(X_test))

0.10204688218224203

Also tried a 5th-order polynomial transformer: the score is even higher and the mean squared error becomes very small.

In [23]:
# Same two-step pipeline, now with polynomial terms up to degree 5.
# NOTE: this rebinds `pipe`, so the previously fitted degree-3 model is no
# longer reachable after this cell runs.
pipe = Pipeline(
    steps=[
        ('transformer', PolynomialFeatures(degree=5)),
        ('regression', LinearRegression()),
    ]
)
# fit() returns the pipeline, so the fitted estimator is shown as the cell output.
pipe.fit(X_train, y_train)

Pipeline(steps=[('transformer', PolynomialFeatures(degree=5)),
                ('regression', LinearRegression())])

In [24]:
# Score the currently bound `pipe` on the held-out split; with
# LinearRegression as the final step this is the coefficient of determination (R^2).
pipe.score(X_test, y_test)

0.9999959038586536

In [25]:
# Mean squared error of the currently bound `pipe` on the held-out split.
# scikit-learn's signature is mean_squared_error(y_true, y_pred), so the ground
# truth goes first. MSE is symmetric, so the value is unchanged, but keeping
# the documented order avoids wrong results if this line is adapted to an
# asymmetric metric such as r2_score.
mean_squared_error(y_test, pipe.predict(X_test))

0.0012188690806142603

So, a linear regression on 5th-order polynomial features is a very good model for predicting the trace impedance within the swept range of H1 and Er1.