# Version without feature normalization


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Column layout of data.txt: the first five columns are the process settings
# (model inputs), the remaining nine are the measured film properties (targets).
FEATURE_COLUMNS = ['SiH4', 'N2O', 'Temp', 'Pressure', 'RF Power']
TARGET_COLUMNS = ['Thickness', 'Depo. Rate', 'Uniformity', 'Ref. Index',
                  'Permittivity', 'Etch Rate', 'Stress', 'H2O', 'SiOH']

data = np.genfromtxt('data.txt')

# Fail early with a clear message instead of relying on NumPy silently
# clamping an out-of-range slice bound.
n_expected = len(FEATURE_COLUMNS) + len(TARGET_COLUMNS)
if data.ndim != 2 or data.shape[1] != n_expected:
    raise ValueError("data.txt must have %d columns, got shape %s"
                     % (n_expected, data.shape))

# Shuffle the rows in place with a fixed seed so the later positional
# train/test split is random but reproducible.
np.random.seed(1)
np.random.shuffle(data)

n_features = len(FEATURE_COLUMNS)
X = data[:, 0:n_features]
Y = data[:, n_features:n_expected]
dfX = pd.DataFrame(X)
dfy = pd.DataFrame(Y)

# Side-by-side view of inputs and targets with readable column names.
table = pd.concat([dfX, dfy], axis=1)
table.columns = FEATURE_COLUMNS + TARGET_COLUMNS

In [3]:
# Print the combined feature/target table as plain text for a visual check.
print(table)

     SiH4     N2O   Temp  Pressure  RF Power  Thickness  Depo. Rate  \
0   400.0   900.0  250.0      1.10      80.0       5.01       587.0   
1   400.0   900.0  200.0      0.50      80.0       5.31       324.0   
2   400.0   900.0  150.0      0.50      20.0       4.63       210.0   
3   300.0   650.0  300.0      1.03      85.0       4.98       433.0   
4   909.0    91.0  375.0      1.80     150.0       4.25      1064.0   
5   400.0   400.0  200.0      0.25      20.0       5.29       139.0   
6   667.0   333.0  375.0      1.80     150.0       4.49       749.0   
7   100.0   200.0  375.0      1.80     150.0       0.79        75.0   
8   400.0   900.0  200.0      1.80     150.0       4.99       396.0   
9   700.0  1400.0  375.0      1.80     150.0       2.85       813.0   
10  400.0   900.0  250.0      0.50     150.0       5.41       491.0   
11  400.0   900.0  200.0      1.10      20.0       5.78       375.0   
12  300.0   600.0  375.0      1.80     150.0       1.95       301.0   
13  55

In [4]:
# Positional hold-out split of the shuffled rows: rows 0-29 are used for
# training and rows 30-35 for testing. Only column 0 of Y is modelled.
train_rows = slice(0, 30)
test_rows = slice(30, 36)

X_train, Y_train = X[train_rows, :], Y[train_rows, 0]
X_test, Y_test = X[test_rows, :], Y[test_rows, 0]

## Gradient Boosting Regression

In [5]:
from sklearn import ensemble
from sklearn.metrics import mean_squared_error

In [6]:
# Hyper-parameters for the gradient-boosted trees. The loss is deliberately
# left at the estimator's default (least squares): the old spelling 'ls' was
# removed in scikit-learn 1.2, whereas the default means the same thing in
# both old and new releases.
params = {'n_estimators': 500, 'max_depth': 6, 'min_samples_split': 4,
          'learning_rate': 0.005}

GBR = ensemble.GradientBoostingRegressor(**params)

GBR.fit(X_train, Y_train)
# Mean squared error on the held-out rows.
mse = mean_squared_error(Y_test, GBR.predict(X_test))
print("MSE: %.4f" % mse)



MSE: 1.0846


In [7]:
# Held-out targets on the first line, gradient-boosting predictions on the second.
print(Y_test, GBR.predict(X_test), sep="\n")

[ 3.36  5.68  4.8   4.92  4.36  5.75]
[ 5.59021115  5.32351961  5.34892729  4.95046393  5.3669432   5.44935533]


## Support Vector Regression

### RBF kernel


In [8]:
import numpy as np
from sklearn.svm import SVR

In [9]:
# Support vector regressor with an RBF kernel, fitted on the training rows
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
rbf_pred = svr_rbf.predict(X_test)
mse = mean_squared_error(Y_test, rbf_pred)
print("MSE: %.4f" % (mse,))

MSE: 0.8785


In [10]:
# Held-out targets on the first line, RBF-SVR predictions on the second.
print(Y_test, svr_rbf.predict(X_test), sep="\n")

[ 3.36  5.68  4.8   4.92  4.36  5.75]
[ 4.545       4.81631877  4.545       4.8796875   5.62572558  4.545     ]


### Linear kernel (no kernel trick)


In [11]:
# Support vector regressor with a linear kernel, fitted on the training rows
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_lin = SVR(kernel='linear', C=1e3).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
lin_pred = svr_lin.predict(X_test)
mse = mean_squared_error(Y_test, lin_pred)
print("MSE: %.4f" % (mse,))


MSE: 6022.5005


In [12]:
# Held-out targets on the first line, linear-SVR predictions after them.
print(Y_test, svr_lin.predict(X_test), sep="\n")

[ 3.36  5.68  4.8   4.92  4.36  5.75]
[ -50.27740678  -27.4037877   -61.26986226  -44.70336064 -153.90873607
  -11.1850799 ]


## Decision Tree Regression
### basic

In [13]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [14]:
# Single regression tree limited to depth 4, fitted on the training rows
# (fit() returns the estimator itself, so construction and fitting are chained).
regr_1 = DecisionTreeRegressor(max_depth=4).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
tree_pred = regr_1.predict(X_test)
mse = mean_squared_error(Y_test, tree_pred)
print("MSE: %.4f" % (mse,))



MSE: 1.2366


In [15]:
# Held-out targets on the first line, decision-tree predictions on the second.
print(Y_test, regr_1.predict(X_test), sep="\n")

[ 3.36  5.68  4.8   4.92  4.36  5.75]
[ 5.74666667  5.203       5.294       5.203       5.294       5.203     ]


### with AdaBoost

In [16]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

In [17]:
# Dedicated RandomState seeded with 1, handed to AdaBoost as its random_state.
rng = np.random.RandomState(1)

# AdaBoost (n_estimators=300) over depth-4 regression trees.
base_tree = DecisionTreeRegressor(max_depth=4)
regr_2 = AdaBoostRegressor(base_tree, n_estimators=300, random_state=rng)
regr_2.fit(X_train, Y_train)

# Mean squared error on the held-out rows.
boosted_pred = regr_2.predict(X_test)
mse = mean_squared_error(Y_test, boosted_pred)
print("MSE: %.4f" % (mse,))




MSE: 0.8787


In [18]:
# Held-out targets on the first line, AdaBoost predictions on the second.
print(Y_test, regr_2.predict(X_test), sep="\n")

[ 3.36  5.68  4.8   4.92  4.36  5.75]
[ 5.39416667  5.16416667  4.99        4.97        5.152       5.3       ]
