# Version without normalization


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Column layout of the data file: the first six columns are used as model
# inputs (X) and the remaining nine as candidate targets (Y).
FEATURE_NAMES = ['SiH4', 'N2O', 'Temp', 'Pressure', 'RF Power', 'Time']
TARGET_NAMES = ['Thickness', 'Depo. Rate', 'Uniformity', 'Ref. Index',
                'Permittivity', 'Etch Rate', 'Stress', 'H2O', 'SiOH']

data = np.genfromtxt('data_outlier little.txt')

# Fail early, with a readable message, when the file does not have one column
# per declared name (previously this only surfaced when the column labels were
# assigned to the table below).
n_features = len(FEATURE_NAMES)
n_columns = n_features + len(TARGET_NAMES)
if data.ndim != 2 or data.shape[1] != n_columns:
    raise ValueError(
        "expected a 2-D table with %d columns, got shape %r"
        % (n_columns, data.shape)
    )

# Fixed seed so the in-place row shuffle (and therefore the positional
# train/test split made later) is the same on every run.
np.random.seed(4)
np.random.shuffle(data)

# Slice bounds are derived from the name lists so they cannot drift out of
# sync with the labels (the old upper bound of 16 pointed past the last column).
X = data[:, :n_features]
Y = data[:, n_features:n_columns]
dfX = pd.DataFrame(X)
dfy = pd.DataFrame(Y)

# Labelled view of the shuffled data, for inspection only.
table = pd.concat([dfX, dfy], axis=1)
table.columns = FEATURE_NAMES + TARGET_NAMES

In [3]:
# Leave the frame as the cell's last expression so the notebook renders it
# with its rich display instead of a plain-text print.
table

     SiH4    N2O   Temp  Pressure  RF Power   Time  Thickness  Depo. Rate  \
0   667.0  333.0  375.0      1.80     150.0   60.0       4.49       749.0   
1   200.0  400.0  200.0      0.25     150.0  192.0       5.52       287.0   
2   400.0  400.0  200.0      1.80     150.0  136.0       4.80       352.0   
3   400.0  900.0  200.0      1.10      20.0  154.0       5.78       375.0   
4   400.0  900.0  150.0      1.80      80.0  128.0       5.23       409.0   
5   400.0  400.0  200.0      0.25      20.0  381.0       5.29       139.0   
6   400.0  900.0  250.0      1.10      80.0   85.0       5.01       587.0   
7   333.0  667.0  375.0      1.80     150.0   60.0       1.98       331.0   
8   200.0  400.0  400.0      0.25      20.0  468.0       5.76       123.0   
9   300.0  650.0  300.0      1.03      85.0  115.0       4.98       433.0   
10  400.0  900.0  400.0      0.25      20.0  600.0       3.36        56.0   
11  400.0  900.0  400.0      1.80     150.0  123.0       5.24       426.0   

In [4]:
# Positional hold-out split of the shuffled rows: rows 0-22 are used for
# fitting and rows 23-27 for evaluation. Only target column 0 of Y
# ('Thickness' in the labelled table) is modelled.
X_train, X_test = X[:23], X[23:28]
Y_train, Y_test = Y[:23, 0], Y[23:28, 0]

## Linear_regression

### Multiple linear regression

In [5]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

# Ordinary least-squares baseline fitted on the training rows
# (fit() returns the estimator itself, so construction and fitting are chained).
MulReg = linear_model.LinearRegression().fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=MulReg.predict(X_test))
print("MSE: {:.4f}".format(mse))

MSE: 1.3817


In [6]:
# Held-out targets on the first line, the linear model's predictions on the second.
print(Y_test, MulReg.predict(X_test), sep="\n")

[ 5.1   5.68  4.99  4.83  5.14]
[ 3.38399326  6.472209    4.94792203  3.19210225  5.9472333 ]


### Ridge Regression (Linear least squares with l2 regularization)

In [7]:
# L2-penalised linear regression with penalty strength alpha=0.35
# (fit() returns the estimator itself, so construction and fitting are chained).
ridge = linear_model.Ridge(alpha=0.35).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=ridge.predict(X_test))
print("MSE: {:.4f}".format(mse))

MSE: 1.3080


In [8]:
# Held-out targets on the first line, the ridge model's predictions on the second.
print(Y_test, ridge.predict(X_test), sep="\n")

[ 5.1   5.68  4.99  4.83  5.14]
[ 3.41548158  6.40952192  4.96062833  3.22845678  5.91738652]


### Lasso Regression (Linear least squares with l1 regularization)

In [9]:
# L1-penalised linear regression with penalty strength alpha=0.35
# (fit() returns the estimator itself, so construction and fitting are chained).
lasso = linear_model.Lasso(alpha=0.35).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=lasso.predict(X_test))
print("MSE: {:.4f}".format(mse))

MSE: 0.4800


In [10]:
# Held-out targets on the first line, the lasso model's predictions on the second.
print(Y_test, lasso.predict(X_test), sep="\n")

[ 5.1   5.68  4.99  4.83  5.14]
[ 3.98191211  5.26854015  5.19455128  3.88993802  5.37434426]


# Support Vector Regression



### *가장 낮은 MSE값을 가지는 C와 kernel을 선택

In [None]:
import numpy as np
from sklearn.svm import SVR

In [None]:
# Exhaustive sweep over C, kernel and epsilon for SVR. Every combination is
# fitted on the training rows and scored by MSE on the held-out rows; the
# scores are also collected so the best combination can be reported at the end
# instead of having to be picked out of the printed log by eye.
# NOTE(review): the scores come from X_test / Y_test, so the winning setting is
# tuned on the test rows and its MSE is an optimistic estimate.
svr_results = []
for c in [0.01,0.1,1,10,100]:
    for i in ["linear","rbf","sigmoid"]:
        for e in [0.001,0.01,0.1]:
            svr_ck = SVR(kernel=i, C=c, epsilon=e)
            svr_ck.fit(X_train, Y_train)
            mse = mean_squared_error(Y_test, svr_ck.predict(X_test))
            print("kernel:",i,"\n","C:",c,"\n","MSE: ",mse,"\n","epsilon:",e,"\n" ,"-"*25)
            svr_results.append((mse, i, c, e))

# Lowest test MSE wins; on ties the first combination encountered is kept.
best_mse, best_kernel, best_c, best_epsilon = min(svr_results, key=lambda r: r[0])
print("Best -> kernel:", best_kernel, "| C:", best_c,
      "| epsilon:", best_epsilon, "| MSE:", best_mse)

kernel: linear 
 C: 0.01 
 MSE:  0.131443694416 
 epsilon: 0.001 
 -------------------------
kernel: linear 
 C: 0.01 
 MSE:  0.12799502732 
 epsilon: 0.01 
 -------------------------
kernel: linear 
 C: 0.01 
 MSE:  0.124679517757 
 epsilon: 0.1 
 -------------------------
kernel: rbf 
 C: 0.01 
 MSE:  0.101065 
 epsilon: 0.001 
 -------------------------
kernel: rbf 
 C: 0.01 
 MSE:  0.09868 
 epsilon: 0.01 
 -------------------------
kernel: rbf 
 C: 0.01 
 MSE:  0.08566 
 epsilon: 0.1 
 -------------------------
kernel: sigmoid 
 C: 0.01 
 MSE:  0.10134 
 epsilon: 0.001 
 -------------------------
kernel: sigmoid 
 C: 0.01 
 MSE:  0.10134 
 epsilon: 0.01 
 -------------------------
kernel: sigmoid 
 C: 0.01 
 MSE:  0.08838 
 epsilon: 0.1 
 -------------------------
kernel: linear 
 C: 0.1 
 MSE:  0.0763474958609 
 epsilon: 0.001 
 -------------------------
kernel: linear 
 C: 0.1 
 MSE:  0.0821607756645 
 epsilon: 0.01 
 -------------------------
kernel: linear 
 C: 0.1 
 MSE:  0.0

### rbf kernel

In [None]:
import numpy as np
from sklearn.svm import SVR

In [None]:
# RBF-kernel SVR with a large C (1e3) and gamma=0.1
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=svr_rbf.predict(X_test))
print("MSE: {:.4f}".format(mse))

In [None]:
# Held-out targets on the first line, the RBF SVR predictions on the second.
print(Y_test, svr_rbf.predict(X_test), sep="\n")

### poly kernel 

In [None]:
# Polynomial-kernel SVR with a large C (1e3) and gamma=0.1
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_poly = SVR(kernel='poly', C=1e3, gamma=0.1).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=svr_poly.predict(X_test))
print("MSE: {:.4f}".format(mse))

In [None]:
# Held-out targets on the first line, the polynomial SVR predictions on the second.
print(Y_test, svr_poly.predict(X_test), sep="\n")

### sigmoid kernel

In [None]:
# Sigmoid-kernel SVR with a large C (1e3) and gamma=0.1
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_sigmoid = SVR(kernel='sigmoid', C=1e3, gamma=0.1).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=svr_sigmoid.predict(X_test))
print("MSE: {:.4f}".format(mse))

In [None]:
# Held-out targets on the first line, the sigmoid SVR predictions on the second.
print(Y_test, svr_sigmoid.predict(X_test), sep="\n")

### linear kernel


In [None]:
# Linear-kernel SVR with a large C (1e3)
# (fit() returns the estimator itself, so construction and fitting are chained).
svr_lin = SVR(kernel='linear', C=1e3).fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=svr_lin.predict(X_test))
print("MSE: {:.4f}".format(mse))

In [None]:
# Held-out targets on the first line, the linear SVR predictions on the second.
print(Y_test, svr_lin.predict(X_test), sep="\n")

## Kernel Ridge Regression 

In [None]:
from sklearn.kernel_ridge import KernelRidge

# Kernel ridge regression with three kernels. gamma is passed only to the
# kernels that use it; the linear kernel has no gamma parameter, so the value
# previously passed there was ignored and has been dropped.
kr_linear = KernelRidge(kernel='linear')
kr_rbf = KernelRidge(kernel='rbf', gamma=0.1)
kr_poly = KernelRidge(kernel='poly', gamma=0.1)

# Fit every model on the same training rows.
kr_linear.fit(X_train, Y_train)
kr_rbf.fit(X_train, Y_train)
kr_poly.fit(X_train, Y_train)

# Mean squared error of each model on the held-out rows.
mse = mean_squared_error(Y_test, kr_linear.predict(X_test))
print("MSE(linear): %.4f" % mse)
mse = mean_squared_error(Y_test, kr_rbf.predict(X_test))
print("MSE(rbf): %.4f" % mse)
mse = mean_squared_error(Y_test, kr_poly.predict(X_test))
print("MSE(poly): %.4f" % mse)

In [None]:
# Held-out targets first, then a caption and the predictions of each
# kernel ridge model in turn.
print(Y_test)
for label, model in (("Linear result", kr_linear),
                     ("rbf result", kr_rbf),
                     ("poly result", kr_poly)):
    print(label)
    print(model.predict(X_test))

## Decision Tree Regression
### basic

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Single regression tree limited to depth 4. random_state is pinned because
# the tree permutes the features at every split, so without a fixed seed ties
# between equally good splits can be resolved differently from run to run.
regr_1 = DecisionTreeRegressor(max_depth=4, random_state=0)

regr_1.fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(Y_test, regr_1.predict(X_test))
print("MSE: %.4f" % mse)



In [None]:
# Held-out targets on the first line, the decision tree's predictions on the second.
print(Y_test, regr_1.predict(X_test), sep="\n")


### with AdaBoost

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

In [None]:
# Dedicated, seeded random stream for the boosting procedure.
rng = np.random.RandomState(1)

# AdaBoost over 300 depth-4 regression trees, fitted on the training rows.
regr_2 = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4),
    random_state=rng,
    n_estimators=300,
)
regr_2.fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(y_true=Y_test, y_pred=regr_2.predict(X_test))
print("MSE: {:.4f}".format(mse))




In [None]:
# Held-out targets on the first line, the AdaBoost ensemble's predictions on the second.
print(Y_test, regr_2.predict(X_test), sep="\n")

## Gradient Boosting Regression

In [None]:
from sklearn import ensemble

In [None]:
# Hyper-parameters for the gradient-boosted ensemble. The loss is left at the
# estimator's default (least squares): the explicit 'ls' spelling was
# deprecated in scikit-learn 1.0 and removed in 1.2, whereas relying on the
# default selects the same loss on both old and new releases.
params = {'n_estimators': 500, 'max_depth': 6, 'min_samples_split': 4,
          'learning_rate': 0.005}

GBR = ensemble.GradientBoostingRegressor(**params)

GBR.fit(X_train, Y_train)

# Mean squared error on the held-out rows.
mse = mean_squared_error(Y_test, GBR.predict(X_test))
print("MSE: %.4f" % mse)

In [None]:
# Held-out targets on the first line, the gradient-boosting predictions on the second.
print(Y_test, GBR.predict(X_test), sep="\n")

# K-fold(K=7)