# no normalizing version


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the measurement file into a 2-D float array (one row per run).
data = np.genfromtxt('data_dep_v2.txt')

# Shuffle the rows in place; the fixed seed keeps the row order repeatable.
np.random.seed(12)
np.random.shuffle(data)

# Columns 0-5 are the model inputs, column 6 is the regression target.
X, Y = data[:, :6], data[:, 6]
dfX, dfy = pd.DataFrame(X), pd.DataFrame(Y)

# Labelled side-by-side view of inputs and target, used only for display.
table = pd.concat((dfX, dfy), axis=1)
table.columns = ['SiH4', 'N2O', 'Temp', 'Pressure', 'RF Power', 'Time',
                 'dep.rate(nano)']

In [3]:
# Dump the shuffled, labelled table as plain text for a visual sanity check.
print(table)

     SiH4     N2O   Temp  Pressure  RF Power   Time  dep.rate(nano)
0   833.0   167.0  375.0      1.80     150.0   45.0           10.05
1   400.0   900.0  200.0      0.25     150.0  157.0            3.62
2   200.0   400.0  200.0      1.80      20.0  187.0            3.04
3   200.0   400.0  200.0      0.25     150.0  192.0            2.87
4   200.0   400.0  400.0      1.80     150.0  281.0            1.73
5   200.0   900.0  400.0      1.80      20.0  180.0            2.68
6   400.0   900.0  150.0      1.80      80.0  128.0            4.09
7   300.0   650.0  300.0      1.03      85.0  115.0            4.33
8   400.0   900.0  400.0      0.25      20.0  600.0            0.56
9   909.0    91.0  375.0      1.80     150.0   40.0           10.64
10  200.0   900.0  400.0      0.25     150.0  243.0            2.22
11  400.0   400.0  200.0      0.25      20.0  381.0            1.39
12  400.0   400.0  400.0      1.80      20.0  111.0            4.60
13  400.0   400.0  400.0      0.25     150.0  13

In [4]:
from sklearn.metrics import mean_squared_error




def test_1(func, x, y, k) :
    
    L = x.shape[0]
       
    if k >= L :
        print('error\n')
        return -1
    
    d = int(L/k)
      
    mse = 0.0
    
    for i in range(0,d) :
        
        x_test = x[i*k:(i+1)*k]
        y_test = y[i*k:(i+1)*k]
        
        x_train = np.concatenate([x[0 : i*k], x[(i+1)*k : L]], axis=0)
        y_train = np.concatenate([y[0 : i*k], y[(i+1)*k : L]], axis=0)
        
        func.fit(x_train, y_train)
        print("%d MSE : %f" %(i+1,mean_squared_error(y_test, func.predict(x_test))))
        mse = mse + mean_squared_error(y_test, func.predict(x_test))
        
    x_test = x[d*k:L]
    y_test = y[d*k:L]
    x_train = x[0:d*k]
    y_train = y[0:d*k]
    
    func.fit(x_train, y_train)
    print("%d MSE : %f"%(d+1, mean_squared_error(y_test, func.predict(x_test))))
    mse = mse + mean_squared_error(y_test, func.predict(x_test))
    
    return mse/(d+1)

def test_2(func, x, y) :
    
    L = x.shape[0]
      
    x_test = x[0:L - 5]
    y_test = y[0:L - 5]
    x_train = x[L-5:L]
    y_train = y[L-5:L]
    
    func.fit(x_train, y_train)
    print("MSE : %f"%( mean_squared_error(y_test, func.predict(x_test))))
    mse = mean_squared_error(y_test, func.predict(x_test))
    
    return mse



# The evaluation helpers do their own train/test partitioning, so the complete
# shuffled dataset is handed to them under the "train" names.
X_train, Y_train = X, Y

## Linear regression

### Multivariate regression  

In [5]:
from sklearn import linear_model

# Ordinary least-squares baseline on the six unscaled input columns.
MulReg=linear_model.LinearRegression()

# Block-wise cross-validation, holding out 5 rows at a time.
mse = test_1(MulReg, X_train, Y_train, k = 5)


# Average of the per-block errors that test_1 printed above.
print("MSE: %.4f" % mse)

1 MSE : 0.745490
2 MSE : 0.689580
3 MSE : 0.969179
4 MSE : 0.380060
5 MSE : 2.221112
6 MSE : 0.530656
MSE: 0.9227


### Ridge Regression (Linear least squares with l2 regularization)

In [6]:
# L2-penalised linear model; alpha=0.35 is a fixed, hand-picked penalty weight.
ridge = linear_model.Ridge(alpha=0.35)

# Same block-wise cross-validation as the plain linear model (5 rows held out per block).
mse = test_1(ridge, X_train, Y_train, k = 5)

# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)

1 MSE : 0.745555
2 MSE : 0.689629
3 MSE : 0.966624
4 MSE : 0.380184
5 MSE : 2.219479
6 MSE : 0.528627
MSE: 0.9217


### Lasso Regression (Linear least squares with l1 regularization)

In [7]:
# L1-penalised linear model; alpha=0.35 mirrors the value used for Ridge.
lasso = linear_model.Lasso(alpha=0.35)


# Same block-wise cross-validation as the other linear models (5 rows held out per block).
mse = test_1(lasso, X_train, Y_train, k = 5)


# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)

1 MSE : 0.744219
2 MSE : 0.695555
3 MSE : 0.930884
4 MSE : 0.388015
5 MSE : 2.182488
6 MSE : 0.468580
MSE: 0.9016


# Support Vector Regression



### *가장 낮은 MSE값을 가지는 C와 kernel을 선택

In [8]:
import numpy as np
from sklearn.svm import SVR

In [9]:
# Exhaustive sweep over C, kernel and epsilon for SVR.  Every combination is
# scored with the single hold-out split of test_2 and its result is printed;
# nothing is selected automatically, so the best setting has to be read off
# the output.  The grid is flattened up front with C varying slowest and
# epsilon fastest.
for c, i, e in [(C_val, kern, eps)
                for C_val in [0.01, 0.1, 1, 10, 100]
                for kern in ["linear", "rbf", "sigmoid"]
                for eps in [0.001, 0.01, 0.1]]:
    svr_ck = SVR(kernel=i, C=c, epsilon=e)
    mse = test_2(svr_ck, X_train, Y_train)
    print("kernel:", i, "\n", "C:", c, "\n", "MSE: ", mse, "\n", "epsilon:", e, "\n", "-"*25)

MSE : 31.554629
kernel: linear 
 C: 0.01 
 MSE:  31.5546287277 
 epsilon: 0.001 
 -------------------------
MSE : 30.680402
kernel: linear 
 C: 0.01 
 MSE:  30.6804015226 
 epsilon: 0.01 
 -------------------------
MSE : 22.655434
kernel: linear 
 C: 0.01 
 MSE:  22.6554339569 
 epsilon: 0.1 
 -------------------------
MSE : 5.877662
kernel: rbf 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.001 
 -------------------------
MSE : 5.877662
kernel: rbf 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.01 
 -------------------------
MSE : 5.877662
kernel: rbf 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.1 
 -------------------------
MSE : 5.877662
kernel: sigmoid 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.001 
 -------------------------
MSE : 5.877662
kernel: sigmoid 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.01 
 -------------------------
MSE : 5.877662
kernel: sigmoid 
 C: 0.01 
 MSE:  5.87766190476 
 epsilon: 0.1 
 -------------------------
MSE : 31.554629
kernel: linear 
 C: 0.1 
 MSE:  3

### rbf kernel?

In [10]:
import numpy as np
from sklearn.svm import SVR

In [11]:
# RBF-kernel SVR with hand-picked C and gamma; epsilon is not set here.
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

# Scored with block-wise cross-validation (test_1), whereas the parameter sweep
# earlier in the notebook used the single split of test_2, so the numbers are
# not directly comparable.
mse = test_1(svr_rbf, X_train, Y_train, k = 5)


# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)

1 MSE : 9.030690
2 MSE : 11.815228
3 MSE : 2.186680
4 MSE : 0.853981
5 MSE : 6.530772
6 MSE : 0.174216
MSE: 5.0986


### poly kernel 

In [12]:
#svr_poly = SVR(kernel='poly', C=1e3, gamma=0.1)

#mse = test_1(svr_poly, X_train, Y_train, k = 5)


#print("MSE: %.4f" % mse)

### sigmoid kernel

In [13]:
# Sigmoid-kernel SVR with the same C and gamma as the RBF run, for comparison.
svr_sigmoid = SVR(kernel='sigmoid', C=1e3, gamma=0.1)

# Block-wise cross-validation, holding out 5 rows at a time.
mse = test_1(svr_sigmoid, X_train, Y_train, k = 5)


# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)

1 MSE : 9.013160
2 MSE : 12.145945
3 MSE : 1.757360
4 MSE : 1.024345
5 MSE : 6.369825
6 MSE : 0.483025
MSE: 5.1323


### no kernel?


In [14]:
#svr_lin = SVR(kernel='linear', C=1e2)


#mse = test_1(svr_lin, X_train, Y_train, k = 5)


#print("MSE: %.4f" % mse)

## Kernel Ridge Regression 

In [15]:
from sklearn.kernel_ridge import KernelRidge


# Three kernel ridge models that differ only in the kernel; the regularisation
# strength is not set here.
# NOTE(review): gamma is presumably irrelevant for the linear kernel - confirm
# against the KernelRidge documentation.
kr_linear = KernelRidge(kernel='linear', gamma=0.1)
kr_rbf = KernelRidge(kernel='rbf', gamma=0.1)
kr_poly = KernelRidge(kernel='poly', gamma=0.1)




# Score each model with the same block-wise cross-validation (5 rows held out
# per block).  `mse` is overwritten each time and ends up holding the poly score.
mse = test_1(kr_linear, X_train, Y_train, k = 5)
print("MSE(linear): %.4f\n" % mse)
mse = test_1(kr_rbf, X_train, Y_train, k = 5)
print("MSE(rbf): %.4f\n" % mse)
mse = test_1(kr_poly, X_train, Y_train, k = 5)
print("MSE(poly): %.4f\n" % mse)

1 MSE : 0.848610
2 MSE : 0.831961
3 MSE : 0.544645
4 MSE : 0.384094
5 MSE : 1.947071
6 MSE : 0.869312
MSE(linear): 0.9043

1 MSE : 26.915660
2 MSE : 28.445960
3 MSE : 12.204500
4 MSE : 12.547085
5 MSE : 17.827020
6 MSE : 18.147600
MSE(rbf): 19.3480

1 MSE : 18.188351
2 MSE : 111.132252
3 MSE : 117.162871
4 MSE : 3.019767
5 MSE : 257.975849
6 MSE : 0.022484
MSE(poly): 84.5836



## Decision Tree Regression
### basic

In [16]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [17]:
# Single regression tree, capped at depth 4.
regr_1 = DecisionTreeRegressor(max_depth=4)

# Block-wise cross-validation, holding out 5 rows at a time.
mse = test_1(regr_1, X_train, Y_train, k = 5)

# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)



1 MSE : 0.167902
2 MSE : 1.386256
3 MSE : 0.198410
4 MSE : 0.119245
5 MSE : 1.489790
6 MSE : 0.000400
MSE: 0.5603


### with AdaBoost

In [18]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

In [19]:
# Dedicated random generator for the boosting run, seeded independently of the
# global NumPy seed set when the data was shuffled.
rng = np.random.RandomState(1)


# 300 boosted copies of the same depth-4 tree used in the basic run.
regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                           n_estimators=300, random_state=rng)


# Block-wise cross-validation, holding out 5 rows at a time.
mse = test_1(regr_2, X_train, Y_train, k = 5)


# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)




1 MSE : 0.245422
2 MSE : 0.127520
3 MSE : 0.120703
4 MSE : 0.129513
5 MSE : 0.836134
6 MSE : 0.013225
MSE: 0.2454


## Gradient Boosting Regression

In [20]:
from sklearn import ensemble

In [21]:
# Hyper-parameters for the gradient-boosted ensemble: many shallow-ish trees
# combined with a very small learning rate.  The loss is deliberately left at
# the estimator's default (least squares): the explicit 'ls' alias used
# before was removed in scikit-learn 1.2 and raises there, while the default
# means the same thing on old and new releases alike.
params = {'n_estimators': 500, 'max_depth': 6, 'min_samples_split': 4,
          'learning_rate': 0.005}

GBR = ensemble.GradientBoostingRegressor(**params)

# Block-wise cross-validation, holding out 5 rows at a time.
mse = test_1(GBR, X_train, Y_train, k = 5)

# Average of the per-block errors printed by test_1.
print("MSE: %.4f" % mse)

1 MSE : 0.114514
2 MSE : 0.409855
3 MSE : 0.153836
4 MSE : 0.163267
5 MSE : 0.226841
6 MSE : 0.012582
MSE: 0.1801
