# Version without feature normalization


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the whitespace-delimited numeric table from disk.
data = np.genfromtxt('data.txt')

# Shuffle the rows in place with a fixed seed so every run sees the same order.
np.random.seed(12)
np.random.shuffle(data)

# First six columns are the inputs; the remaining columns are the outputs.
X, Y = data[:, 0:6], data[:, 6:16]
dfX, dfy = pd.DataFrame(X), pd.DataFrame(Y)

# Side-by-side table of inputs and outputs with human-readable column names.
table = pd.concat([dfX, dfy], axis=1)
table.columns = [
    'SiH4', 'N2O', 'Temp', 'Pressure', 'RF Power', 'Time',
    'Thickness', 'Depo. Rate', 'Uniformity', 'Ref. Index', 'Permittivity',
    'Etch Rate', 'Stress', 'H2O', 'SiOH',
]

In [3]:
# Show the combined table of inputs and outputs with its named columns.
print(table)

     SiH4     N2O   Temp  Pressure  RF Power   Time  Thickness  Depo. Rate  \
0   667.0   333.0  375.0      1.80     150.0   60.0       4.49       749.0   
1   550.0  1100.0  375.0      1.80     150.0   25.0       1.57       628.0   
2   300.0   600.0  375.0      1.80     150.0   65.0       1.95       301.0   
3   700.0  1400.0  375.0      1.80     150.0   35.0       2.85       813.0   
4   200.0   900.0  200.0      1.80     150.0  261.0       5.12       196.0   
5   200.0   900.0  400.0      1.80      20.0  180.0       4.83       268.0   
6   400.0   900.0  200.0      1.10      20.0  154.0       5.78       375.0   
7   400.0   900.0  400.0      0.25      20.0  600.0       3.36        56.0   
8   909.0    91.0  375.0      1.80     150.0   40.0       4.25      1064.0   
9   200.0   400.0  200.0      1.80      20.0  187.0       5.69       304.0   
10  400.0   400.0  400.0      1.80      20.0  111.0       5.10       460.0   
11  400.0   900.0  150.0      0.50      20.0  220.0       4.63  

In [4]:
from sklearn.metrics import mean_squared_error




def test_1(func, x, y, k) :
    
    L = x.shape[0]
       
    if k >= L :
        print('error\n')
        return -1
    
    d = int(L/k)
      
    mse = 0.0
    
    for i in range(0,d) :
        
        x_test = x[i*k:(i+1)*k]
        y_test = y[i*k:(i+1)*k]
        
        x_train = np.concatenate([x[0 : i*k], x[(i+1)*k : L]], axis=0)
        y_train = np.concatenate([y[0 : i*k], y[(i+1)*k : L]], axis=0)
        
        func.fit(x_train, y_train)
        print("%d MSE : %f" %(i+1,mean_squared_error(y_test, func.predict(x_test))))
        mse = mse + mean_squared_error(y_test, func.predict(x_test))
        
    x_test = x[d*k:L]
    y_test = y[d*k:L]
    x_train = x[0:d*k]
    y_train = y[0:d*k]
    
    func.fit(x_train, y_train)
    print("%d MSE : %f"%(d+1, mean_squared_error(y_test, func.predict(x_test))))
    mse = mse + mean_squared_error(y_test, func.predict(x_test))
    
    return mse/(d+1)

def test_2(func, x, y) :
    
    L = x.shape[0]
      
    x_test = x[0:L - 5]
    y_test = y[0:L - 5]
    x_train = x[L-5:L]
    y_train = y[L-5:L]
    
    func.fit(x_train, y_train)
    print("MSE : %f"%( mean_squared_error(y_test, func.predict(x_test))))
    mse = mean_squared_error(y_test, func.predict(x_test))
    
    return mse



# Features: every column of X (a full slice of the same array, not a copy).
# Target: only the first output column of Y, the one labelled 'Thickness' in `table`.
X_train = X[:,:]
Y_train = Y[:,0]

## Linear_regression

### Multivariate regression  

In [5]:
from sklearn import linear_model

# Plain linear regression baseline on the inputs as loaded (no normalization).
MulReg=linear_model.LinearRegression()

# Block-wise cross-validation via test_1, holding out 5 consecutive rows per fold.
mse = test_1(MulReg, X_train, Y_train, k = 5)


# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 3.489832
2 MSE : 2.577057
3 MSE : 0.736464
4 MSE : 3.521751
5 MSE : 1.754745
6 MSE : 0.908321
7 MSE : 1.270618
8 MSE : 0.024112
MSE: 1.7854


### Ridge Regression (Linear least squares with l2 regularization)

In [6]:
# Ridge (l2-regularized) linear model with penalty strength alpha=0.35.
ridge = linear_model.Ridge(alpha=0.35)

# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(ridge, X_train, Y_train, k = 5)

# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 3.504416
2 MSE : 2.565328
3 MSE : 0.727332
4 MSE : 3.535199
5 MSE : 1.766276
6 MSE : 0.903486
7 MSE : 1.287312
8 MSE : 0.018671
MSE: 1.7885


### Lasso Regression (Linear least squares with l1 regularization)

In [7]:
# Lasso (l1-regularized) linear model with penalty strength alpha=0.35.
lasso = linear_model.Lasso(alpha=0.35)


# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(lasso, X_train, Y_train, k = 5)


# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 4.053972
2 MSE : 2.327574
3 MSE : 0.632746
4 MSE : 3.956104
5 MSE : 2.182056
6 MSE : 0.893283
7 MSE : 1.790052
8 MSE : 0.134147
MSE: 1.9962


# Support Vector Regression



### rbf kernel?

In [10]:
import numpy as np
from sklearn.svm import SVR

In [11]:
# Support vector regressor with an RBF kernel; C and gamma are fixed by hand
# and the inputs are passed in as loaded (no normalization).
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)

# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(svr_rbf, X_train, Y_train, k = 5)


# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 4.597120
2 MSE : 0.881018
3 MSE : 0.103333
4 MSE : 3.351077
5 MSE : 0.895360
6 MSE : 1.903993
7 MSE : 0.840897
8 MSE : 0.049580
MSE: 1.5778


### sigmoid kernel

In [13]:
# Support vector regressor with a sigmoid kernel, using the same C and gamma
# as the RBF variant; inputs are passed in as loaded (no normalization).
svr_sigmoid = SVR(kernel='sigmoid', C=1e3, gamma=0.1)

# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(svr_sigmoid, X_train, Y_train, k = 5)


# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 5.563680
2 MSE : 0.897400
3 MSE : 0.051145
4 MSE : 3.807880
5 MSE : 0.243940
6 MSE : 2.069480
7 MSE : 0.279640
8 MSE : 0.062500
MSE: 1.6220


## Decision Tree Regression
### basic

In [16]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

In [17]:
# Single regression tree limited to depth 4.
regr_1 = DecisionTreeRegressor(max_depth=4)

# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(regr_1, X_train, Y_train, k = 5)

# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)



1 MSE : 3.558679
2 MSE : 2.943755
3 MSE : 0.227808
4 MSE : 3.691493
5 MSE : 0.062966
6 MSE : 1.169200
7 MSE : 0.183074
8 MSE : 0.171848
MSE: 1.5011


### with AdaBoost

In [18]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

In [19]:
# Fixed-seed random state handed to AdaBoost as its random_state.
rng = np.random.RandomState(1)


# AdaBoost ensemble of 300 depth-4 regression trees (same tree settings as regr_1).
regr_2 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                           n_estimators=300, random_state=rng)


# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(regr_2, X_train, Y_train, k = 5)
print("MSE: %.4f" % mse)




1 MSE : 1.830838
2 MSE : 1.416344
3 MSE : 0.218121
4 MSE : 3.762550
5 MSE : 0.097482
6 MSE : 1.207140
7 MSE : 0.218680
8 MSE : 0.126846
MSE: 1.1098


## Gradient Boosting Regression

In [20]:
from sklearn import ensemble

In [21]:
# Gradient-boosting hyper-parameters.  The loss is deliberately left at the
# estimator's default (least squares): the explicit spelling 'ls' was
# deprecated in scikit-learn 1.0 and removed in 1.2 (renamed
# 'squared_error'), so omitting it keeps this cell working on both old and
# new releases without changing the model.
params = {'n_estimators': 500, 'max_depth': 6, 'min_samples_split': 4,
          'learning_rate': 0.005}

GBR = ensemble.GradientBoostingRegressor(**params)

# Same evaluation as the other models: test_1 with 5 rows held out per fold.
mse = test_1(GBR, X_train, Y_train, k = 5)

# Average of the per-fold MSE values that test_1 printed.
print("MSE: %.4f" % mse)

1 MSE : 1.023105
2 MSE : 1.429912
3 MSE : 0.091072
4 MSE : 3.625780
5 MSE : 0.042804
6 MSE : 1.098253
7 MSE : 0.227660
8 MSE : 0.161117
MSE: 0.9625
