# KNN  -  Accuracy estimation

In [6]:
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
pd.options.mode.chained_assignment = None

# Nested 10x10 cross-validation for a KNN classifier.
# Outer folds estimate test performance; for every outer fold an inner grid
# search picks n_neighbors. Per-fold scores go to grid_search_KNN.csv and every
# (actual, predicted) pair goes to KNN_results.csv.

# Load dataset: the first spreadsheet column becomes the index, 'Output' is the
# label and the first nine columns of the resulting frame are the features.
df = pd.read_excel('HCCI CFR data.xlsx', sheet_name = 'Data Compiled', index_col=0)
target = df['Output']
features = df[df.columns[0:9]]

# Define search space
n_neighbors = [2,3,5,6,7,9,10,11,13,14,15,17,18,19]

# Setup the grid to be searched over
param_grid = dict(n_neighbors=n_neighbors)

# Define outer folds (a one-shot generator: it is consumed by the loop below)
kFolds = KFold(n_splits=10, shuffle=True, random_state=1).split(X=features.values, y=target.values)

# Define inner folds. GridSearchCV refits the best candidate on the full data
# passed to fit() by default, so best_estimator_ is ready to predict with.
grid_search = GridSearchCV(KNeighborsClassifier(weights='uniform'), param_grid, cv=KFold(n_splits=10, shuffle=True, random_state=1),
                           n_jobs=19, verbose=1, scoring='precision_micro')

# Both CSV files are opened with newline='' (required by the csv module, otherwise
# the excel dialect emits blank rows on Windows) and inside a `with` so they are
# closed even when a fold raises.
with open("grid_search_KNN.csv", 'w', newline='') as out_file, \
        open("KNN_results.csv", 'w', newline='') as KNN_file:

    # Per-fold summary: chosen hyper-parameter and test score
    wr = csv.writer(out_file, dialect='excel')
    wr.writerow(['neighbours', 'micro_precision'])
    out_file.flush()

    # Per-sample predictions
    wrr = csv.writer(KNN_file, dialect='excel')
    wrr.writerow(['Actual', 'Predicted'])
    KNN_file.flush()

    for index_train, index_test in kFolds:

        # Get train and test splits
        x_train, x_test = features.iloc[index_train].values, features.iloc[index_test].values
        y_train, y_test = target.iloc[index_train].values, target.iloc[index_test].values

        # Apply min max normalization, fitted on the outer-training split only.
        # NOTE(review): the inner CV folds see data scaled with statistics of the
        # whole outer-training split; wrapping scaler + classifier in a Pipeline
        # would remove that small leak from the hyper-parameter selection.
        scaler = MinMaxScaler().fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # Fit (inner grid search + refit of the winner on x_train)
        grid_search.fit(x_train, y_train)

        # Get best params
        best_params = grid_search.best_params_

        ##Testing
        # The refitted winner is the same model the original code rebuilt by hand.
        knn = grid_search.best_estimator_
        Y_pred = knn.predict(x_test)

        # precision_score expects (y_true, y_pred); compute it once and reuse it.
        precision = metrics.precision_score(y_test, Y_pred, average='micro')
        print("precision:", precision)

        # Write fold summary
        wr.writerow([best_params['n_neighbors'], precision])
        out_file.flush()

        # Write every (actual, predicted) pair of this fold, flushing once per fold
        wrr.writerows(zip(y_test, Y_pred))
        KNN_file.flush()

Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    4.9s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   17.1s finished


precision: 0.9
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.2s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.9s finished


precision: 0.897115384615
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.3s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.2s finished


precision: 0.876923076923
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.4s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.3s finished


precision: 0.880769230769
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.8s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.6s finished


precision: 0.904807692308
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.4s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   17.0s finished


precision: 0.886429258903
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.1s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   15.4s finished


precision: 0.898941289702
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.2s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   15.3s finished


precision: 0.881616939365
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.2s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.4s finished


precision: 0.881616939365
Fitting 10 folds for each of 14 candidates, totalling 140 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:    5.4s
[Parallel(n_jobs=19)]: Done 140 out of 140 | elapsed:   16.1s finished


precision: 0.876804619827


# SVM  -  Accuracy estimation

In [2]:
import numpy as np
import pandas as pd
import csv
import pickle
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
pd.options.mode.chained_assignment = None

# Nested 10x10 cross-validation for an RBF-kernel SVC.
# Outer folds estimate test performance; for every outer fold an inner grid
# search picks C. Per-fold scores go to grid_search_SVM.csv and every
# (actual, predicted) pair goes to SVM_results.csv.

# Load dataset: the first spreadsheet column becomes the index, 'Output' is the label.
df = pd.read_excel('HCCI CFR data.xlsx', sheet_name = 'Data Compiled', index_col=0)
target = df['Output']
# features = df[df.columns[0:7]]
#for sensitivity analysis remove a feature and run this block -- repeat this for all the seven features
features = df[['RON','S','Fuel rate','O2','Intake Temperature','Intake Pressure','Compression ratio']]
# Define search space
C = [1, 10, 100, 1000, 10000]

# Setup the grid to be searched over
param_grid = dict(C=C)

####Test sets accuracy

# Define outer folds (a one-shot generator: it is consumed by the loop below)
kFolds = KFold(n_splits=10, shuffle=True, random_state=1).split(X=features.values, y=target.values)

# Define inner folds. GridSearchCV refits the best candidate on the full data
# passed to fit() by default, so best_estimator_ is ready to predict with.
grid_search = GridSearchCV(SVC(gamma='auto', kernel = 'rbf'), param_grid, cv=KFold(n_splits=10, shuffle=True, random_state=1),
                           n_jobs=19, verbose=1, scoring='precision_micro')

# Both CSV files are opened with newline='' (required by the csv module, otherwise
# the excel dialect emits blank rows on Windows) and inside a `with` so they are
# closed even when a fold raises.
with open("grid_search_SVM.csv", 'w', newline='') as out_file, \
        open("SVM_results.csv", 'w', newline='') as SVM_file:

    # Per-fold summary: chosen hyper-parameter and test score
    wr = csv.writer(out_file, dialect='excel')
    wr.writerow(['C', 'micro_precision'])
    out_file.flush()

    # Per-sample predictions
    wrr = csv.writer(SVM_file, dialect='excel')
    wrr.writerow(['Actual', 'Predicted'])
    SVM_file.flush()

    for index_train, index_test in kFolds:
        # Get train and test splits
        x_train, x_test = features.iloc[index_train].values, features.iloc[index_test].values
        y_train, y_test = target.iloc[index_train].values, target.iloc[index_test].values

        # Apply min max normalization, fitted on the outer-training split only.
        # NOTE(review): the inner CV folds see data scaled with statistics of the
        # whole outer-training split; wrapping scaler + classifier in a Pipeline
        # would remove that small leak from the hyper-parameter selection.
        scaler = MinMaxScaler().fit(x_train)
        x_train = scaler.transform(x_train)
        x_test = scaler.transform(x_test)

        # Fit (inner grid search + refit of the winner on x_train)
        grid_search.fit(x_train, y_train)

        # Get best params
        best_params = grid_search.best_params_

        ##Testing
        # The refitted winner is the same model the original code rebuilt by hand.
        svm = grid_search.best_estimator_
        Y_pred = svm.predict(x_test)

        # precision_score expects (y_true, y_pred); compute it once and reuse it.
        precision = metrics.precision_score(y_test, Y_pred, average='micro')
        print("precision:", precision)

        # Write fold summary
        wr.writerow([best_params['C'], precision])
        out_file.flush()

        # Write every (actual, predicted) pair of this fold, flushing once per fold
        wrr.writerows(zip(y_test, Y_pred))
        SVM_file.flush()


Fitting 10 folds for each of 5 candidates, totalling 50 fits


[Parallel(n_jobs=19)]: Done  12 tasks      | elapsed:   11.1s
[Parallel(n_jobs=19)]: Done  50 out of  50 | elapsed:   57.9s finished


{'C': 1000}
0.919865319865


# SVM  -  Final Model

In [None]:
import numpy as np
import pandas as pd
import csv
import pickle
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
pd.options.mode.chained_assignment = None

# Train the final RBF-kernel SVC on the complete dataset and pickle it.
# C is chosen by a 10-fold grid search over the whole (min-max scaled) data.

# Load dataset: the first spreadsheet column becomes the index, 'Output' is the label.
df = pd.read_excel('HCCI CFR data.xlsx', sheet_name = 'Data Compiled', index_col=0)
target = df['Output']
# features = df[df.columns[0:9]]
features = df[['RON','S','Fuel rate','O2','Intake Temperature','Intake Pressure','Compression ratio']]
# features = df[['RON','S','Fuel rate','O2','Intake Temperature','Intake Pressure','Compression ratio', 'M.W', 'LHV(KJ/kg)']]
# Define search space
C = [1, 10, 100, 1000, 10000]

# Setup the grid to be searched over
param_grid = dict(C=C)


#####Save Final Model

# Define grid search
grid_search = GridSearchCV(SVC(kernel='rbf', gamma='auto'), param_grid, cv=KFold(n_splits=10, shuffle=True, random_state=1),
                           n_jobs=19, verbose=1, scoring='precision_micro')

# Split data in to features and target
x_train = features.values
y_train = target.values

# Apply min max normalization
# NOTE(review): only the classifier is persisted below, not this fitted scaler,
# so whoever loads the model must supply inputs that are already min-max scaled.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)

# Find best parameters
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

# GridSearchCV already refits the best candidate on all of x_train by default,
# so reuse that estimator instead of training an identical SVC a second time.
best_params = grid_search.best_params_
model = grid_search.best_estimator_

# save the model; the `with` block guarantees the file is flushed and closed
filename = 'final_SVR_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)


# Data for contour diagrams

In [1]:
import numpy as np
import pandas as pd
import pickle
import csv
from keras.models import load_model

# Generate the data behind the contour diagrams: sweep RON and fuel rate over a
# 101 x 101 grid for every combination of intake pressure and compression ratio,
# and record the class predicted by the saved model for each grid point.

# Load the pickled classifier (the file name says "SVR" but the object stored by
# the training cell is an SVC).
# NOTE: pickle.load executes arbitrary code - only load files you created yourself.
filename = 'final_SVR_model.sav'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

# Fixed inputs, written exactly as they appear in the output rows.
# NOTE(review): values look like min-max scaled units in [0, 1], matching the
# scaling applied before the model was trained - confirm against the training cell.
s_value = 0.34
o2_value = 1
intake_temperature = 0.3333

# Swept inputs: the outer pair selects one diagram, the grid covers RON x fuel rate.
intake_pressures = (0, 0.2)
compression_ratios = (0, 0.3333, 0.6666, 1)
grid_steps = range(0, 101, 1)

with open("Counter_diagram_data.csv", 'w') as out_file:
    wr = csv.writer(out_file, dialect='excel', lineterminator = '\n')
    # One column per model input plus the prediction. The previous header also
    # listed 'MW' and 'LHV', which are never written, so 'Output' was misaligned.
    headers = ['RON','S','Fuel rate','O2','Intake Temperature','Intake Pressure','CR','Output']
    wr.writerow(headers)
    out_file.flush()

    for intake_pressure in intake_pressures:
        for compression_ratio in compression_ratios:
            # Same row order as the original nested loops: RON outer, fuel rate inner.
            grid = [[i/100, s_value, j/100, o2_value, intake_temperature, intake_pressure, compression_ratio]
                    for i in grid_steps
                    for j in grid_steps]

            # One batched predict per diagram instead of 10201 single-row calls.
            predictions = model.predict(grid)

            # Write results
            wr.writerows(point + [label] for point, label in zip(grid, predictions))
            out_file.flush()

Using TensorFlow backend.
