In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA

In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [3]:
from sklearn import preprocessing

labelencoder = preprocessing.LabelEncoder()

train_df['Product_Info_2'] = labelencoder.fit_transform(train_df['Product_Info_2'])
test_df['Product_Info_2'] = labelencoder.fit_transform(test_df['Product_Info_2'])

In [4]:
train_df = train_df.fillna(0)
test_df = test_df.fillna(0)

In [79]:
response = train_df.iloc[:8000,-1:]
YTRAIN = response #.to_numpy()
train_df = train_df.iloc[:8000,:-1]
XTRAIN = train_df #.to_numpy()

test_df = test_df.iloc[:5000,:-1]
XTEST = test_df #.to_numpy()

In [80]:
k = 67

lpca = PCA(n_components = k)
XTrain_lpca = lpca.fit_transform(XTRAIN)
XTest_lpca = lpca.fit_transform(XTEST)

kpca = KernelPCA(n_components=k, kernel ='rbf')
XTrain_kpca = kpca.fit_transform(XTRAIN)
XTest_kpca = kpca.fit_transform(XTEST)

In [81]:
from sklearn.model_selection import train_test_split

lpca_Xtrain, lpca_Xtest, lpca_Ytrain, lpca_Ytest = train_test_split(XTrain_lpca, YTRAIN, test_size=0.5)
kpca_Xtrain, kpca_Xtest, kpca_Ytrain, kpca_Ytest = train_test_split(XTrain_kpca, YTRAIN, test_size=0.5)

In [83]:
#Logistic Regression using PCA

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model= LogisticRegression(multi_class='ovr', solver='liblinear',C=0.1, random_state=40)
model.fit(lpca_Xtrain,lpca_Ytrain)
pred = model.predict(lpca_Xtest)
#print(pred)
pred = list(pred)
pred = pd.DataFrame(pred, columns=['Response'])
#lpca_Ytest = pred.to_numpy()
pred.to_excel('our_output.xlsx', index = False)
print('Model accuracy score with Logistic Regression using PCA : {0:0.4f}'. format(accuracy_score(lpca_Ytest, pred)*100))

  return f(*args, **kwargs)


Model accuracy score with Logistic Regression using PCA : 99.8000


In [84]:
#Logistic Regression using Kernel PCA

model= LogisticRegression(multi_class='ovr', solver='liblinear',C=0.1, random_state=40)
model.fit(kpca_Xtrain,kpca_Ytrain)
pred = model.predict(kpca_Xtest)
#print(pred)
pred = list(pred)
pred = pd.DataFrame(pred, columns=['Response'])
#kpca_Ytest = pred.to_numpy()
pred.to_excel('our_output.xlsx', index = False)
print('Model accuracy score with Logistic Regression using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, pred)*100))

Model accuracy score with Logistic Regression using Kernel PCA : 94.4250


  return f(*args, **kwargs)


In [85]:
#Linear Regression using PCA

from sklearn.linear_model import LinearRegression

model= LinearRegression()
model.fit(lpca_Xtrain,lpca_Ytrain)
pred = model.predict(lpca_Xtest)

pred_linalg= []
for i in pred:
    i = i.astype(int)
    pred_linalg.append(i)

#print(pred)
accuracy = accuracy_score(lpca_Ytest, pred_linalg)*100
accuracy

print('Model accuracy score with Linear Regression using PCA : ',accuracy)

Model accuracy score with Linear Regression using PCA :  94.825


In [86]:
#Linear Regression using Kernel PCA

model= LinearRegression()
model.fit(kpca_Xtrain,kpca_Ytrain)
pred = model.predict(kpca_Xtest)

pred_linalg= []
for i in pred:
    i = i.astype(int)
    pred_linalg.append(i)

#print(pred)
accuracy = accuracy_score(kpca_Ytest, pred_linalg)*100
accuracy

print('Model accuracy score with Linear Regression using Kernel PCA : ',accuracy)

Model accuracy score with Linear Regression using Kernel PCA :  94.325


In [87]:
#Support Vector Classification using PCA

from sklearn.svm import SVC 

clf = SVC(kernel='poly', C=0.1, random_state=40)
clf.fit(lpca_Xtrain,lpca_Ytrain)
pred=clf.predict(lpca_Xtest)
#print(pred)
print('Model accuracy score with SVMs using PCA : {0:0.4f}'. format(accuracy_score(lpca_Ytest, pred)*100))

Model accuracy score with SVMs using PCA : 94.2500


  return f(*args, **kwargs)


In [88]:
#Support Vector Classification using Kernel PCA

clf = SVC(kernel='poly', C=0.1, random_state=40)
clf.fit(kpca_Xtrain,kpca_Ytrain)
pred=clf.predict(kpca_Xtest)
#print(pred)
print('Model accuracy score with SVMs using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, pred)*100))

  return f(*args, **kwargs)


Model accuracy score with SVMs using Kernel PCA : 94.3500


In [89]:
#SGD Classifier using PCA

from sklearn.linear_model import SGDClassifier

model = SGDClassifier(loss="hinge", max_iter =100, penalty='elasticnet', random_state=40)
model.fit(lpca_Xtrain,lpca_Ytrain)
pred = model.predict(lpca_Xtest)
#print(pred)

print('Model accuracy score with SGD Classifier using PCA : {0:0.4f}'. format(accuracy_score(lpca_Ytest, pred)*100))

Model accuracy score with SGD Classifier using PCA : 58.0500


  return f(*args, **kwargs)


In [90]:
#SGD Classifier using Kernel PCA

model = SGDClassifier(loss="hinge", max_iter =100, penalty='elasticnet', random_state=40)
model.fit(kpca_Xtrain,kpca_Ytrain)
pred = model.predict(kpca_Xtest)
#print(pred)

print('Model accuracy score with SGD Classifier using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, pred)*100))

Model accuracy score with SGD Classifier using Kernel PCA : 94.4250


  return f(*args, **kwargs)


In [91]:
#XGB Classifier using PCA

from xgboost import XGBClassifier

model = XGBClassifier(booster='gbtree', gamma=1,
              learning_rate=0.3, max_depth=4,
              min_child_weight=1, n_estimators=100, reg_lambda=3, random_state=40)
model.fit(lpca_Xtrain,lpca_Ytrain)
pred = model.predict(lpca_Xtest)
#print(pred)

print('Model accuracy score with XGB Classifier using PCA : {0:0.4f}'. format(accuracy_score(lpca_Ytest, pred)*100))

  return f(*args, **kwargs)


Model accuracy score with XGB Classifier using PCA : 99.9500


In [92]:
#XGB Classifier using Kernel PCA

from xgboost import XGBClassifier

model = XGBClassifier(booster='gbtree', gamma=1,
              learning_rate=0.3, max_depth=4,
              min_child_weight=1, n_estimators=100, reg_lambda=3, random_state=40)
model.fit(kpca_Xtrain,kpca_Ytrain)
pred = model.predict(kpca_Xtest)
#print(pred)

print('Model accuracy score with XGB Classifier using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, pred)*100))



  return f(*args, **kwargs)


Model accuracy score with XGB Classifier using Kernel PCA : 94.4250


In [93]:
#Random Forest with 10 Decision Trees using PCA

from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(random_state=40)
rfc.fit(lpca_Xtrain, lpca_Ytrain)

y_pred = rfc.predict(lpca_Xtest)

print('Model accuracy score with 10 decision-trees using PCA: {0:0.4f}'. format(accuracy_score(lpca_Ytest, y_pred)*100))

  rfc.fit(lpca_Xtrain, lpca_Ytrain)


Model accuracy score with 10 decision-trees using PCA: 99.9250


In [94]:
#Random Forest with 10 Decision Trees using Kernel PCA

rfc = RandomForestClassifier(random_state=40)
rfc.fit(kpca_Xtrain, kpca_Ytrain)

y_pred = rfc.predict(kpca_Xtest)

print('Model accuracy score with 10 decision-trees using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, y_pred)*100))

  rfc.fit(kpca_Xtrain, kpca_Ytrain)


Model accuracy score with 10 decision-trees using Kernel PCA : 94.0000


In [95]:
#K Neighbours Classifier using PCA

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=40)
model.fit(lpca_Xtrain,lpca_Ytrain)
pred = model.predict(lpca_Xtest)
#print(pred)

print('Model accuracy score with K Neighbours Classifier using PCA : {0:0.4f}'. format(accuracy_score(lpca_Ytest, pred)*100))

  return self._fit(X, y)


Model accuracy score with K Neighbours Classifier using PCA : 94.2500


In [96]:
#K Neighbours Classifier using Kernel PCA

model = KNeighborsClassifier(n_neighbors=40)
model.fit(kpca_Xtrain,kpca_Ytrain)
pred = model.predict(kpca_Xtest)
#print(pred)

print('Model accuracy score with K Neighbours Classifier using Kernel PCA : {0:0.4f}'. format(accuracy_score(kpca_Ytest, pred)*100))

  return self._fit(X, y)


Model accuracy score with K Neighbours Classifier using Kernel PCA : 94.4250


In [97]:
#Artificial Neural Network using PCA

import tensorflow as tf

tf.random.set_seed(40)
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=6,activation="relu"))
ann.add(tf.keras.layers.Dense(units=6,activation="relu"))
ann.add(tf.keras.layers.Dense(units=1,activation="sigmoid"))
ann.compile(optimizer="adam",loss="categorical_crossentropy",metrics=['accuracy'])

ann.fit(lpca_Xtrain,lpca_Ytrain,batch_size=32,epochs = 20)
pred = ann.predict(lpca_Xtest)

pred_ann= []
for i in pred:
    i = i.astype(int)
    pred_ann.append(i)
#print(pred)
accuracy = accuracy_score(lpca_Ytest, pred_ann)*100
print('Model accuracy score with ANN using PCA : ', accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model accuracy score with ANN using PCA :  94.25


In [98]:
#Artificial Neural Network using Kernel PCA

tf.random.set_seed(40)
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=6,activation="relu"))
ann.add(tf.keras.layers.Dense(units=6,activation="relu"))
ann.add(tf.keras.layers.Dense(units=1,activation="sigmoid"))
ann.compile(optimizer="adam",loss="categorical_crossentropy",metrics=['accuracy'])

ann.fit(kpca_Xtrain,kpca_Ytrain,batch_size=32,epochs = 20)
pred = ann.predict(kpca_Xtest)
#print(pred)

pred_ann= []
for i in pred:
    i = i.astype(int)
    pred_ann.append(i)
    
accuracy = accuracy_score(kpca_Ytest, pred_ann)*100
print('Model accuracy score with ANN using Kernel PCA : ', accuracy)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model accuracy score with ANN using Kernel PCA :  94.425
