In [None]:
import pandas as pd

**Importing test and train data**

In [None]:
# Load both splits; the first CSV column becomes the row index.
TRAIN_CSV = 'sample_data/train/train.csv'
TEST_CSV = 'sample_data/test/test.csv'

train_data = pd.read_csv(TRAIN_CSV, index_col=0)
test_data = pd.read_csv(TEST_CSV, index_col=0)

In [None]:
# The 'id' column is removed from both splits before modelling.
train_data = train_data.drop(columns=['id'])
test_data = test_data.drop(columns=['id'])

In [None]:
# Print a summary of the training frame (columns, dtypes, non-null counts).
train_data.info()

**handling missing data**

First, we check whether the train and test data contain any missing values.

Then, we should fill the missing data with a value.

In [None]:
# Per-column count of missing values in each split.
train_missingdata = train_data.isnull().sum()
test_missingdata = test_data.isnull().sum()

# Display the counts side by side, restricted to columns that actually have
# gaps, so the cell shows its findings instead of silently storing them.
missing_summary = pd.concat(
    [train_missingdata, test_missingdata], axis=1, keys=['train', 'test']
)
missing_summary[missing_summary.sum(axis=1) > 0]

As a result, there are some missing values in both the train and test data, in the "Arrival Delay in Minutes" column. We'll fill them with the column mean.

In [None]:
# Impute missing arrival delays with the mean computed on the TRAIN split only.
# Using the test split's own mean would leak test-set statistics into the
# preprocessing and make train/test features inconsistent.
arrival_delay_col = 'Arrival Delay in Minutes'
arrival_delay_mean = train_data[arrival_delay_col].mean()

train_data[arrival_delay_col] = train_data[arrival_delay_col].fillna(arrival_delay_mean)
test_data[arrival_delay_col] = test_data[arrival_delay_col].fillna(arrival_delay_mean)

**labeling non-numerical data**

In [None]:
# Label the output as 0 and 1.
# The result is assigned back to the column instead of calling
# `.replace(..., inplace=True)` on `df[col]`: that form mutates a temporary
# Series (chained assignment), which warns in recent pandas and leaves the
# frame unchanged once Copy-on-Write is enabled.
satisfaction_codes = {'neutral or dissatisfied': 0, 'satisfied': 1}

train_data['satisfaction'] = train_data['satisfaction'].replace(satisfaction_codes)
test_data['satisfaction'] = test_data['satisfaction'].replace(satisfaction_codes)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit one LabelEncoder per object-typed column of the training frame and
# replace that column with its integer codes. The fitted encoders are kept
# in `lencoders`, keyed by column name.
lencoders = {}
object_columns = train_data.select_dtypes(include=['object']).columns
for col in object_columns:
    encoder = LabelEncoder()
    train_data[col] = encoder.fit_transform(train_data[col])
    lencoders[col] = encoder

In [None]:
# Encode the test split with the encoders fitted on the TRAIN split.
# Fitting fresh encoders on the test data can assign different integer codes
# to the same category whenever the two splits do not contain exactly the
# same set of values, silently corrupting the features.
# `lencoders_t` is kept (column -> encoder actually applied to the test split).
# A category or column unseen during training raises instead of being
# mis-encoded.
lencoders_t = {}
for col in test_data.select_dtypes(include=['object']).columns:
    lencoders_t[col] = lencoders[col]
    test_data[col] = lencoders_t[col].transform(test_data[col])

In [None]:
# Separate the feature columns from the 'satisfaction' target in each split.
target_col = 'satisfaction'

X_train, y_train = train_data.drop(columns=[target_col]), train_data[target_col]
X_test, y_test = test_data.drop(columns=[target_col]), test_data[target_col]

**normalizing data**

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize features using statistics learned from the TRAIN split only.
# The test split must be transformed with the already-fitted scaler; calling
# fit_transform on it would re-estimate mean/variance from test data, which
# leaks test information and puts the two splits on different scales.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

**simple Perceptron**

In [None]:
from sklearn.linear_model import Perceptron

# Train a default-configured linear perceptron (fit returns the estimator),
# then predict labels for the test features.
model = Perceptron().fit(X_train, y_train)

preds = model.predict(X_test)
preds

**accuracy**

In [None]:
from sklearn.metrics import accuracy_score

# Test-set accuracy of the predictions above, expressed as a percentage.
acc = 100 * accuracy_score(y_test, preds)
acc

**kernel perceptron**

In [None]:
import numpy as np

class KernelPerceptron(object):
  """Binary kernel perceptron trained in its dual form.

  One coefficient ``alpha[i]`` is kept per training sample and counts how
  often that sample was misclassified during training.

  Parameters
  ----------
  eta : float
    Stored on the instance but not used by the update rule: every mistake
    adds exactly 1.0 to ``alpha[i]``.
  n_iteration : int
    Number of passes over the training samples.
  kernel : callable
    ``kernel(a, b) -> float`` similarity between two samples. Defaults to
    the degree-3 polynomial kernel defined on this class.
  """

  def polynomial_kernel(x, y, p=3):
    """Polynomial kernel ``(1 + <x, y>) ** p``."""
    # Deliberately has no `self`: it is captured as a plain function for the
    # default value of `kernel` in __init__ below.
    return (1 + np.dot(x, y)) ** p

  def __init__(self, eta=0.01, n_iteration=1, kernel=polynomial_kernel):
    self.eta = eta
    self.n_iteration = n_iteration
    self.kernel = kernel

  def kernelCalculator(self, samples, X_train):
    """Build the ``samples x samples`` kernel (Gram) matrix.

    Entry ``[i, j]`` is ``self.kernel(X_train[i], X_train[j])``. Memory and
    time grow quadratically with ``samples``.
    """
    KernelMatrix = np.zeros((samples, samples))
    for i in range(samples):
      for j in range(samples):
        KernelMatrix[i, j] = self.kernel(X_train[i], X_train[j])

    return KernelMatrix

  def fit(self, X, y):
    """Learn the dual coefficients ``self.alpha`` from ``X`` and ``y``.

    ``y`` may use any two label values (e.g. 0/1 or -1/+1); the larger one is
    treated as the positive class. Labels are mapped to -1/+1 internally,
    because the update rule compares against ``np.sign`` of the score, which
    never equals a 0 label for a negative score.

    Raises
    ------
    ValueError
      If ``X`` is empty, ``X`` and ``y`` differ in length, or ``y`` holds
      more than two distinct labels.
    """
    # Plain arrays give positional indexing even when a pandas object with a
    # non-default index is passed in.
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    if n_samples == 0:
      raise ValueError("cannot fit KernelPerceptron on empty data")
    if len(y) != n_samples:
      raise ValueError("X and y must contain the same number of samples")

    classes = np.unique(y)
    if len(classes) > 2:
      raise ValueError("KernelPerceptron supports at most two distinct labels")

    # With a single class both labels coincide and every target is +1.
    self._neg_label = classes[0]
    self._pos_label = classes[-1]
    y_signed = np.where(y == self._pos_label, 1.0, -1.0)

    self.alpha = np.zeros(n_samples, dtype=np.float64)

    K = self.kernelCalculator(n_samples, X)
    for _ in range(self.n_iteration):
      for i in range(n_samples):
        # A score of exactly 0 has sign 0 and therefore counts as a mistake.
        if np.sign(np.sum(self.alpha * y_signed * K[:, i])) != y_signed[i]:
          self.alpha[i] += 1.0

    # Kept for predict().
    self._X_fit = X
    self._y_signed = y_signed

  def predict(self, X):
    """Predict a label for every row of ``X``.

    The score of a row ``x`` is ``sum_j alpha[j] * y_j * kernel(X_fit[j], x)``
    with ``y_j`` in {-1, +1}. Strictly positive scores yield the positive
    label; everything else (including 0) yields the negative label. Labels
    are returned in the same values that were passed to ``fit``.

    Raises
    ------
    RuntimeError
      If called before ``fit``.
    """
    if not hasattr(self, '_X_fit'):
      raise RuntimeError("fit must be called before predict")

    X = np.asarray(X)
    coef = self.alpha * self._y_signed
    # Only samples that were ever misclassified have a non-zero coefficient.
    support = np.flatnonzero(coef)

    scores = np.zeros(len(X), dtype=np.float64)
    for row, x in enumerate(X):
      for j in support:
        scores[row] += coef[j] * self.kernel(self._X_fit[j], x)

    return np.where(scores > 0, self._pos_label, self._neg_label)

In [None]:
# Train the kernel perceptron on the first rows only: the kernel matrix is
# n_samples x n_samples, so the training subset is kept small.
n_fit_rows = 1000

model = KernelPerceptron()
model.fit(X_train[:n_fit_rows], y_train[:n_fit_rows])
#preds = model.predict(X_test[:1000])