In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from tqdm import tqdm

In [3]:
def knn_model(train):
  return train


In [4]:
from math import sqrt
from random import seed
from random import randrange
from csv import reader
def euclidean_distance(row1,row2):
  distance =0.0
  for i in range(len(row1)-1):
    distance += (row1[i] - row2[i])**2
  return sqrt(distance)
def minkowski_distance(row1,row2,p):
  distance =0.0
  for i in range(len(row1)-1):
    distance += (row1[i] - row2[i])**p
  return distance**(1/p)  
def get_neighbors(train,test_row,num_neighbors):
  distances = list()
  for train_row in train:
    dist = euclidean_distance(test_row,train_row)
    distances.append((train_row,dist))
  distances.sort(key=lambda tup: tup[1])
  neighbors = list()
  for i in range(num_neighbors):
    neighbors.append(distances[i][0])
  return neighbors
def knn_predict(model,test_row,num_neighbor =2):
  neighbor = get_neighbors(model,test_row,num_neighbor)
  output_values = [row[-1] for row in neighbor]
  prediction = max(set(output_values),key = output_values.count)
  return prediction


In [5]:
def perceptron_predict(model,row):
  activation = model[0]
  for i in range(len(row)-1):
    activation += model[i+1]*row[i]
  return 1.0 if activation > 0.0 else 0.0
def perceptron_model(train,l_rate=0.01,n_epoch=5000):
  weight = [randrange(-1,1) for i in range(len(train[0]))]
  for epoch in tqdm(range(n_epoch), desc="perceptron_training"):
    for row in train:
      prediction = perceptron_predict(weight,row)
      error = row[-1] -prediction
      weight[0] = weight[0] + l_rate * error
      for i in range(len(row)-1):
        weight[i+1] = weight[i+1] + l_rate * error * row[i]
  return weight

In [6]:
from math import exp
def logistic_regression_predict(model,row):
  yhat = model[0]
  for i in range(len(row)-1):
    yhat += model[i+1] * row[i]
  return 1.0/(1.0+exp(-yhat))
def logistic_regression_model(train,l_rate=0.01,n_epoch=5000):
  coef = [randrange(-1,1) for i in range(len(train[0]))]
  for epoch in tqdm(range(n_epoch), desc="Logistic regression training"):
    for row in train:
      yhat = logistic_regression_predict(coef,row)
      error = row[-1] - yhat
      coef[0] = coef[0] + l_rate*error*yhat*(1.0-yhat)
      for i in range(len(row)-1):
        coef[i+1] = coef[i+1] +l_rate*error*yhat*(1.0-yhat)*row[i]
  return coef

In [7]:
def to_stacked_row(models,predict_list,row):
  stacked_row = list()
  for i in range(len(models)):
    prediction = predict_list[i](models[i],row)
    stacked_row.append(prediction)
  stacked_row.append(row[-1])
  return row[0:len(row)-1]+stacked_row


In [8]:
def load_csv(filename):
  dataset = list()
  with open(filename,'r') as file:
    csv_reader = reader(file)
    for row in csv_reader:
      if not row:
        continue
      dataset.append(row)
  return dataset
def accuracy_metric(actual,predicted):
  correct=0
  for i in range(len(actual)):
     if actual[i] == predicted[i]:
      correct +=1
  return correct/float(len(actual)) * 100.0
def cross_validation_split(dataset,n_folds):
  dataset_split = list()
  dataset_copy = list(dataset)
  fold_size = int(len(dataset)/n_folds)
  for _ in range(n_folds):
    fold = list()
    while len(fold) < fold_size:
      index = randrange(len(dataset_copy))
      fold.append(dataset_copy.pop(index))
    dataset_split.append(fold)
  return dataset_split
def str_column_to_float(dataset,column):
  for row in dataset:
    row[column]=float(row[column].strip())
def evaluate_algorithm(dataset,algorithm,n_folds,*args):
  folds = cross_validation_split(dataset,n_folds)
  scores = list()
  for fold in tqdm( folds, desc="K-Fold"):
    train_set = list(folds)
    train_set.remove(fold)
    train_set = sum(train_set,[])
    test_set = list()
    for row in fold:
      row_copy = list(row)
      test_set.append(row_copy)
      row_copy[-1] = None
    predicted = algorithm(train_set,test_set,*args)
    actual = [row[-1] for row in fold]
    accur = accuracy_metric(actual,predicted)
    scores.append(accur)
  return scores

In [9]:
def stacking (train,test):
  model_list = [knn_model,perceptron_model]
  predict_list = [knn_predict,perceptron_predict]
  models = list()
  for i in range(len(model_list)):
    model = model_list[i](train)
    models.append(model)
  stacked_dataset=list()
  for row in train:
    stacked_row = to_stacked_row(models,predict_list,row)
    stacked_dataset.append(stacked_row)
  stacked_model=logistic_regression_model(stacked_dataset)
  predictions = list()
  for row in test:
    stacked_row = to_stacked_row(models,predict_list,row)
    stacked_dataset.append(stacked_row)
    prediction=logistic_regression_predict(stacked_model,stacked_row)
    prediction = round(prediction)
    predictions.append(prediction)
  return predictions
def str_column_to_int(dataset,column):
  for row in dataset:
    if row[column] == 'R':
      row[column] = 0
    else :
      row[column] = 1

In [10]:
seed(1)
filename = "/content/drive/MyDrive/ML_mastery/sonar.all-data.csv"
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
  str_column_to_float(dataset,i)
str_column_to_int(dataset,len(dataset[0])-1)

In [11]:
from tqdm.auto import tqdm

In [12]:
n_folds=3
scores = evaluate_algorithm(dataset,stacking,n_folds)
print('Scores: %s'%scores)
print('Mean Accuracy: %.3f%%'%(sum(scores)/float(len(scores))))

K-Fold:   0%|          | 0/3 [00:00<?, ?it/s]

perceptron_training:   0%|          | 0/5000 [00:00<?, ?it/s]

Logistic regression training:   0%|          | 0/5000 [00:00<?, ?it/s]

perceptron_training:   0%|          | 0/5000 [00:00<?, ?it/s]

Logistic regression training:   0%|          | 0/5000 [00:00<?, ?it/s]

perceptron_training:   0%|          | 0/5000 [00:00<?, ?it/s]

Logistic regression training:   0%|          | 0/5000 [00:00<?, ?it/s]

Scores: [43.47826086956522, 78.26086956521739, 69.56521739130434]
Mean Accuracy: 63.768%
