<pre>

                  ___      .-""-.      ___
    Diabetes      \  "-.  /      \  .-"  /       By
      Hill         > -=.\/        \/.=- <      Alin
    Climbing       > -='/\        /\'=- <        Cijov
                  /__.-'  \      /  '-.__\
                           '-..-'
</pre>

In [None]:
import numpy as np
import pandas as pd
from math import exp

from IPython.display import clear_output
from numpy.random import randn
from numpy.random import rand
from tqdm import tqdm

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

import warnings
warnings.filterwarnings('ignore')

# Prepare Data

In [None]:
# Read the diabetes CSV and randomise the row order in one step.
path = '../input/pima-indians-diabetes-database/diabetes.csv'
df = shuffle(pd.read_csv(path))
# Last expression of the cell: shows the first rows in the notebook output.
df.head()

In [None]:
# The target is the 'Outcome' column; every column except the last is an input.
labels = df['Outcome']
# Standardise each input column independently: subtract its mean, divide by
# its standard deviation.
features = df[df.columns[:-1]].apply(lambda col: (col - col.mean()) / col.std())

# Model

## Forward function

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of a scalar ``x``.

    The result is computed in a numerically stable way: for negative ``x``
    the equivalent form e^x / (1 + e^x) is used so that ``exp`` is only ever
    called with a non-positive argument and cannot raise ``OverflowError``.
    """
    if x >= 0:
        return 1.0 / (1.0 + exp(-x))
    # exp(x) underflows harmlessly to 0.0 for very negative x.
    z = exp(x)
    return z / (1.0 + z)

def forward(row, weights):
    """Return the weighted sum of ``row`` plus a bias term.

    ``weights[i]`` multiplies ``row[i]``; the final element ``weights[-1]``
    is the bias and seeds the running total.
    """
    total = weights[-1]
    for idx, value in enumerate(row):
        total += weights[idx] * value
    return total

## Prediction functions

In [None]:
def predict(row, network):
    """Propagate one input ``row`` through ``network`` and return its output.

    ``network`` is a sequence of layers and each layer is a sequence of
    weight vectors (one per node). Every node applies ``forward`` followed by
    ``sigmoid``; the activations of one layer are the inputs of the next.
    Only the first activation of the last layer is returned.
    """
    signal = row
    for layer in network:
        signal = [sigmoid(forward(signal, node)) for node in layer]
    return signal[0]

def predict_dataset(X, network):
    """Return a list with ``predict(row, network)`` for every row of ``X``."""
    return [predict(sample, network) for sample in X]

## Hill Climbing

In [None]:
def objective(X, y, network):
    """Score ``network`` on ``(X, y)`` as classification accuracy.

    The network outputs are rounded to the nearest integer before being
    compared with the labels ``y`` via ``accuracy_score``.
    """
    hard_labels = [round(prob) for prob in predict_dataset(X, network)]
    return accuracy_score(y, hard_labels)

def step(network, step_size):
    """Return a perturbed copy of ``network``.

    Every node's weight vector gets independent ``randn`` noise scaled by
    ``step_size`` added to it. The input network is left untouched; the
    result has the same layer/node layout.
    """
    return [
        [node.copy() + randn(len(node)) * step_size for node in layer]
        for layer in network
    ]

def hill_climbing(X, y, objective, solution, n_iter, step_size):
    """Run stochastic hill climbing starting from ``solution``.

    For ``n_iter`` iterations a neighbour of the current best network is
    produced with ``step`` and scored with ``objective(X, y, candidate)``.
    The neighbour replaces the current best when its score is at least as
    high, so ties are accepted.

    Returns a two-element list ``[best_network, best_score]``.
    """
    best = solution
    best_score = objective(X, y, best)
    for _ in tqdm(range(n_iter)):
        neighbour = step(best, step_size)
        neighbour_score = objective(X, y, neighbour)
        # Keep the neighbour when it is better than or equal to the best.
        if neighbour_score >= best_score:
            best = neighbour
            best_score = neighbour_score
    return [best, best_score]

## Parameters

In [None]:
def get_model(n_inputs, n_hidden=10):
    """Build a randomly initialised two-layer network.

    The hidden layer has ``n_hidden`` nodes, each holding ``n_inputs + 1``
    weights; the output layer has a single node with ``n_hidden + 1``
    weights. The extra weight in each vector is the bias read by
    ``forward``. All weights are drawn with ``rand``.

    Returns ``[hidden_layer, output_layer]``.
    """
    hidden_layer = []
    for _ in range(n_hidden):
        hidden_layer.append(rand(n_inputs + 1))
    output_layer = [rand(n_hidden + 1)]
    return [hidden_layer, output_layer]

In [None]:
# Hill-climbing budget and the scale of each random weight perturbation.
n_iter, step_size = 1000, 0.1
# One network input per feature column.
n_inputs = features.shape[1]
# Three-way splitter and three independently initialised networks;
# the training loop pairs networks[i] with the i-th split.
kf = KFold(n_splits=3)
networks = [get_model(n_inputs) for _ in range(3)]

# Training

In [None]:
# Train one network per fold and record its accuracy on that fold's
# held-out rows.
scores = []
features_ = features.values
labels_ = labels.values
for i, (train_index, test_index) in enumerate(kf.split(features_)):
    clear_output(wait=True)

    # Use the indices produced by KFold so the held-out sets of the folds are
    # disjoint. Previously the indices were ignored and every iteration drew
    # a fresh random split instead.
    X_train, X_test = features_[train_index], features_[test_index]
    y_train, y_test = labels_[train_index], labels_[test_index]

    network, score = hill_climbing(X_train, y_train, objective, networks[i], n_iter, step_size)
    networks[i] = network
    print('Best Accuracy: %f' % (score))

    # Round the network outputs to hard 0/1 labels before scoring.
    yhat = [round(prob) for prob in predict_dataset(X_test, networks[i])]

    score = accuracy_score(y_test, yhat)
    print('Test Accuracy: %.5f' % (score * 100))
    scores.append(score)

# Blending

In [None]:
# Reshuffle the data and draw a fresh 80/20 split for the blending models.
# NOTE(review): the base networks were trained on rows of this same data, so
# the test part of this split may contain rows they have already seen --
# the blended accuracies below may therefore be optimistic.
features, labels = shuffle(features, labels)
features_, labels_ = features.values, labels.values

X_train, X_test, y_train, y_test = train_test_split(
    features_, labels_, test_size=0.2
)

In [None]:
def get_ensemble(X, models):
    """Return the stacked predictions of ``models`` on ``X``.

    Each network in ``models`` is evaluated on every row of ``X`` with
    ``predict_dataset``. The result is an array with one row per sample and
    one column per model.

    The ``models`` argument is now honoured; before, the function read the
    module-level ``networks`` list and always assumed exactly three models.
    """
    outs = [predict_dataset(X, model) for model in models]
    # Stacked as (n_models, n_samples); swap the axes to (n_samples, n_models).
    return np.moveaxis(np.array(outs), 1, 0)

In [None]:
# Turn both parts of the split into meta-features: the outputs of the
# trained networks for every row.
meta_train, meta_test = (
    get_ensemble(part, networks) for part in (X_train, X_test)
)

## Linear Regression

In [None]:
# Blend the network outputs with an ordinary least-squares model.
reg = LinearRegression().fit(meta_train, y_train)
# LinearRegression.score() reports R^2, not accuracy. Threshold the
# continuous predictions at 0.5 to get class labels and measure real
# classification accuracy, matching what the printed label says.
reg_labels = (reg.predict(meta_test) > 0.5).astype(int)
print("Linear Regression Accuracy:{:1.3f}".format(accuracy_score(y_test, reg_labels)))

## Logistic Regression

In [None]:
# Blend the network outputs with a logistic-regression classifier.
model = LogisticRegression(solver='liblinear')
model.fit(X=meta_train, y=y_train)

In [None]:
# Score the logistic-regression blender on the held-out meta-features.
yhat = model.predict(X=meta_test)
acc = accuracy_score(y_true=y_test, y_pred=yhat)
print("Logistic Regression Accuracy:{:1.3f}".format(acc))