In [1]:
import numpy as np
import matplotlib.pyplot as plt

def knn_model(x_train, y_train, K=10):
    def model(x, threshold=0.5):
        knn_class = []
        knn_dist = []
        for i in range(len(x_train)):
            dist = np.linalg.norm(x-x_train[i])
            if len(knn_class) < K:
                knn_class.append(y_train[i])
                knn_dist.append(dist)
            else:
                max_dist_index = np.argmax(knn_dist)
                if dist < knn_dist[max_dist_index]:
                    knn_class[max_dist_index] = y_train[i]
                    knn_dist[max_dist_index] = dist

        prob_c1 = np.mean(knn_class)  # Calculate probability as the mean
        return prob_c1
    
    return model



def calc_conf_matrix(y, y_prob, threshold=0.5):
    y_pred = [1 if prob >= threshold else 0 for prob in y_prob]
    conf_matrix = {'fp':0, 'fn':0, 'tp':0, 'tn':0}
    for i in range(len(y)):
        if y[i] == y_pred[i]:
            if y[i]:
                conf_matrix['tp'] += 1
            else:
                conf_matrix['tn'] += 1
        else:
            if y_pred[i]:
                conf_matrix['fp'] += 1
            else: 
                conf_matrix['fn'] += 1
    return conf_matrix


def recall(conf_matrix):
    rec = conf_matrix['tp']/ (conf_matrix['tp'] + conf_matrix['fn'])
    return rec

def false_pos_rate(conf_matrix):
    fpr = conf_matrix['fp']/ (conf_matrix['fp'] + conf_matrix['tn'])
    return fpr

def calc_AUC_ROC(tpr, fpr):
    auc = np.trapz(tpr, fpr)
    return auc

def predictions(x, model, threshold=0.5):
    y_pred = []
    for i in range(len(x)):
        y_pred.append(1 if model(x[i]) >= threshold else 0)
    return y_pred

def calc_ROC(x, y, model):
    thresholds = np.arange(0, 1.01, 0.05)
    tpr = []
    fpr = []
    for t in thresholds:
        y_pred = predictions(x, model, t)
        cm = calc_conf_matrix(y, y_pred)
        tpr.append(recall(cm))
        fpr.append(false_pos_rate(cm))
    return tpr, fpr


def graph_ROC(x, y, model):
    tpr, fpr = calc_ROC(x, y, model)
    auc = calc_AUC_ROC(tpr, fpr)

    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve with area = {auc:.4f}')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Curva ROC')
    plt.legend(loc="lower right")
    plt.show()

In [2]:
import pandas as pd
# Load the datasets
train_data = pd.read_csv("breast_cancer_train.csv")
valid_data = pd.read_csv("breast_cancer_valid.csv")

# Assuming the last column is the target variable
x_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values

x_valid = valid_data.iloc[:, :-1].values
y_valid = valid_data.iloc[:, -1].values

# Train the KNN model
K = 100  # You can adjust the value of K
knn_classifier = knn_model(x_train, y_train, K)

# Evaluate the model on the validation set
graph_ROC(x_valid, y_valid, knn_classifier)

In [6]:
def knn_model(x_train, y_train, K=10):
    def model(x):
        knn_class = []
        knn_dist = []
        for i in range(len(x_train)):
            dist = np.linalg.norm(x-x_train[i])
            if len(knn_class) < K:
                knn_class.append(y_train[i])
                knn_dist.append(dist)
            else:
                max_dist_index = np.argmax(knn_dist)
                if dist < knn_dist[max_dist_index]:
                    knn_class[max_dist_index] = y_train[i]
                    knn_dist[max_dist_index] = dist

        prob_c1 = np.mean(knn_class)  # Calculate probability as the mean
        return prob_c1  # Return probability of belonging to the positive class

    return model

def predictions_proba(x, model):
    probas = []
    for i in range(len(x)):
        probas.append(model(x[i]))
    return np.array(probas)

def predictions(x, model, threshold=0.5):
    probas = predictions_proba(x, model)
    return (probas[:, 0] < threshold).astype(int)  # Thresholding the probability for class 0


def calc_conf_matrix(y, y_prob, threshold=0.5):
    y_pred = predictions(x_valid, knn_classifier, threshold=threshold)
    conf_matrix = {'fp':0, 'fn':0, 'tp':0, 'tn':0}
    for i in range(len(y)):
        if y[i] == y_pred[i]:
            if y[i]:
                conf_matrix['tp'] += 1
            else:
                conf_matrix['tn'] += 1
        else:
            if y_pred[i]:
                conf_matrix['fp'] += 1
            else: 
                conf_matrix['fn'] += 1
    return conf_matrix

def recall(conf_matrix):
    rec = conf_matrix['tp'] / (conf_matrix['tp'] + conf_matrix['fn'])
    return rec

def false_pos_rate(conf_matrix):
    fpr = conf_matrix['fp'] / (conf_matrix['fp'] + conf_matrix['tn'])
    return fpr

def calc_AUC_ROC(tpr, fpr):
    auc = np.trapz(tpr, fpr)
    return auc

def calc_ROC(x, y, model):
    thresholds = np.arange(0, 1.01, 0.05)
    tpr = []
    fpr = []
    for t in thresholds:
        y_pred = predictions(x, model, threshold=t)
        cm = calc_conf_matrix(y, y_pred, threshold=t)
        tpr.append(recall(cm))
        fpr.append(false_pos_rate(cm))
    return tpr, fpr

def graph_ROC(x, y, model):
    tpr, fpr = calc_ROC(x, y, model)
    auc = calc_AUC_ROC(tpr, fpr)

    plt.figure(figsize=(10, 8))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve with area = {auc:.4f}')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc="lower right")
    plt.show()

import pandas as pd

# Load the datasets
train_data = pd.read_csv("breast_cancer_train.csv")
valid_data = pd.read_csv("breast_cancer_valid.csv")

# Assuming the last column is the target variable
x_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values

x_valid = valid_data.iloc[:, :-1].values
y_valid = valid_data.iloc[:, -1].values

# Train the KNN model
K = 9  # You can adjust the value of K
knn_classifier = knn_model(x_train, y_train, K)

# Evaluate the model on the validation set
graph_ROC(x_valid, y_valid, knn_classifier)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed