In [43]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


# Carregando os dados
df_vehicle = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/AM/lista-2/vehicle.csv")

# Dividindo os dados em características (X) e rótulo (y)
X_vehicle = df_vehicle.iloc[:, :-1].values  # 18 primeiras colunas são características
y_vehicle = df_vehicle.iloc[:, -1].values  # Última coluna é a saída

# Codificando as classes como números inteiros
label_encoder = LabelEncoder()
y_vehicle_encoded = label_encoder.fit_transform(y_vehicle).reshape(-1, 1)

# One-hot encoding dos rótulos
one_hot_encoder = OneHotEncoder(sparse=False)
y_vehicle_one_hot = one_hot_encoder.fit_transform(y_vehicle_encoded)

# Normalizando os dados de entrada
X_normalized_vehicle = (X_vehicle - X_vehicle.min(axis=0)) / (X_vehicle.max(axis=0) - X_vehicle.min(axis=0))



In [44]:
# Função para calcular média e desvio padrão
def mean_std(values):
    return np.mean(values), np.std(values)

# Função de ativação sigmoidal
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Função de previsão
def predict(X, weights, bias):
    z = np.dot(X, weights) + bias
    return sigmoid(z)


In [45]:
# Gradient Descent
def gradient_descent(X, y, weights, bias, learning_rate, iterations):
    n = float(len(X))
    for i in range(iterations):
        y_pred = predict(X, weights, bias)
        error = y - y_pred
        gradient_weights = -np.dot(X.T, error) / n
        gradient_bias = -np.mean(error)
        weights -= learning_rate * gradient_weights
        bias -= learning_rate * gradient_bias
    return weights, bias


In [46]:
# Stochastic Gradient Descent
def stochastic_gradient_descent(X, y, weights, bias, learning_rate):
    n = float(len(X))
    for i in range(len(X)):
        x_i = X[i]
        y_i = y[i]
        y_pred = predict(x_i, weights, bias)
        error = y_i - y_pred
        gradient_weights = -x_i.reshape(-1,1) * error
        gradient_bias = -error
        weights -= learning_rate * gradient_weights
        bias -= learning_rate * gradient_bias
    return weights, bias

In [47]:
# Função para avaliação do modelo
def evaluate_model(X_train, y_train, X_test, y_test, learning_rate, iterations, optimizer):
    # Inicializando os pesos e o viés
    weights = np.zeros((X_train.shape[1], y_train.shape[1]))
    bias = np.zeros(y_train.shape[1])

    # Selecionando o algoritmo de otimização
    if optimizer == 'gd':
        weights, bias = gradient_descent(X_train, y_train, weights, bias, learning_rate, iterations)
    elif optimizer == 'sgd':
        weights, bias = stochastic_gradient_descent(X_train, y_train, weights, bias, learning_rate)

    # Fazendo previsões no conjunto de teste
    y_pred = predict(X_test, weights, bias)
    y_pred_binary = (y_pred == y_pred.max(axis=1, keepdims=True)).astype(int)

    # Calculando acurácia global
    accuracy = np.mean(np.all(y_pred_binary == y_test, axis=1))

    # Calculando a acurácia por classe
    class_accuracies = []
    for i in range(y_test.shape[1]):
        class_accuracy = np.mean((y_pred_binary[:, i] == y_test[:, i])[y_test[:, i] == 1])
        class_accuracies.append(class_accuracy)

    return accuracy, class_accuracies


In [48]:
# Definindo Hiperparâmetros
learning_rate = 0.1
iterations = 100
n_folds = 10

# Validação cruzada
def cross_validate(X, y, n_folds, learning_rate, iterations, optimizer):
    fold_size = len(X) // n_folds
    accuracies = []
    class_accuracies_list = []

    for fold in range(n_folds):
        start = fold * fold_size
        end = start + fold_size

        X_test = X[start:end]
        y_test = y[start:end]

        X_train = np.concatenate((X[:start], X[end:]), axis=0)
        y_train = np.concatenate((y[:start], y[end:]), axis=0)

        accuracy, class_accuracies = evaluate_model(X_train, y_train, X_test, y_test, learning_rate, iterations, optimizer)
        accuracies.append(accuracy)
        class_accuracies_list.append(class_accuracies)

    mean_accuracy = np.mean(accuracies)
    mean_class_accuracies = np.mean(class_accuracies_list, axis=0)

    std_accuracy = np.std(accuracies)
    std_class_accuracies = np.std(class_accuracies_list, axis=0)

    return mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies



In [49]:
# Executando a validação cruzada
mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies = cross_validate(X_normalized_vehicle, y_vehicle_one_hot, n_folds, learning_rate, iterations, 'gd')

print('Gradient Descent')
print(f"Média Global: {mean_accuracy}")
print(f"Desvio Padrão Global: {std_accuracy}")
for i in range(len(mean_class_accuracies)):
    print(f"Média da Acurácia para Classe {i}: {mean_class_accuracies[i]}")
    print(f"Desvio Padrão da Acurácia para Classe {i}: {std_class_accuracies[i]}")


Gradient Descent
Média Global: 0.42738095238095236
Desvio Padrão Global: 0.0695079075095
Média da Acurácia para Classe 0: 0.3746525234025234
Desvio Padrão da Acurácia para Classe 0: 0.09696582956291487
Média da Acurácia para Classe 1: 0.29411698276148407
Desvio Padrão da Acurácia para Classe 1: 0.19155185790623847
Média da Acurácia para Classe 2: 0.5921285435713515
Desvio Padrão da Acurácia para Classe 2: 0.18743012236688475
Média da Acurácia para Classe 3: 0.5212723598211214
Desvio Padrão da Acurácia para Classe 3: 0.12751958590310258


## Gradient Descent

## Stochastic Gradient Descent

In [50]:
# Executando a validação cruzada
mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies = cross_validate(X_normalized_vehicle, y_vehicle_one_hot, n_folds, learning_rate, iterations, 'sgd')

print('Gradient Descent')
print(f"Média Global: {mean_accuracy}")
print(f"Desvio Padrão Global: {std_accuracy}")
for i in range(len(mean_class_accuracies)):
    print(f"Média da Acurácia para Classe {i}: {mean_class_accuracies[i]}")
    print(f"Desvio Padrão da Acurácia para Classe {i}: {std_class_accuracies[i]}")


Gradient Descent
Média Global: 0.4428571428571429
Desvio Padrão Global: 0.06135285107964346
Média da Acurácia para Classe 0: 0.1405570818070818
Desvio Padrão da Acurácia para Classe 0: 0.05353814162007227
Média da Acurácia para Classe 1: 0.0
Desvio Padrão da Acurácia para Classe 1: 0.0
Média da Acurácia para Classe 2: 0.7558506776023755
Desvio Padrão da Acurácia para Classe 2: 0.1257245610800545
Média da Acurácia para Classe 3: 0.9067105263157893
Desvio Padrão da Acurácia para Classe 3: 0.07134636089496375


## Análise do discriminante Gaussiano

In [51]:
def gaussian_discriminant_analysis(X_train, y_train):
    # Obtendo as classes únicas
    classes = np.arange(y_train.shape[1])

    # Inicializando variáveis para armazenar parâmetros de cada classe
    mus = {}
    covs = {}
    priors = {}

    for c in classes:
        X_c = X_train[y_train[:, c] == 1]
        mus[c] = np.mean(X_c, axis=0)
        covs[c] = np.cov(X_c, rowvar=False)
        priors[c] = len(X_c) / len(X_train)

    # Calcular a matriz de covariância comum
    cov_shared = sum(priors[c] * covs[c] for c in classes)
    inv_cov_shared = np.linalg.inv(cov_shared)
    det_cov_shared = np.linalg.det(cov_shared)

    consts = {c: -0.5 * np.log(det_cov_shared) + np.log(priors[c]) for c in classes}

    return mus, inv_cov_shared, consts



In [52]:
def predict_gda(X, mus, inv_cov_shared, consts):
    # Calcular as funções discriminantes para cada classe
    g = {c: -0.5 * np.sum((X - mus[c]) @ inv_cov_shared * (X - mus[c]), axis=1) + consts[c] for c in mus}

    # Comparar as funções discriminantes e fazer as previsões
    y_pred = np.argmax(np.column_stack(list(g.values())), axis=1)

    return y_pred


In [53]:
def evaluate_model_gda(X_train, y_train, X_test, y_test):
    # Treinando o modelo GDA
    mus, inv_cov_shared, consts = gaussian_discriminant_analysis(X_train, y_train)

    # Fazendo previsões no conjunto de teste
    y_pred = predict_gda(X_test, mus, inv_cov_shared, consts)

    # Calculando acurácia global
    accuracy = np.mean(y_pred == y_test.argmax(axis=1))

    # Calculando a acurácia por classe
    class_accuracies = []
    for c in range(y_train.shape[1]):
        class_accuracy = np.mean((y_pred == y_test.argmax(axis=1))[y_test[:, c] == 1])
        class_accuracies.append(class_accuracy)

    return accuracy, class_accuracies


In [54]:
# Definindo o número de folds para validação cruzada
n_folds = 10

# Função para realizar a validação cruzada
def cross_validate_gda(X, y, n_folds):
    fold_size = len(X) // n_folds
    accuracies = []
    class_accuracies_list = []

    for fold in range(n_folds):
        start = fold * fold_size
        end = start + fold_size

        X_test = X[start:end]
        y_test = y[start:end]

        X_train = np.concatenate((X[:start], X[end:]), axis=0)
        y_train = np.concatenate((y[:start], y[end:]), axis=0)

        accuracy, class_accuracies = evaluate_model_gda(X_train, y_train, X_test, y_test)
        accuracies.append(accuracy)
        class_accuracies_list.append(class_accuracies)

    mean_accuracy = np.mean(accuracies)
    mean_class_accuracies = np.mean(class_accuracies_list, axis=0)

    std_accuracy = np.std(accuracies)
    std_class_accuracies = np.std(class_accuracies_list, axis=0)

    return mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies

# Executando a validação cruzada
mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies = cross_validate_gda(X_normalized_vehicle, y_vehicle_one_hot, n_folds)

print(f"Média Global: {mean_accuracy}")
print(f'Desvio Padrão Global: {std_accuracy}')

for i in range(len(mean_class_accuracies)):
    print(f"Média da Acurácia para Classe {i}: {mean_class_accuracies[i]}")
    print(f'Desvio Padrão {i}: {std_class_accuracies[i]}')

Média Global: 0.7773809523809524
Desvio Padrão Global: 0.03651871821470944
Média da Acurácia para Classe 0: 0.9664845339845339
Desvio Padrão 0: 0.030301028068568773
Média da Acurácia para Classe 1: 0.5969163663489111
Desvio Padrão 1: 0.10416027273200833
Média da Acurácia para Classe 2: 0.5823343720296714
Desvio Padrão 2: 0.12804738627234435
Média da Acurácia para Classe 3: 0.9573675610595116
Desvio Padrão 3: 0.055280406998010206


## Naive Bayes

In [55]:
def fit_naive_bayes(X_train, y_train):
    classes = np.unique(y_train.argmax(axis=1))
    class_priors = {}
    means = {}
    stds = {}

    for c in classes:
        X_c = X_train[y_train[:, c] == 1]
        class_priors[c] = len(X_c) / len(X_train)
        means[c] = np.mean(X_c, axis=0)
        stds[c] = np.std(X_c, axis=0)

    return class_priors, means, stds



In [56]:
def calculate_probability(x, mean, std):
    exponent = np.exp(-((x - mean) ** 2) / (2 * (std ** 2)))
    return np.prod((1 / (np.sqrt(2 * np.pi) * std)) * exponent)


In [57]:
def predict_naive_bayes(X_test, class_priors, means, stds):
    predictions = []
    for x in X_test:
        class_probabilities = {}
        for c in class_priors:
            class_probabilities[c] = class_priors[c] * calculate_probability(x, means[c], stds[c])
        predicted_class = max(class_probabilities, key=class_probabilities.get)
        predictions.append(predicted_class)

    return np.array(predictions)



In [58]:
def evaluate_model_naive_bayes(X_train, y_train, X_test, y_test):
    # Treinando o modelo Naive Bayes
    class_priors, means, stds = fit_naive_bayes(X_train, y_train)

    # Fazendo previsões no conjunto de teste
    y_pred = predict_naive_bayes(X_test, class_priors, means, stds)

    # Calculando acurácia global
    accuracy = np.mean(y_pred == y_test.argmax(axis=1))

    # Calculando a acurácia por classe
    class_accuracies = []
    for c in range(y_train.shape[1]):
        class_accuracy = np.mean((y_pred == y_test.argmax(axis=1))[y_test[:, c] == 1])
        class_accuracies.append(class_accuracy)

    return accuracy, class_accuracies



In [61]:
# Definindo o número de folds para validação cruzada
n_folds = 10

# Função para realizar a validação cruzada
def cross_validate_naive_bayes(X, y, n_folds):
    fold_size = len(X) // n_folds
    accuracies = []
    class_accuracies_list = []

    for fold in range(n_folds):
        start = fold * fold_size
        end = start + fold_size

        X_test = X[start:end]
        y_test = y[start:end]

        X_train = np.concatenate((X[:start], X[end:]), axis=0)
        y_train = np.concatenate((y[:start], y[end:]), axis=0)

        accuracy, class_accuracies = evaluate_model_naive_bayes(X_train, y_train, X_test, y_test)
        accuracies.append(accuracy)
        class_accuracies_list.append(class_accuracies)

    mean_accuracy = np.mean(accuracies)
    mean_class_accuracies = np.mean(class_accuracies_list, axis=0)

    std_accuracy = np.std(accuracies)
    std_class_accuracies = np.std(class_accuracies_list, axis=0)

    return mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies

# Executando a validação cruzada
mean_accuracy, mean_class_accuracies, std_accuracy, std_class_accuracies = cross_validate_naive_bayes(X_normalized_vehicle, y_vehicle_one_hot, n_folds)

print(f"Média Global: {mean_accuracy}")
print(f'Desvio Padrão Global: {std_accuracy}')

for i in range(len(mean_class_accuracies)):
    print(f"Média da Acurácia para Classe {i}: {mean_class_accuracies[i]}")
    print(f'Desvio Padrão {i}: {std_class_accuracies[i]}')

Média Global: 0.4607142857142857
Desvio Padrão Global: 0.04822530738221212
Média da Acurácia para Classe 0: 0.1693040293040293
Desvio Padrão 0: 0.11984354652352562
Média da Acurácia para Classe 1: 0.419102116801925
Desvio Padrão 1: 0.06824332767855044
Média da Acurácia para Classe 2: 0.40756658435488147
Desvio Padrão 2: 0.08479422976772243
Média da Acurácia para Classe 3: 0.8830503955968354
Desvio Padrão 3: 0.08652589937380051
