# Exercício 01 com sklearn

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the classification dataset.
data = pd.read_csv('class01.csv')

# Features are every column except the last one; the label is the 'target' column.
X = data.iloc[:, :-1]
y = data['target']

# Number of samples used for training; the remaining rows go to validation.
train_size = 350

# Split into training and validation sets. The fixed random_state keeps the
# split reproducible from run to run.
X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=train_size, random_state=42)

# Fit a Gaussian Naive Bayes classifier on the training split only.
model = GaussianNB()
model.fit(X_train, y_train)

# Predict on both splits so validation and training accuracy can be compared.
y_pred = model.predict(X_val)
y_train_pred = model.predict(X_train)

accuracy = accuracy_score(y_val, y_pred)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Each figure is labelled with the split it was measured on; previously both
# lines were printed under the same label and could not be told apart.
print(f'Acurácia no conjunto de validação: {accuracy:.2f}')
print(f'Acurácia no conjunto de treino: {train_accuracy:.2f}')

Acurácia do modelo: 0.62
Acurácia do modelo: 0.76


# Exercício 01 sem sklearn

In [13]:
import pandas as pd
import numpy as np

# Read the classification dataset from disk.
data = pd.read_csv('class01.csv')

# The label is the 'target' column; every column except the last is a feature.
# Both are taken as plain NumPy arrays.
y = data['target'].values
X = data.iloc[:, :-1].values

# The first `train_size` rows form the training set.
train_size = 350

# Sequential (unshuffled) hold-out split: leading rows train, the rest validate.
X_train, y_train = X[:train_size, :], y[:train_size]
X_val, y_val = X[train_size:, :], y[train_size:]


def compute_accuracy(y_pred, y_val):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_pred: Array-like of predicted labels.
        y_val: Array-like of true labels, same length as ``y_pred``.

    Returns:
        Number of matching positions divided by the number of predictions.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Coerce to arrays first: for plain Python lists ``y_pred == y_val`` is a
    # single bool (whole-list equality), not an element-wise comparison, which
    # would make the count below meaningless.
    y_pred = np.asarray(y_pred)
    y_val = np.asarray(y_val)
    assert len(y_pred) == len(y_val)
    return np.sum(y_pred == y_val) / len(y_pred)
  

def calculate_statistics(X, y):
    """Estimate per-class feature statistics and class priors.

    Args:
        X: Array-like indexed as (sample, feature).
        y: Array-like of labels, one per row of ``X``.

    Returns:
        dict keyed by class label. Each value is a dict with:
            'mean':  per-feature mean of the rows belonging to the class,
            'std':   per-feature (population) standard deviation of those rows,
            'prior': fraction of all samples that belong to the class.
    """
    # Accept lists as well as arrays; boolean-mask indexing needs ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = len(y)
    statistics = {}

    for class_ in np.unique(y):
        # Rows that belong to the current class.
        X_cls = X[y == class_]

        statistics[class_] = {
            'mean': np.mean(X_cls, axis=0),
            'std': np.std(X_cls, axis=0),
            # Relative class frequency, so a classifier can weigh how common
            # each class is instead of treating all classes as equally likely.
            'prior': len(X_cls) / n_samples,
        }

    return statistics

def gaussian_probability(x, mean, std):
    """Evaluate the Gaussian (normal) probability density at ``x``.

    Works element-wise on scalars or NumPy arrays that broadcast together.

    Args:
        x: Point(s) at which to evaluate the density.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.

    Returns:
        The density value(s), with the broadcast shape of the inputs.
    """
    # Floor the standard deviation: a constant feature has std == 0, which
    # would otherwise yield NaN (0/0 in the exponent or inf * 0 in the product).
    std = np.maximum(std, 1e-9)
    exponent = np.exp(-((x - mean) ** 2) / (2 * (std ** 2)))
    return (1 / (np.sqrt(2 * np.pi) * std)) * exponent

def predict(X, statistics):
    """Classify each row of ``X`` with a Gaussian Naive Bayes rule.

    Scores are accumulated in log-space. Multiplying many per-feature densities
    underflows to 0.0 for every class once there are enough features, at which
    point the comparison degenerates and the first class always wins; summing
    log-densities avoids that.

    Args:
        X: Iterable of feature vectors (e.g. a 2-D array, one row per sample).
        statistics: dict mapping class label -> dict with per-feature 'mean'
            and 'std' arrays. If an entry also carries a 'prior' value, its
            logarithm is added to that class's score; entries without it are
            scored on likelihood alone.

    Returns:
        NumPy array with the predicted class label for each row of ``X``.
    """
    min_std = 1e-9  # floor so a zero-variance feature cannot divide by zero

    # Everything that does not depend on the sample is computed once per class.
    class_terms = []
    for class_, stat in statistics.items():
        means = np.asarray(stat['mean'], dtype=float)
        variances = np.maximum(np.asarray(stat['std'], dtype=float), min_std) ** 2
        # Sum over features of log(1 / sqrt(2 * pi * var)).
        log_norm = -0.5 * np.sum(np.log(2.0 * np.pi * variances))
        prior = stat.get('prior')
        log_prior = 0.0 if prior is None else np.log(prior)
        class_terms.append((class_, means, variances, log_norm + log_prior))

    predictions = []
    for x in X:
        # Log-score of this sample under each class.
        class_scores = {
            class_: offset - 0.5 * np.sum((x - means) ** 2 / variances)
            for class_, means, variances, offset in class_terms
        }
        # On ties, max() keeps the first class in dictionary order.
        predictions.append(max(class_scores, key=class_scores.get))

    return np.array(predictions)

# Fit: estimate the per-class statistics from the training split only.
statistics = calculate_statistics(X_train, y_train)

# Predict both splits with the fitted statistics.
y_train_pred = predict(X_train, statistics)
y_val_pred = predict(X_val, statistics)

# Score each split against its true labels.
train_acc = compute_accuracy(y_train_pred, y_train)
val_acc = compute_accuracy(y_val_pred, y_val)

print(f'Acurácia no conjunto de treino: {train_acc:.2f}')
print(f'Acurácia no conjunto de validação: {val_acc:.2f}')

Acurácia no conjunto de treino: 0.76
Acurácia no conjunto de validação: 0.63


# Exercício 03 com sklearn

In [15]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneOut
from math import sqrt

# Load the regression dataset.
data = pd.read_csv('reg01.csv')

# Features are every column except the last; the response is the 'target' column.
X = data.iloc[:, :-1]
y = data['target']

# The first `train_size` rows are used for training; the remaining rows are
# set aside as X_valid / y_valid.
train_size = 350
X_train, y_train = X.iloc[:train_size, :], y.iloc[:train_size]
X_valid, y_valid = X.iloc[train_size:, :], y.iloc[train_size:]

# LASSO regressor with alpha=1, evaluated with leave-one-out over the
# training rows.
lasso_model = Lasso(alpha=1)
loo = LeaveOneOut()

# One RMSE value per fold.
train_rmse_list = []
valid_rmse_list = []

for train_index, valid_index in loo.split(X_train):
    # Rows used to fit this fold, and the single row left out of it.
    X_train_fold = X_train.iloc[train_index]
    y_train_fold = y_train.iloc[train_index]
    X_valid_fold = X_train.iloc[valid_index]
    y_valid_fold = y_train.iloc[valid_index]

    lasso_model.fit(X_train_fold, y_train_fold)

    # RMSE on the rows the model was just fitted on.
    y_train_pred = lasso_model.predict(X_train_fold)
    train_rmse = sqrt(mean_squared_error(y_train_fold, y_train_pred))
    train_rmse_list.append(train_rmse)

    # RMSE on the left-out row.
    # NOTE(review): "validation" here is the left-out training row; the
    # X_valid / y_valid split created above is never evaluated in this loop.
    # Confirm this is the intended protocol.
    y_valid_pred = lasso_model.predict(X_valid_fold)
    valid_rmse = sqrt(mean_squared_error(y_valid_fold, y_valid_pred))
    valid_rmse_list.append(valid_rmse)

# Average the per-fold RMSE values.
mean_train_rmse = np.mean(train_rmse_list)
mean_valid_rmse = np.mean(valid_rmse_list)

print(f'Média do RMSE para treino: {mean_train_rmse:.2f}')
print(f'Média do RMSE para validação: {mean_valid_rmse:.2f}')

Média do RMSE para treino: 19.26
Média do RMSE para validação: 15.88


# Exercício 03 sem sklearn

In [16]:
import pandas as pd
import numpy as np
from math import sqrt

# Load the regression dataset.
data = pd.read_csv('reg01.csv')

# The response is the 'target' column; every column except the last is a
# feature. Both are taken as plain NumPy arrays.
y = data['target'].values
X = data.iloc[:, :-1].values

# Sequential split: the first `train_size` rows train, the rest validate.
train_size = 350
X_train, y_train = X[:train_size, :], y[:train_size]
X_valid, y_valid = X[train_size:, :], y[train_size:]

# Prepend a column of ones so that weights[0] acts as the intercept (bias).
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_valid = np.hstack((np.ones((X_valid.shape[0], 1)), X_valid))

# Hyperparameters: L1 strength, gradient-descent step size, and iteration count.
alpha = 1.0
learning_rate = 0.01
epochs = 1000

# Weight vector (intercept first), starting from all zeros.
weights = np.zeros(X_train.shape[1])

def l1_regularization(alpha, weights):
    """Return the L1 penalty ``alpha * sum(|w|)`` over all weights but the first.

    The first entry of ``weights`` is skipped, so it is not penalised.
    """
    penalised = weights[1:]
    return alpha * np.abs(penalised).sum()

def lasso_objective(X, y, weights, alpha):
    """Return the LASSO objective: mean squared error plus the L1 penalty.

    Args:
        X: Design matrix, one row per sample.
        y: Target values, one per row of ``X``.
        weights: Coefficient vector applied to the columns of ``X``.
        alpha: Strength of the L1 penalty.

    Returns:
        ``mean((X @ weights - y) ** 2) + l1_regularization(alpha, weights)``.
    """
    residuals = np.dot(X, weights) - y
    data_term = np.mean(np.square(residuals))
    penalty = l1_regularization(alpha, weights)
    return data_term + penalty

def lasso_gradient(X, y, weights, alpha):
    """Return a (sub)gradient of MSE + L1 penalty with respect to ``weights``.

    Args:
        X: Design matrix, one row per sample.
        y: Target values, one per row of ``X``.
        weights: Current coefficient vector.
        alpha: Strength of the L1 penalty.

    Returns:
        Array shaped like ``weights``. The first component carries only the
        squared-error term; the others also get ``alpha * sign(weight)``.
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, weights) - y

    # Gradient of the mean squared error.
    grad = 2 * X.T.dot(residuals) / n_samples

    # L1 subgradient on every weight except the first.
    grad[1:] += alpha * np.sign(weights[1:])
    return grad

# Leave-one-out training loop; one RMSE value is collected per fold.
train_rmse_list = []
valid_rmse_list = []

n_train_rows = X_train.shape[0]

for i in range(n_train_rows):
    # Drop row i from the training data for this fold.
    keep = np.arange(n_train_rows) != i
    X_fold_train = X_train[keep]
    y_fold_train = y_train[keep]

    # Every fold starts from zero weights.
    weights = np.zeros(X_train.shape[1])

    # Plain gradient descent with a fixed step size.
    for epoch in range(epochs):
        gradient = lasso_gradient(X_fold_train, y_fold_train, weights, alpha)
        weights -= learning_rate * gradient

    # RMSE on the rows this fold was trained on.
    y_train_pred = X_fold_train.dot(weights)
    train_rmse = sqrt(np.mean((y_fold_train - y_train_pred) ** 2))
    train_rmse_list.append(train_rmse)

    # RMSE on the fixed hold-out set.
    # NOTE(review): the left-out row i is never scored; every fold is
    # evaluated on the same X_valid / y_valid. Confirm this is intended.
    y_valid_pred = X_valid.dot(weights)
    valid_rmse = sqrt(np.mean((y_valid - y_valid_pred) ** 2))
    valid_rmse_list.append(valid_rmse)

# Average the per-fold RMSE values.
mean_train_rmse = np.mean(train_rmse_list)
mean_valid_rmse = np.mean(valid_rmse_list)

print(f'Média do RMSE para treino: {mean_train_rmse:.2f}')
print(f'Média do RMSE para validação: {mean_valid_rmse:.2f}')


Média do RMSE para treino: 21.01
Média do RMSE para validação: 21.26
