In [6]:
import numpy as np 
from helpers import * 
from implementations import *

In [3]:
# Upper bound on the number of rows read from each CSV file.
MAX_ROWS = 10000

# Both files are parsed the same way: comma-separated, first line skipped,
# at most MAX_ROWS rows read.
_csv_options = dict(delimiter=",", skip_header=1, max_rows=MAX_ROWS)
x_data = np.genfromtxt('data/dataset/x_train.csv', **_csv_options)
y_data = np.genfromtxt('data/dataset/y_train.csv', **_csv_options)

# Zero-fill the NaN feature entries before handing the matrix to the
# project's normalize helper.
missing_mask = np.isnan(x_data)
x_data[missing_mask] = 0
x_data = normalize(x_data)

In [5]:
# Partition the loaded samples into train and test subsets with the project's split_data helper.
# NOTE(review): 0.7 is presumably the fraction kept for training -- confirm against split_data,
# and check whether it shuffles and, if so, how that shuffle is seeded.
x_train, x_test, y_train, y_test = split_data(x_data, y_data, 0.7)

In [81]:
# Baseline experiment: fit logistic_regression on the training split and
# report the fraction of correct predictions on the test split.
N, D = x_train.shape

# Draw the starting weights from a seeded local generator so the accuracy
# shown below is reproducible under Restart & Run All. This is the same
# uniform [0, 1) draw as np.random.rand(D), without touching the global RNG.
initial_w = np.random.default_rng(0).random(D)

# Targets and weights are passed as column vectors.
# NOTE(review): the trailing positional arguments 10000 and 0.01 are presumably
# max_iters and gamma -- confirm against logistic_regression's signature.
w, loss = logistic_regression(y_train.reshape(-1, 1), x_train, initial_w.reshape(-1, 1), 10000, 0.01)

pred = predict_logistic(x_test, w)
# Flatten both sides before comparing: an (N, 1) prediction compared with an
# (N,) target would broadcast to an N x N matrix and give a meaningless score.
np.mean(np.ravel(pred) == np.ravel(y_test))

0.668

In [68]:
# Regularised experiment: fit reg_logistic_regression on the training split
# and report the fraction of correct predictions on the test split.
N, D = x_train.shape

# Draw the starting weights from a seeded local generator so the accuracy
# shown below is reproducible under Restart & Run All. This is the same
# uniform [0, 1) draw as np.random.rand(D), without touching the global RNG.
initial_w = np.random.default_rng(0).random(D)

# Targets and weights are passed as column vectors.
# NOTE(review): 0.5 is presumably lambda_, and the trailing 1000 and 0.001 are
# presumably max_iters and gamma -- confirm against reg_logistic_regression.
w, loss = reg_logistic_regression(y_train.reshape(-1, 1), x_train, 0.5, initial_w.reshape(-1, 1), 1000, 0.001)

pred = predict_logistic(x_test, w)
# Flatten both sides before comparing: an (N, 1) prediction compared with an
# (N,) target would broadcast to an N x N matrix and give a meaningless score.
np.mean(np.ravel(pred) == np.ravel(y_test))

  loss = -np.mean(y * np.log(sigmoids) + (1 - y) * np.log(1 - sigmoids))
  loss = -np.mean(y * np.log(sigmoids) + (1 - y) * np.log(1 - sigmoids))


0.6

In [75]:
import numpy as np

def k_fold_cross_validation(X, y, model, k, model_params, predict_fn=None):
    """
    Estimate classification accuracy with k-fold cross-validation.

    The sample indices are shuffled with NumPy's global random state and
    partitioned into k folds. Each fold serves once as the test set while
    the model is trained on all remaining samples.

    Parameters:
    - X: features, numpy array of shape (num_samples, num_features)
    - y: targets, numpy array of shape (num_samples, )
    - model: training function called as model(y_train, X_train, **model_params);
      it must return a (weights, loss) pair
    - k: number of folds, with 2 <= k <= num_samples
    - model_params: dictionary of keyword arguments forwarded to model
    - predict_fn: optional callable predict_fn(X_test, w) returning the predicted
      labels for X_test; defaults to predict_logistic

    Returns:
    - mean_accuracy: the average of the k per-fold accuracies

    Raises:
    - ValueError: if k is outside the range [2, num_samples]
    """
    if predict_fn is None:
        predict_fn = predict_logistic

    num_samples = X.shape[0]
    if not 2 <= k <= num_samples:
        raise ValueError(
            f"k must be between 2 and the number of samples ({num_samples}), got {k}"
        )

    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    accuracies = []

    # array_split spreads the num_samples % k leftover samples over the first
    # folds, so every sample is tested exactly once. Flooring the fold size
    # would leave the trailing samples permanently in the training set.
    for test_indices in np.array_split(indices, k):
        train_indices = np.setdiff1d(indices, test_indices)

        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]

        # Give each fold its own copy of array-valued parameters (e.g. the
        # initial weights), so a model that updates them in place cannot
        # carry state from one fold into the next.
        fold_params = {
            name: value.copy() if isinstance(value, np.ndarray) else value
            for name, value in model_params.items()
        }

        # Fit on the training part and predict the held-out fold.
        w, loss = model(y_train, X_train, **fold_params)
        y_pred = predict_fn(X_test, w)

        # Flatten before comparing so a column-vector prediction is matched
        # element-wise instead of broadcasting to an N x N matrix.
        accuracies.append(np.mean(np.ravel(y_pred) == np.ravel(y_test)))

    # Average the per-fold accuracies.
    mean_accuracy = np.mean(accuracies)

    return mean_accuracy


import numpy as np


def hyperparameter_tuning(X, y, model, lambdas, gammas, model_params,  k=5):
    """
    Grid-search lambda_ and gamma using k-fold cross-validation.

    Every (gamma, lambda_) pair is scored with k_fold_cross_validation and
    one line per pair is printed. Each call to k_fold_cross_validation
    shuffles the samples itself, so different pairs are scored on different
    fold partitions.

    Parameters:
    - X: features
    - y: targets
    - model: training function forwarded to k_fold_cross_validation; it must
      accept 'lambda_' and 'gamma' as keyword arguments
    - lambdas: iterable of candidate values for lambda_
    - gammas: iterable of candidate values for gamma
    - model_params: dictionary with the remaining keyword arguments of model;
      it is not modified
    - k: number of folds for cross-validation

    Returns:
    - (best_param_lambda, best_param_gamma): the pair with the highest
      cross-validation accuracy (the first one evaluated wins ties), or
      (None, None) if no pair was evaluated
    """
    # Start below any attainable accuracy so that a pair is selected even
    # when every candidate scores 0.
    best_accuracy = float("-inf")
    best_param_lambda = None
    best_param_gamma = None

    for gamma in gammas:
        for lambda_ in lambdas:
            # Build a fresh dict per trial instead of writing into the
            # caller's model_params.
            trial_params = {**model_params, 'lambda_': lambda_, 'gamma': gamma}
            accuracy = k_fold_cross_validation(X, y, model, k, trial_params)

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_param_lambda = lambda_
                best_param_gamma = gamma

            print(f" lambda= {lambda_} gamma= {gamma} , CV accuracy = {accuracy:.4f}")

    return best_param_lambda, best_param_gamma

 

In [76]:
# Grid-search lambda_ and gamma for reg_logistic_regression with the default
# 5-fold cross-validation over the full loaded dataset.
N, D = x_train.shape

# Draw the starting weights from a seeded local generator so they are
# reproducible under Restart & Run All. This is the same uniform [0, 1)
# draw as np.random.rand(D), without touching the global RNG.
initial_w = np.random.default_rng(0).random(D)

hyperparameter_tuning(
    x_data,
    y_data,
    reg_logistic_regression,
    lambdas=[0.2, 0.3],
    gammas=[0.01, 0.05],
    model_params={'initial_w': initial_w, 'max_iters': 1000},
)


  loss = -np.mean(y * np.log(sigmoids) + (1 - y) * np.log(1 - sigmoids))


  loss = -np.mean(y * np.log(sigmoids) + (1 - y) * np.log(1 - sigmoids))


 lambda= 0.2 gamma= 0.01 , CV accuracy = 0.5470
 lambda= 0.3 gamma= 0.01 , CV accuracy = 0.5530
 lambda= 0.2 gamma= 0.05 , CV accuracy = 0.5480
 lambda= 0.3 gamma= 0.05 , CV accuracy = 0.5620


(0.3, 0.05)