In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import roc_auc_score

In [2]:
# The data folder sits next to the directory the notebook is run from.
data_path = Path.cwd().parent / "data"

# The CSV has no header row, so pandas labels the columns 0, 1, 2.
df = pd.read_csv(data_path / "data-logistic.csv", header=None)

In [3]:
# Preview the first rows to check how the header-less CSV was parsed.
df.head()

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [4]:
# Column 0 is the class label; every remaining column is a feature.
y = df.iloc[:, 0].to_numpy()

# Transpose so that X has shape (n_features, n_samples): one column per
# sample, which is the layout grad_descent and logistic_regression index into.
X = np.transpose(df.iloc[:, 1:].to_numpy())

In [5]:
def sigmoid(p: float):
    """
    Calculate 1 / (1 + exp(p)) element-wise.

    Mind the sign convention: this is the logistic function evaluated at
    ``-p`` (the result goes from 1 towards 0 as ``p`` grows), so a caller
    that wants the usual ``1 / (1 + exp(-z))`` has to pass ``-z``.

    The value is computed as ``exp(-log(1 + exp(p)))`` via ``np.logaddexp``
    so that a large positive ``p`` gives 0.0 instead of overflowing inside
    ``np.exp``.

    :param p: scalar or array of real numbers
    :return: value(s) in [0, 1]; an array input gives an array of the
        same shape
    """
    # log(1 + exp(p)) == logaddexp(0, p), evaluated without forming exp(p)
    # for large p.
    return np.exp(-np.logaddexp(0., p))

In [6]:
def logistic_regression(X: np.array, w: np.array) -> np.array:
    """
    Calculate class probabilities on data X with weights w.
    Two classes are assumed, so a single probability per sample is returned.

    :params X: features matrix with one column per sample, so that
        ``X.T @ w`` gives one linear score per sample
    :params w: weight vector, one entry per row of X
    :return: ``1 / (1 + exp(-score))`` for every sample
    """
    scores = X.T @ w
    # sigmoid(t) is 1 / (1 + exp(t)), hence the negated argument.
    return sigmoid(-scores)

In [7]:
def grad_descent(
    X: np.array,
    y: np.array,
    lr: float = 0.1,
    C: float = 0,
    epochs: int = 10000,
    eps: float = 1e-5,
    initial_approximation: np.array = None,
) -> np.array:
    """
    Fit logistic-regression weights with full-batch gradient steps.
    Two classes are assumed (presumably labelled -1 / +1 -- confirm
    against the data).

    Every step applies

        w <- w + lr * mean_i( y_i * x_i * (1 - 1 / (1 + exp(-y_i * <w, x_i>))) )
               - lr * C * w

    :param X: features matrix, shape (n_features, n_samples)
    :param y: target value for each sample (column of X)
    :param lr: learning rate
    :param C: L2 regularization strength
    :param epochs: upper bound on the number of steps
    :param eps: stop as soon as the Euclidean norm of a step drops below
        this value
    :param initial_approximation: starting weights; zeros when None
    :return: weight vector only. When the stop criterion fires, these are
        the weights from before that final small step.
    """
    if initial_approximation is None:
        w = np.zeros(X.shape[0])
    else:
        w = initial_approximation

    for _ in tqdm(range(epochs)):
        # -y_i * <w, x_i> for every sample.
        neg_margin = -np.sum(X * w[:, None], axis=0) * y
        # sigmoid(t) is 1 / (1 + exp(t)), so this is the bracketed factor
        # from the update rule in the docstring.
        bracket = 1 - sigmoid(neg_margin)
        per_sample = X * bracket[None] * y

        data_step = lr * np.mean(per_sample, axis=1)
        shrinkage = lr * C * w
        candidate = w + data_step - shrinkage

        converged = np.linalg.norm(candidate - w) < eps
        if converged:
            break

        w = candidate

    return w

In [8]:
# Fit once with the default hyperparameters (lr=0.1, C=0, zero start).
w = grad_descent(X, y)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))




In [9]:
def get_score_for_params(lr=0.1, C=0, ia=None):
    """
    Fit the model on the notebook-level X, y and print its ROC AUC.
    The score is computed on the same data the weights were fitted on.

    :param lr: learning rate passed to grad_descent
    :param C: L2 regularization strength passed to grad_descent
    :param ia: initial approximation for the weights (None means zeros)
    :return: None, the result is only printed
    """
    weights = grad_descent(X, y, lr=lr, C=C, initial_approximation=ia)
    probabilities = logistic_regression(X, weights)
    auc = roc_auc_score(y, probabilities)
    print(f"lr: {lr}, C: {C}, score: {auc}")

In [10]:
# Baseline: default learning rate and C=0, so the L2 term contributes nothing.
get_score_for_params(C=0)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))


lr: 0.1, C: 0, score: 0.9268571428571428


In [11]:
# Same settings with L2 regularization switched on (C=10).
get_score_for_params(C=10)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))


lr: 0.1, C: 10, score: 0.9362857142857142


In [12]:
# Smaller learning rate, C=2 and a starting point away from zero (w0 = [12, 10]).
get_score_for_params(lr=0.01, C=2, ia=np.array([12,10]))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))


lr: 0.01, C: 2, score: 0.937142857142857


## Answer: ROC AUC without regularization (C=0) and with C=10, rounded to three decimals

In [13]:
# Save the two ROC AUC scores printed above (C=0 and C=10), rounded to
# three decimals and separated by a space.
answer_file = data_path / "res"
with answer_file.open("w") as out:
    out.write("0.927 0.936")