# Logistic Regression
* Being a "fit" data scientist should be fine if we are working with linear models
* Life with be so much better if you used an experiment tracker like MLFlow, WandB, Comet, etc.
* Lol, I used a multiclass classification dataset for a Log Reg model... I was just testing how everything works anyway.
---

### Setup

In [28]:
import torch as t
import torch.nn.functional as F 
from torch.autograd import grad
from torch.utils.data import Dataset, DataLoader

from sklearn import linear_model
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import numpy as np

from dataclasses import dataclass

from rich import print

In [2]:
# For consistent results
t.manual_seed(123)

<torch._C.Generator at 0x7efd3fee2670>

In [3]:
device = t.device("cuda" if t.cuda.is_available() else "cpu")

In [4]:
device

device(type='cuda')

### Config

In [9]:
@dataclass
class Config:
    input_dim: int = 10
    output_dim: int = 1
    device: t.device = device
    num_epochs: int = 20
    batch_size: int = 10
    lr = 0.05


cfg = Config()

In [6]:
cfg.device

device(type='cuda')

### Loading Data
- [ ] Do everything similarly using sklearn

In [25]:
diabetes = load_diabetes(scaled=False)

# Aside from Random Split, I can try: KFold, StratifiedKFold, StratifiedShuffleSplit, etc.
X_train, X_test, y_train, y_test = train_test_split(diabetes.data,
                                                    diabetes.target,
                                                    test_size=0.2,
                                                    random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                y_train,
                                                test_size=0.2,
                                                random_state=42) 

In [23]:
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.fit_transform(X_val)

In [None]:
print(X_train.shape)
print(y_train.shape)
print(X_train.dtype)
print(y_train.dtype)

In [10]:
class DiabetesDataset(Dataset):
    '''
        Diabetes Dataset for PyTorch models
    '''
    def __init__(self, 
                X: np.ndarray,
                y: np.ndarray,
                cfg: Config):
        
        self.cfg = cfg

        self.features = t.tensor(X, device=self.cfg.device, dtype=t.float64)
        self.labels = t.tensor(y, device=self.cfg.device, dtype=t.float64)
    
    def __getitem__(self, index):
        x = self.features[index]
        y = self.labels[index]

        return x, y
    
    def __len__(self):
        return self.labels.shape[0]



In [11]:
diabetes_dataset = DiabetesDataset(X_train, y_train, cfg)

train_loader = DataLoader(
    dataset=diabetes_dataset,
    batch_size=cfg.batch_size,
    shuffle=True
)


### Logistic Regression using PyTorch

##### Trying out matmul with grad

In [25]:
x = t.randn(10, requires_grad=True, device=device)
weights = t.rand(10, requires_grad=True, device=device)
# small positive initialization
bias = t.ones(10, device=device) * 0.1

print(f"Tensor pertaining to input: \n {x}")
print(f"Tensor pertaining to weights: \n {weights}")
print(f"Tensor pertaining to bias: \n {bias}")



In [26]:
z = x @ weights + bias
activated_z = t.sigmoid(z)

print(z)
print(activated_z)

In [None]:
x = t.randn((cfg.batch_size, cfg.input_dim), device=cfg.device)
print(x.dtype)


with t.inference_mode():
    output = model(x)
    print(output)
    print(output.device)

##### Model

In [12]:

class LogisticRegression(t.nn.Module):
    '''
        Simple LogisticRegression using PyTorch 
    '''
    def __init__(self, cfg: Config):
        super().__init__()
        self.cfg = cfg
        self.linear = t.nn.Linear(self.cfg.input_dim, 
                                 self.cfg.output_dim,
                                 dtype=t.float64)

    def forward(self, x):
        logits = self.linear(x)
        probs = t.sigmoid(logits)
        
        return probs



##### Training Loop

In [None]:

model = LogisticRegression(cfg).to(device)
optimizer = t.optim.SGD(model.parameters(), lr=cfg.lr)

for epoch in range(cfg.num_epochs):

    # prepare model for training
    model = model.train()   
    for batch_idx, (features, labels) in enumerate(train_loader):
        
        probs = model(features)

        loss = F.binary_cross_entropy(probs, labels.view(probs.shape))
        
        # So the gradients do not accumulate...
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log
        print(f"Epoch: {epoch}/{cfg.num_epochs} | Batch: {batch_idx}/{len(train_loader)} | Train loss: {loss}")

### Logistic Regression using Sklearn

In [29]:
clf = linear_model.LogisticRegression()

clf.fit(X_train, y_train)

# Predict on the testing set
y_pred = clf.predict(X_test)

# Evaluate the model using various evaluation metrics
print("Accuracy score:", accuracy_score(y_test, y_pred))
print("Classification report:\n", classification_report(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
