# Logistic Regression Classifier

## 1) Installing Libraries

In [None]:
# !conda install numpy pandas matplotlib --yes

In [None]:
# !pip install torch torchvision torchaudio

In [None]:
# !conda install watermark

In [None]:
%load_ext watermark
%watermark -v -p numpy,pandas,matplotlib,torch -conda

## 2) Loading the Dataset

In [None]:
import pandas as pd

df = pd.read_csv("toydata-truncated.txt", sep="\t")
df

In [None]:
# Split the DataFrame into a 2-column feature matrix and a label vector,
# both as plain NumPy arrays.
feature_columns = ["x1", "x2"]
X_train = df[feature_columns].to_numpy()
y_train = df["label"].to_numpy()

In [None]:
X_train

In [None]:
X_train.shape

In [None]:
y_train

In [None]:
y_train.shape

In [None]:
import numpy as np

np.bincount(y_train)

## 3) Visualizing the dataset

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Scatter the raw training points, one marker style per class.
# Each entry is (class id, marker, marker size, legend label).
class_styles = (
    (0, "D", 10, "Class 0"),
    (1, "^", 13, "Class 1"),
)

for class_id, marker, size, legend_label in class_styles:
    in_class = y_train == class_id
    plt.plot(
        X_train[in_class, 0],
        X_train[in_class, 1],
        marker=marker,
        markersize=size,
        linestyle="",  # markers only, no connecting line
        label=legend_label,
    )

plt.legend(loc=2)  # location code 2 = upper left

# Fixed axis limits so this figure is directly comparable with later ones.
plt.xlim([-5, 5])
plt.ylim([-5, 5])

plt.xlabel("Feature $x_1$", fontsize=12)
plt.ylabel("Feature $x_2$", fontsize=12)

plt.grid()
plt.show()

In [None]:
# Standardize each feature column: subtract its mean, divide by its standard
# deviation (both computed along axis 0, i.e. per column).
feature_means = X_train.mean(axis=0)
feature_stds = X_train.std(axis=0)
X_train = (X_train - feature_means) / feature_stds

In [None]:
# Same scatter plot as before, now drawn from the standardized features.
# Each entry is (class id, marker, marker size, legend label).
for class_id, marker, size, legend_label in (
    (0, "D", 10, "Class 0"),
    (1, "^", 13, "Class 1"),
):
    rows = y_train == class_id
    plt.plot(
        X_train[rows, 0],
        X_train[rows, 1],
        marker=marker,
        markersize=size,
        linestyle="",  # markers only
        label=legend_label,
    )

plt.legend(loc=2)  # location code 2 = upper left

plt.xlim([-5, 5])
plt.ylim([-5, 5])

plt.xlabel("Feature $x_1$", fontsize=12)
plt.ylabel("Feature $x_2$", fontsize=12)

plt.grid()
plt.show()

## 4) Implementing the model

In [None]:
import torch
import torch.nn.functional as F

class LogisticRegression(torch.nn.Module):
    """Linear classifier that outputs one raw score (logit) per class.

    The model is a single fully connected layer; no softmax is applied in
    ``forward``, so callers normalize the logits themselves when they need
    probabilities.

    Args:
        num_features: Size of each input example.
        num_classes: Number of output scores produced per example.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        # Kept under the attribute name ``linear`` so the weight matrix and
        # bias can be read back as ``model.linear.weight`` / ``.bias``.
        self.linear = torch.nn.Linear(
            in_features=num_features,
            out_features=num_classes,
        )

    def forward(self, x):
        """Return the logits produced by the linear layer for ``x``."""
        return self.linear(x)

In [None]:
torch.manual_seed(1)

model = LogisticRegression(num_features=2, num_classes=2)

In [None]:
# Three hand-written examples (two features each) to try the untrained model.
x = torch.tensor(
    [
        [1.1, 2.1],
        [1.1, 2.1],
        [9.1, 4.1],
    ]
)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    logits = model(x)
    # Softmax over dim 1 turns each row of logits into class probabilities.
    probas = F.softmax(logits, dim=1)

print(probas)

## 5) Defining a DataLoader

In [None]:
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    """Map-style dataset that pairs feature rows with integer class labels.

    Args:
        X: Array-like of features, one row per example. Stored as a
            ``float32`` tensor in ``self.features``.
        y: Array-like of class labels, one per example. Stored as an
            ``int64`` tensor in ``self.labels`` (the dtype
            ``F.cross_entropy`` expects for class-index targets).

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of examples.
    """

    def __init__(self, X, y):
        self.features = torch.tensor(X, dtype=torch.float32)
        self.labels = torch.tensor(y, dtype=torch.int64)

        # __len__ is driven by the labels only, so a mismatch would either
        # silently drop feature rows or fail with an IndexError mid-epoch.
        # Fail early with a clear message instead.
        num_feature_rows = self.features.shape[0]
        num_labels = self.labels.shape[0]
        if num_feature_rows != num_labels:
            raise ValueError(
                f"X and y must contain the same number of examples, "
                f"got {num_feature_rows} feature rows and {num_labels} labels"
            )

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        x = self.features[index]
        y = self.labels[index]
        return x, y

    def __len__(self):
        """Return the number of examples in the dataset."""
        return self.labels.shape[0]
    

# Wrap the training arrays in a dataset and serve them in shuffled
# mini-batches of 10 examples.
train_ds = MyDataset(X_train, y_train)
train_loader = DataLoader(train_ds, batch_size=10, shuffle=True)

In [None]:
X_train.shape

## 6) The training loop

In [None]:
import torch.nn.functional as F


# Re-seed so the weight initialization is reproducible, then create a fresh
# model together with a plain SGD optimizer over its parameters.
torch.manual_seed(1)
model = LogisticRegression(num_features=2, num_classes=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

num_epochs = 20

for epoch in range(num_epochs):

    model = model.train()
    for batch_idx, (features, class_labels) in enumerate(train_loader):

        # Forward pass: raw class scores for the current mini-batch.
        logits = model(features)

        # cross_entropy takes logits and applies log-softmax internally.
        loss = F.cross_entropy(logits, class_labels)

        # Gradients accumulate across backward() calls, so clear the ones
        # left over from the previous step before computing new ones.
        optimizer.zero_grad()
        loss.backward()
        # Apply one SGD update using the freshly computed gradients.
        optimizer.step()

        ### LOGGING
        print(f'Epoch: {epoch+1:03d}/{num_epochs:03d}'
               f' | Batch {batch_idx:03d}/{len(train_loader):03d}'
               f' | Loss: {loss:.2f}')
            

## 7) Evaluating the results

In [None]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples that ``model`` classifies correctly.

    The model is put into evaluation mode (and left in it). For every batch
    the predicted class is the index of the largest logit along dim 1.

    Args:
        model: Callable module mapping a batch of features to logits.
        dataloader: Iterable yielding ``(features, class_labels)`` batches.

    Returns:
        A 0-dim tensor holding ``correct / total``; it is a tensor rather
        than a Python float because the running count is accumulated by
        adding tensors.
    """
    model.eval()

    num_correct = 0.0
    num_seen = 0

    for features, class_labels in dataloader:

        # No gradients are needed for evaluation.
        with torch.no_grad():
            scores = model(features)

        predictions = scores.argmax(dim=1)

        hits = class_labels == predictions
        num_correct += hits.sum()
        num_seen += len(hits)

    return num_correct / num_seen

In [None]:
train_acc = compute_accuracy(model, train_loader)

In [None]:
print(f"Accuracy: {train_acc*100}%")

## 8) Optional: visualizing the decision boundary

In [None]:
# Scatter plot of the training points, one marker style per class.
# Each entry is (class id, marker, marker size, legend label).
marker_specs = [
    (0, "D", 10, "Class 0"),
    (1, "^", 13, "Class 1"),
]

for class_id, marker, size, legend_label in marker_specs:
    selected = y_train == class_id
    plt.plot(
        X_train[selected, 0],
        X_train[selected, 1],
        marker=marker,
        markersize=size,
        linestyle="",  # markers only
        label=legend_label,
    )

plt.legend(loc=2)  # location code 2 = upper left

plt.xlim([-5, 5])
plt.ylim([-5, 5])

plt.xlabel("Feature $x_1$", fontsize=12)
plt.ylabel("Feature $x_2$", fontsize=12)

plt.grid()
plt.show()

In [None]:
def plot_boundary(model):
    """Compute two end points of the model's linear decision boundary.

    Despite its name, this function draws nothing; it only returns the
    coordinates of a line segment for the caller to plot.

    For a model with two output units whose prediction is the arg-max of
    the logits, the predicted class flips where both logits are equal:

        (w_1 - w_0) . x + (b_1 - b_0) = 0

    so the boundary is built from the *difference* of the two weight rows
    and biases. Using only the first row would draw the line where logit 0
    is zero, which is not where the prediction changes. For any other
    number of output units (e.g. a single-output model) the first row is
    used as-is: ``w_0 . x + b_0 = 0``.

    Args:
        model: Object exposing a ``linear`` layer whose ``weight`` has one
            row per output unit and at least two columns (x1, x2), and
            whose ``bias`` has one entry per output unit.

    Returns:
        Tuple ``(x1_min, x1_max, x2_min, x2_max)``: x1 is fixed at -20 and
        20, and the x2 values are the matching points on the boundary,
        returned as 0-dim tensors.
    """
    weight = model.linear.weight.detach()
    bias = model.linear.bias.detach()

    if weight.shape[0] == 2:
        # Two logits: the boundary is where they tie.
        w_row = weight[1] - weight[0]
        b = bias[1] - bias[0]
    else:
        w_row = weight[0]
        b = bias[0]

    w1 = w_row[0]
    w2 = w_row[1]

    # Solve w1 * x1 + w2 * x2 + b = 0 for x2 at both ends of the x1 range.
    # NOTE: if w2 is exactly 0 the boundary is vertical and the results
    # are inf/nan.
    x1_min = -20
    x2_min = (-(w1 * x1_min) - b) / w2

    x1_max = 20
    x2_max = (-(w1 * x1_max) - b) / w2

    return x1_min, x1_max, x2_min, x2_max

In [None]:
# End points of the line segment computed from the trained model.
x1_min, x1_max, x2_min, x2_max = plot_boundary(model)

# Training points, one marker style per class.
# Each entry is (class id, marker, marker size, legend label).
for class_id, marker, size, legend_label in (
    (0, "D", 10, "Class 0"),
    (1, "^", 13, "Class 1"),
):
    members = y_train == class_id
    plt.plot(
        X_train[members, 0],
        X_train[members, 1],
        marker=marker,
        markersize=size,
        linestyle="",  # markers only
        label=legend_label,
    )

# Overlay the boundary as a black line through the two end points.
plt.plot([x1_min, x1_max], [x2_min, x2_max], color="k")

plt.legend(loc=2)  # location code 2 = upper left

# The segment spans x1 in [-20, 20]; the axis limits crop it to the data.
plt.xlim([-5, 5])
plt.ylim([-5, 5])

plt.xlabel("Feature $x_1$", fontsize=12)
plt.ylabel("Feature $x_2$", fontsize=12)

plt.grid()
plt.show()