In [260]:
import torch
import pandas as pd
from torch import nn, optim
from torch.nn import functional as F
from plotly import express as px, graph_objects as go
from typing import TypeVar, Callable
from sklearn.datasets import make_blobs

In [2]:
def f(x: torch.Tensor) -> torch.Tensor:
    return (x - 1) ** 2

In [38]:
x = torch.linspace(-10, 10, 1000)
y = f(x)

In [39]:
y.device

device(type='cpu')

In [40]:
df = pd.DataFrame(dict(x=x, y=y))

In [41]:
df

Unnamed: 0,x,y
0,-10.000000,121.000000
1,-9.979980,120.559952
2,-9.959960,120.120720
3,-9.939939,119.682274
4,-9.919920,119.244652
...,...,...
995,9.919920,79.564972
996,9.939939,79.922516
997,9.959960,80.280884
998,9.979980,80.640030


In [42]:
fig = (
    px.line(df, x="x", y="y")
    .update_layout(xaxis_title="$x$", yaxis_title="$f(x)=(x - 1) ^ 2$")
)
fig.show()

$$
f(x) = (x - 1) ^ 2\\
\frac{df}{dx} = 2 ( x - 1)\\
0 = 2 (x - 1)\\
x = 1
$$

In [64]:
DataType = TypeVar("DataType", torch.Tensor, float)
def f(x: DataType) -> DataType:
    return torch.exp(-0.1 * x) * torch.cos(-5 * x)

In [74]:
x = torch.linspace(-10, 10, 1000)
y = f(x)

In [75]:
def df_dx(x: float) -> float:
    x = torch.tensor(x, requires_grad=True, device="cuda")
    y = f(x)
    y.backward()
    return x.grad.cpu()

In [76]:
df_dx(0.0)

tensor(-0.1000)

$$
y = mx + b\\
b = y - mx
$$

In [77]:
def get_params(x: float) -> torch.Tensor:
    m = df_dx(x)
    y = f(torch.tensor(x))
    b = y - m * x
    return torch.tensor([m, b])

In [105]:
params = get_params(0.624)

In [106]:
params

tensor([-0.0075, -0.9346])

In [107]:
df = pd.concat([
    pd.DataFrame(dict(x=x.cpu(), y=y.cpu(), label="Data")),
    pd.DataFrame(
        dict(x=x.cpu(), y=(params[0] * x + params[1]).to("cpu"), label="Grad")
        )
    ])

fig = (
    px.line(df, x="x", y="y", color="label")
    .update_layout(xaxis_title="$x$", yaxis_title="$f(x)=(x - 1) ^ 2$")
)
fig.show()

- Stochastic Gradient Descent (SGD):

$$
\mathbf{w}_{i + 1} = \mathbf{w}_{i} - \alpha \nabla f(\mathbf{w_{i}})
$$

# Regresion Lineal
---

$$
\tilde{\mathbf{y}} = \mathbf{X}\cdot\mathbf{w}
$$

- $\mathbf{X} \in \mathbb{R} ^ {N \times m}$: matriz de caracteristicas.
- $\mathbf{y} \in \mathbb{R} ^ {N \times 1}$: vector de etiquetas.
- $\mathbf{w} \in \mathbb{R} ^ {m \times 1}$: pesos o parametros del modelo.
- $N$: numero de muestras.
- $m$: numero de caracteristicas.

In [111]:
x = 2 * torch.rand(size=(1000, 1)) - 1
y = 2 * x + 1 + torch.randn(size=(1000, 1)) * 0.2

In [112]:
x.min(), x.max()

(tensor(-0.9996), tensor(0.9968))

In [114]:
df = pd.DataFrame(dict(x=x.flatten(), y=y.flatten()))

In [116]:
fig = (
    px.scatter(df, x="x", y="y")
)
fig.show()

In [117]:
def predict(features: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
    features_ones = torch.concat([features, torch.ones_like(features)], dim=1)
    return features_ones @ weights

$$
\tilde{y} = \mathbf{x} \cdot \mathbf{w}\\
\tilde{y} = (x_0, x_1, x_2, 1) \cdot \left(\begin{matrix}w_0\\w_1\\w_2\\w_3\end{matrix}\right)\\
\tilde{y} = x_0 w_0 + x_1 w_1 + x_2 w_2 + w_3
$$

In [122]:
x_range = torch.linspace(-1, 1, 100).reshape(-1, 1)
w_rand = torch.randn(size=(2, 1))
y_pred = predict(x_range, w_rand)

In [123]:
df = pd.concat([
    pd.DataFrame(dict(x=x.flatten(), y=y.flatten(), label="data")),
    pd.DataFrame(dict(x=x_range.flatten(), y=y_pred.flatten(), label="model"))
])

In [124]:
fig = (
    px.scatter(df, x="x", y="y", color="label")
)
fig.show()

- Mean Squared Error (MSE):

$$
\text{MSE}(\mathbf{w}) = \frac{1}{N} \sum_{i=1} ^ N (y_i - \tilde{y}_i) ^ 2\\
$$

In [125]:
def mse(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
    return ((y_true - y_pred) ** 2).mean()

In [130]:
x_range = torch.linspace(-1, 1, 100).reshape(-1, 1)
w_rand = torch.randn(size=(2, 1))
y_pred = predict(x_range, w_rand)
df = pd.concat([
    pd.DataFrame(dict(x=x.flatten(), y=y.flatten(), label="data")),
    pd.DataFrame(dict(x=x_range.flatten(), y=y_pred.flatten(), label="model"))
])
loss = mse(y, predict(x, w_rand))
fig = (
    px.scatter(df, x="x", y="y", color="label")
    .update_layout(title=f"Loss: {loss:.4f}")
)
fig.show()

In [132]:
def get_loss(
    features: torch.Tensor, labels: torch.Tensor
    ) -> Callable[[torch.Tensor], torch.Tensor]:
    def loss_fn(weights: torch.Tensor) -> torch.Tensor:
        y_pred = predict(features, weights)
        loss_val = mse(labels, y_pred)
        return loss_val
    return loss_fn

In [133]:
loss_fn = get_loss(x, y)

In [134]:
loss_fn(w_rand)

tensor(11.9463)

In [152]:
def optimize(
    features: torch.Tensor,
    labels: torch.Tensor,
    alpha: float,
    epochs: int,
    device: torch.device
    ) -> torch.Tensor:
    loss_fn = get_loss(features.to(device), labels.to(device))
    w = torch.randn(
        size=(features.shape[1] + 1, 1), device=device, requires_grad=True
        )
    for i in range(epochs):
        loss_val = loss_fn(w)
        loss_val.backward()
        w = (w - alpha * w.grad).clone().detach().requires_grad_(True)
    return w.detach().cpu()

In [153]:
w = optimize(x, y, 0.1, 100, torch.device("cuda"))

In [155]:
x_range = torch.linspace(-1, 1, 100).reshape(-1, 1)
y_pred = predict(x_range, w)
df = pd.concat([
    pd.DataFrame(dict(x=x.flatten(), y=y.flatten(), label="data")),
    pd.DataFrame(dict(x=x_range.flatten(), y=y_pred.flatten(), label="model"))
])
loss = mse(y, predict(x, w))
fig = (
    px.scatter(df, x="x", y="y", color="label")
    .update_layout(title=f"Loss: {loss:.4f}")
)
fig.show()

# Regresion Logistica
---

- Prediccion:

$$
\tilde{\mathbf{y}} = \frac{1}{1 + e ^ {\mathbf{X}\cdot \mathbf{w}}}
$$

- Funcion de perdida (entropia binaria cruzada):

$$
\mathcal{L}(\mathbf{w}) = -\frac{1}{N} \sum_{i=1} ^ N y_i \log{\tilde{y}_i} + (1 - y_i) \log{(1 - \tilde{y}_i)}
$$

In [156]:
def binary_crossentropy(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
    return - (
        y_true * torch.log(y_pred) +
        (1 - y_true) * torch.log(1 - y_pred)
    ).mean()

In [166]:
y_true = torch.tensor([1., ])
y_pred = torch.tensor([0.01,])

In [167]:
binary_crossentropy(y_true, y_pred)

tensor(4.6052)

In [240]:
features, labels = make_blobs(
    n_samples=1000, centers=2, random_state=42, cluster_std=0.5
    )
features = (
    (features - features.min(axis=0, keepdims=True)) /
    (features.max(axis=0, keepdims=True) - features.min(axis=0, keepdims=True))
    )

In [241]:
features = torch.tensor(features, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)

In [242]:
df = pd.DataFrame(data=features, columns=["x1", "x2"]).assign(y=labels)
df

Unnamed: 0,x1,x2,y
0,0.201992,0.761324,0.0
1,0.952108,0.114336,1.0
2,0.814672,0.071871,1.0
3,0.894890,0.113372,1.0
4,0.860045,0.293815,1.0
...,...,...,...
995,0.191939,0.924215,0.0
996,0.904911,0.165581,1.0
997,0.801016,0.063560,1.0
998,0.924776,0.138393,1.0


In [243]:
fig = (
    px
    .scatter(df, x="x1", y="x2", color="y", color_continuous_scale="RdBu")
    .update_layout(xaxis_title="$x_1$", yaxis_title="$x_2$")
)
fig.show()

In [244]:
def predict(features: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
    features_ones = torch.concat([
        features, torch.ones(
            size=(features.shape[0], 1),
            dtype=features.dtype,
            device=features.device
            )
        ], dim=1)
    return 1. / (1. + torch.exp(features_ones @ weights))

In [245]:
def get_loss(
    features: torch.Tensor, labels: torch.Tensor
    ) -> Callable[[torch.Tensor], torch.Tensor]:
    def loss_fn(weights: torch.Tensor) -> torch.Tensor:
        y_pred = predict(features, weights)
        loss_val = binary_crossentropy(labels, y_pred)
        return loss_val
    return loss_fn

In [246]:
def optimize(
    features: torch.Tensor,
    labels: torch.Tensor,
    alpha: float,
    epochs: int,
    device: torch.device
    ) -> torch.Tensor:
    loss_fn = get_loss(features.to(device), labels.to(device))
    w = torch.randn(
        size=(features.shape[1] + 1, 1), device=device, requires_grad=True
        )
    for i in range(epochs):
        loss_val = loss_fn(w)
        loss_val.backward()
        w = (w - alpha * w.grad).clone().detach().requires_grad_(True)
    return w.detach().cpu()

In [247]:
w = optimize(features, labels, 0.1, 100, torch.device("cuda"))

In [248]:
w

tensor([[-1.2395],
        [ 1.3849],
        [-0.0547]])

In [249]:
x1 = torch.linspace(0., 1., 100)
x2 = torch.linspace(0., 1., 100)
X1, X2 = torch.meshgrid(x1, x2)
X_grid = torch.concat([X1.reshape(-1, 1), X2.reshape(-1, 1)], dim=1)
y_grid = predict(X_grid, w).reshape(X1.shape)

In [251]:
fig = (
    px
    .scatter(df, x="x1", y="x2", color="y", color_continuous_scale="RdBu")
    .add_trace(go.Contour(x=x1, y=x2, z=y_grid, colorscale="RdBu", opacity=0.4))
    .update_layout(xaxis_title="$x_1$", yaxis_title="$x_2$")
)
fig.show()

In [252]:
y_pred = (predict(features, w) > 0.5).int()

In [253]:
(y_pred == labels.int()).float().mean()

tensor(1.)

In [261]:
class LogisticRegression(nn.Module):
    def __init__(self, input_dims: int, *args, **kwargs):
        super(LogisticRegression, self).__init__(*args, **kwargs)
        self.layer = nn.Linear(
            in_features=input_dims,
            out_features=1
            ) # X @ W + b

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        return F.sigmoid(self.layer(features))

In [262]:
model = LogisticRegression(input_dims=2)

In [263]:
print(model)

LogisticRegression(
  (layer): Linear(in_features=2, out_features=1, bias=True)
)


In [264]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.5731, -0.0747]], requires_grad=True),
 Parameter containing:
 tensor([0.1943], requires_grad=True)]

In [311]:
def optimize(
    features: torch.Tensor,
    labels: torch.Tensor,
    model: nn.Module,
    loss_fn: Callable,
    epochs: int,
    alpha: float,
    device: torch.device
    ) -> nn.Module:
    features = features.to(device)
    labels = labels.to(device)
    model = model.to(device)
    opt = optim.SGD(model.parameters(), lr=alpha)
    for i in range(epochs):
        y_pred = model(features)
        loss_val = loss_fn(labels, y_pred)

        opt.zero_grad()
        loss_val.backward()
        opt.step()
    return model.cpu()

In [331]:
model = LogisticRegression(input_dims=2)
train_model = optimize(
    features=features,
    labels=labels,
    model=model,
    loss_fn=binary_crossentropy,
    epochs=100,
    alpha=0.1,
    device=torch.device(type="cuda", index=0)
    )

In [332]:
model = LogisticRegression(input_dims=2)

In [333]:
features

tensor([[0.2020, 0.7613],
        [0.9521, 0.1143],
        [0.8147, 0.0719],
        ...,
        [0.8010, 0.0636],
        [0.9248, 0.1384],
        [0.2691, 0.7767]])

In [334]:
y_pred = train_model(features)

In [337]:
y_pred = (train_model(features) > 0.5).int()

In [338]:
(y_pred == labels.int()).float().mean()

tensor(1.)

In [336]:
y_pred

tensor([[0.3132],
        [0.7524],
        [0.7183],
        [0.7351],
        [0.6839],
        [0.7314],
        [0.7351],
        [0.2828],
        [0.7197],
        [0.2681],
        [0.7010],
        [0.7459],
        [0.6533],
        [0.2534],
        [0.6942],
        [0.2836],
        [0.7486],
        [0.7187],
        [0.2708],
        [0.3320],
        [0.2866],
        [0.7078],
        [0.7248],
        [0.2832],
        [0.6820],
        [0.7192],
        [0.3101],
        [0.7186],
        [0.3205],
        [0.2771],
        [0.7382],
        [0.2749],
        [0.6999],
        [0.2966],
        [0.6932],
        [0.3001],
        [0.7505],
        [0.2698],
        [0.7289],
        [0.7060],
        [0.2840],
        [0.7461],
        [0.2808],
        [0.7136],
        [0.7396],
        [0.2696],
        [0.2701],
        [0.3118],
        [0.7070],
        [0.2618],
        [0.2778],
        [0.2592],
        [0.3037],
        [0.2888],
        [0.2593],
        [0

In [322]:
labels.shape

torch.Size([1000, 1])

In [323]:
y_pred.shape

torch.Size([1000, 1])