## brightness model

The purpose of this notebook is to model the relationship between features and sky brightness.
Data is based on GaN dataset (which is assumed already written to disk).

See [link](https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html)


In [147]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

cwd = Path.cwd()
df = pd.read_csv(cwd / "data" / "gan.csv")
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
torch.set_printoptions(sci_mode=False)

In [148]:
features = [
    "Latitude",
    "Longitude",
    "Elevation(m)",
    "CloudCover",
    "UTTimeHour",
]
feature_tensor = torch.tensor(df[features].values.astype(np.float32))
feature_tensor = torch.nan_to_num(feature_tensor, nan=0.0)

target_tensor = torch.tensor(df["SQMReading"].values.astype(np.float32)).type(
    torch.FloatTensor
)

data_tensor = TensorDataset(feature_tensor, target_tensor)
train_size = int(0.8 * len(data_tensor))
test_size = len(data_tensor) - train_size
train_tensor, test_tensor = random_split(data_tensor, [train_size, test_size])

train_dataloader = DataLoader(dataset=train_tensor, batch_size=16, shuffle=True)
test_dataloader = DataLoader(dataset=test_tensor, batch_size=16, shuffle=True)

feature_tensor, target_tensor

(tensor([[    34.2365,   -110.0840,   1964.3800,      6.0000,      0.5000],
         [    33.3369,   -111.4250,    561.7730,      0.0000,      0.7071],
         [    38.8878,   -119.8200,   1466.4800,      0.0000,      0.9659],
         ...,
         [    37.8585,   -122.1440,    345.8890,      0.0000,      0.9659],
         [    47.6102,     20.7281,     91.7003,      0.0000,     -1.0000],
         [    47.6102,     20.7281,     91.7585,      0.0000,     -0.9659]]),
 tensor([17.7800, 20.6700, 21.2400,  ..., 19.5000, 20.6700, 20.9400]))

In [149]:
HIDDEN_SIZE = 64 * 3
OUTPUT_SIZE = 1
FEATURES_SIZE = len(features)


class NeuralNetwork(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(FEATURES_SIZE, HIDDEN_SIZE),
            nn.ReLU(),
            nn.Linear(HIDDEN_SIZE, HIDDEN_SIZE // 2),
            nn.ReLU(),
            nn.Linear(HIDDEN_SIZE // 2, OUTPUT_SIZE),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits


model = NeuralNetwork().to(device)
model

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=5, out_features=192, bias=True)
    (1): ReLU()
    (2): Linear(in_features=192, out_features=96, bias=True)
    (3): ReLU()
    (4): Linear(in_features=96, out_features=1, bias=True)
  )
)

In [150]:
loss_fn = nn.HuberLoss()
learning_rate = 1e-5
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [151]:
def train_loop(
    data_loader: DataLoader,
    model: NeuralNetwork,
    loss_fn: nn.HuberLoss,
    optimizer: torch.optim.Adam,
):
    model.train()
    for batch, (X, y) in enumerate(data_loader):
        optimizer.zero_grad()
        output = model(X)
        loss = loss_fn(output.squeeze(), y)
        loss.backward()
        nn.utils.clip_grad.clip_grad_norm(model.parameters(), max_norm=5)
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            size = len(data_loader.dataset)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [152]:
def test_model(data_loader: DataLoader, model: NeuralNetwork, loss_fn: nn.MSELoss):
    model.eval()
    with torch.no_grad():
        test_loss = 0
        for batch, (X, y) in enumerate(data_loader):
            pred = model(X)
            print(f"prediction at {batch} was {pred} for {X} ")
            loss = loss_fn(pred.squeeze(), y)
            test_loss += loss.item() * X.size(0)
        avg_loss = test_loss / len(data_loader.dataset)
        print(f"avg loss in test is {avg_loss}")

In [153]:
epochs = 100
for t in range(epochs):
    print(f"epoch {t+1}")
    train_loop(train_dataloader, model, loss_fn, optimizer)

epoch 1
loss: 13.911648 [   16/11888]


  nn.utils.clip_grad.clip_grad_norm(model.parameters(), max_norm=5)


loss: 16.296066 [ 1616/11888]


loss: 8.594501 [ 3216/11888]
loss: 10.901901 [ 4816/11888]
loss: 8.293318 [ 6416/11888]
loss: 6.669603 [ 8016/11888]
loss: 5.551595 [ 9616/11888]
loss: 5.993745 [11216/11888]
epoch 2
loss: 2.825642 [   16/11888]
loss: 3.771607 [ 1616/11888]
loss: 3.539229 [ 3216/11888]
loss: 3.974292 [ 4816/11888]
loss: 3.145385 [ 6416/11888]
loss: 2.978451 [ 8016/11888]
loss: 3.572681 [ 9616/11888]
loss: 3.228298 [11216/11888]
epoch 3
loss: 2.933829 [   16/11888]
loss: 5.599508 [ 1616/11888]
loss: 2.861788 [ 3216/11888]
loss: 2.522608 [ 4816/11888]
loss: 3.245562 [ 6416/11888]
loss: 2.482477 [ 8016/11888]
loss: 3.330313 [ 9616/11888]
loss: 2.878099 [11216/11888]
epoch 4
loss: 3.172630 [   16/11888]
loss: 2.516014 [ 1616/11888]
loss: 2.673995 [ 3216/11888]
loss: 3.403836 [ 4816/11888]
loss: 0.613145 [ 6416/11888]
loss: 1.787099 [ 8016/11888]
loss: 2.194581 [ 9616/11888]
loss: 2.484262 [11216/11888]
epoch 5
loss: 1.088346 [   16/11888]
loss: 1.796514 [ 1616/11888]
loss: 1.239002 [ 3216/11888]
loss: 2.32

In [154]:
test_model(test_dataloader, model, loss_fn)

prediction at 0 was tensor([[21.5945],
        [18.1674],
        [19.4583],
        [20.2820],
        [17.0507],
        [17.3644],
        [20.9524],
        [17.9393],
        [20.1809],
        [16.2583],
        [21.4806],
        [20.6948],
        [20.6776],
        [17.7301],
        [21.6605],
        [19.5955]]) for tensor([[    36.5732,   -115.8790,   1090.2200,      0.0000,      0.9659],
        [    42.3180,    -83.2971,    189.7800,      0.0000,      0.0000],
        [    33.6953,   -112.1310,    424.9400,      0.0000,      0.5000],
        [    46.3091,    -79.4608,    207.0000,      0.0000,      0.7071],
        [    42.3164,    -83.2980,    189.7300,      6.0000,      0.2588],
        [    27.5034,   -109.9570,     34.5100,      0.0000,      0.8660],
        [    40.1723,   -105.1130,   1527.4000,      0.0000,      0.7071],
        [    40.2031,    -79.9275,    232.1800,      4.0000,      0.2588],
        [    36.0250,   -114.9480,    610.0200,      0.0000,      1.000

prediction at 11 was tensor([[18.7819],
        [19.4498],
        [18.2248],
        [14.5454],
        [19.2950],
        [16.9710],
        [21.2365],
        [18.1931],
        [18.0582],
        [12.2442],
        [18.2303],
        [17.3557],
        [22.0914],
        [18.5166],
        [18.4804],
        [18.4460]]) for tensor([[    41.3360,     21.5557,    655.1100,      0.0000,     -1.0000],
        [    32.2514,   -111.0620,    780.1900,      0.0000,      0.7071],
        [    44.4300,     24.3600,    144.0800,      0.0000,     -1.0000],
        [    20.6504,    -87.0868,      9.6987,      0.0000,      0.2588],
        [    32.3499,   -111.0390,    717.6800,      0.0000,      0.7071],
        [    28.6683,    -17.9032,    522.9210,      0.0000,      0.0000],
        [   -31.2012,    -71.0002,   1139.0800,      2.0000,      0.5000],
        [    42.3298,    -83.2709,    187.6800,      0.0000,      0.0000],
        [    36.2090,      6.7997,    728.0520,      0.0000,     -0.86

In [155]:
torch.save(model.state_dict(), cwd / "model.pth")