In [None]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's default random generator so runs from a fresh kernel repeat.
torch.manual_seed(1)
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
# CSV loaded into the NDVI frame below; the notebook reads its "Year", "Data"
# and "NDVI" columns. The path is relative to the kernel's working directory.
NDVI_PATH = "../data/PROCESSED/ndvi.csv"
# CSV loaded into the PROD frame below; the notebook reads its "Year" and
# "Productivity (kg/ha)" columns.
PROD_PATH = "../data/PROCESSED/manhuacu.csv"

## 1. Load Data

In [None]:
# Load the NDVI table and tag every row with the number of non-null "Data"
# entries recorded for that row's year.
NDVI = pd.read_csv(NDVI_PATH).assign(
    N_Observations=lambda frame: frame.groupby("Year")["Data"].transform("count")
)

NDVI

In [None]:
# Load the productivity table, keeping only rows from the year 2000 onwards.
PROD = pd.read_csv(PROD_PATH).loc[lambda frame: frame["Year"] >= 2000]
PROD

## 2. Prepare Data

In [None]:
# For each year keep its final 20 rows, in the order they appear in NDVI.
# Years with fewer than 20 rows keep all of their rows.
NDVI_last_20_per_year = NDVI.groupby(by="Year").tail(n=20)
NDVI_last_20_per_year

In [None]:
# PyTorch Dataset


class Dataset(torch.utils.data.Dataset):
    """Map-style dataset yielding one ``(ndvi, productivity)`` pair per year.

    Sample ``idx`` corresponds to the ``idx``-th distinct, non-null ``Year``
    of ``ndvi_df`` in ascending order.

    Args:
        ndvi_df: DataFrame with ``Year`` and ``NDVI`` columns. All rows that
            share a year form that year's feature vector, in frame order.
        prod_df: DataFrame with ``Year`` and ``Productivity (kg/ha)`` columns.
            The first row matching a year supplies the target value.
    """

    def __init__(self, ndvi_df, prod_df):
        self.ndvi_df = ndvi_df
        self.prod_df = prod_df
        # Computed once instead of on every __getitem__ call. NaN years are
        # dropped so that __len__ and __getitem__ agree on the sample count.
        self.years = ndvi_df["Year"].dropna().sort_values().unique()

    def __len__(self):
        """Return the number of distinct years available in ``ndvi_df``."""
        return len(self.years)

    def __getitem__(self, idx):
        """Return ``(ndvi, prod)`` float32 tensors for the ``idx``-th year.

        Returns:
            ``ndvi``: 1-D tensor holding the year's NDVI values.
            ``prod``: 0-dim tensor holding the year's productivity.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
            KeyError: if ``prod_df`` has no row for the selected year.
        """
        n_years = len(self.years)
        if not -n_years <= idx < n_years:
            raise IndexError("Index out of range")
        year = self.years[idx]

        ndvi = self.ndvi_df.loc[self.ndvi_df["Year"] == year, "NDVI"].to_numpy()
        prod = self.prod_df.loc[
            self.prod_df["Year"] == year, "Productivity (kg/ha)"
        ]
        if prod.empty:
            # This must NOT be an IndexError: plain `for x in dataset` iteration
            # goes through __getitem__ and treats IndexError as end-of-sequence,
            # which would silently truncate the data.
            raise KeyError(f"No productivity record for year {year}")

        return torch.tensor(ndvi, dtype=torch.float32), torch.tensor(
            prod.iloc[0], dtype=torch.float32
        )


dataset = Dataset(NDVI_last_20_per_year, PROD)
dataset[0]  # smoke check: fails right here if the first year cannot be built

# 80/20 train/test split over years.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# A dedicated, explicitly seeded generator keeps the partition identical no
# matter how often or in which order cells are executed. Without it the split
# depends on whatever state the global RNG is in when this cell runs.
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(1),
)

## 3. Model Creation and Training

In [None]:
# Hyper-parameters of the feed-forward regressor.
N_FEATURES = 20  # must equal the number of NDVI values in each dataset sample
N_EPOCHS = 500
LEARNING_RATE = 0.01

# The last Linear layer is deliberately left without an activation: a ReLU on
# a regression output clamps predictions at 0 and has zero gradient there, so
# a negative initial pre-activation could stop the network from ever learning.
ff_network = nn.Sequential(
    nn.Linear(N_FEATURES, 50),
    nn.ReLU(),
    nn.Linear(50, 20),
    nn.ReLU(),
    nn.Linear(20, 1),
).to(device)
optimizer = optim.Adam(ff_network.parameters(), lr=LEARNING_RATE)
loss_fn = nn.MSELoss()

losses = []  # mean training loss of each epoch
for epoch in range(N_EPOCHS):
    epoch_losses = []
    # One optimizer step per sample (no batching).
    for ndvi, prod in train_dataset:
        ndvi = ndvi.to(device)
        prod = prod.to(device)
        optimizer.zero_grad()
        # The network outputs shape (1,) while the target is a 0-dim tensor.
        # Dropping the trailing axis makes both shapes match, so MSELoss does
        # not have to broadcast (and warn about) mismatched sizes.
        pred = ff_network(ndvi).squeeze(-1)
        loss = loss_fn(pred, prod)
        loss.backward()
        optimizer.step()
        epoch_losses.append(loss.item())
    losses.append(np.mean(epoch_losses))

In [None]:
import matplotlib.pyplot as plt

# Line chart of the per-epoch mean training loss collected in `losses`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(losses, label="Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Over Epochs")
ax.legend()
ax.grid(True)
plt.show()