we can use this paper with we want train for only one speleothem
https://arxiv.org/pdf/1605.06065.pdf

In [1]:
import pandas as pd
from torch import nn
import torch
from torch.utils.data import Dataset, random_split, DataLoader
from sklearn.preprocessing import StandardScaler

# Data

In [2]:
class SpeleothemDating(Dataset):
    def __init__(self, annotations_file, normalize=True):
        self.speleothem = pd.read_csv(annotations_file)
        
        if normalize:
            self.normalize()

        self.x_label = ["depth_dating", "latitude", "longitude", "elevation", "entity_id_site"]
        self.y_label = ["corr_age"]
        
        x = self.speleothem.loc[:, self.x_label].values
        y = self.speleothem.loc[:, self.y_label].values
        
        self.x_train = torch.tensor(x, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.speleothem)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx]
    
    def normalize(self):
        std_scaler = StandardScaler()
        self.speleothem = pd.DataFrame(std_scaler.fit_transform(self.speleothem), columns=self.speleothem.columns)


In [3]:
speleothem = SpeleothemDating("training_data.csv")

In [4]:
train_size = int(0.8 * len(speleothem))
test_size = len(speleothem) - train_size
train_dataset, test_dataset = random_split(speleothem, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True)

In [5]:
train_features, train_labels = next(iter(train_dataloader))
train_features, train_labels

(tensor([[ 0.1170,  0.1898,  0.9685, -0.4712,  0.7594],
         [-0.6995, -0.5377,  1.0168, -0.6539, -1.4606],
         [ 0.1221, -0.9713,  1.0854, -0.2323, -0.4842],
         [-0.3142,  1.2524, -2.0357, -0.8928,  0.9454],
         [-0.2237,  0.8835, -0.3863, -0.7593, -1.6465],
         [-0.5694,  0.0352,  0.8667, -0.5105,  1.7009],
         [-0.3775,  0.1898,  0.7341,  0.8076, -0.9492],
         [-0.5898,  0.7690, -1.9138,  0.8217, -0.0774],
         [-0.3556,  0.7188, -0.0943, -0.9476, -1.0421],
         [-0.0428, -0.0800, -1.6160,  0.3073, -0.1123],
         [ 0.5571,  0.9077, -0.1730, -0.7115, -1.3327],
         [ 0.0659, -1.5048,  0.1424, -0.9771,  1.2360],
         [-0.0795,  0.9077, -0.1730, -0.7115, -1.3327],
         [ 0.6630,  0.5972, -0.4914, -0.8647, -1.2281],
         [-0.4163,  0.9316, -0.1179, -0.2393,  1.2825],
         [-0.7046,  1.6250, -0.2114, -0.7733, -1.3908]]),
 tensor([[-0.6667],
         [-0.6557],
         [-0.2582],
         [-0.6503],
         [ 0.0971],
  

# Model

In [6]:
class MultipleRegression(nn.Module):
    def __init__(self, num_features):
        super(MultipleRegression, self).__init__()
        
        self.layer_1 = nn.Linear(num_features, 16)
        self.layer_2 = nn.Linear(16, 32)
        self.layer_3 = nn.Linear(32, 16)
        self.layer_out = nn.Linear(16, 1)
        
        self.relu = nn.ReLU()

    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.relu(self.layer_2(x))
        x = self.relu(self.layer_3(x))
        x = self.layer_out(x)

        return x

In [39]:
model = MultipleRegression(num_features=5)

In [40]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [41]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

MultipleRegression(
  (layer_1): Linear(in_features=5, out_features=16, bias=True)
  (layer_2): Linear(in_features=16, out_features=32, bias=True)
  (layer_3): Linear(in_features=32, out_features=16, bias=True)
  (layer_out): Linear(in_features=16, out_features=1, bias=True)
  (relu): ReLU()
)

In [42]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    loss_total = 0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_total += loss.item()

        # if batch % 10 == 0:
        #     loss, current = loss.item(), (batch + 1) * len(X)
            # print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    return loss_total/batch

In [43]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    
    return test_loss
    # print(f"Avg loss: {test_loss:>8f} \n")

In [44]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    loss_train = train(train_dataloader, model, loss_fn, optimizer)
    loss_test = test(test_dataloader, model, loss_fn)
    print(f"loss_train: {loss_train}, loss_test: {loss_test}")
print("Done!")

Epoch 1
-------------------------------
loss_train: 0.6019951998842535, loss_test: 0.5680917636914686
Epoch 2
-------------------------------
loss_train: 0.577360453222085, loss_test: 0.5478264410387386
Epoch 3
-------------------------------
loss_train: 0.5596636367478857, loss_test: 0.5315844199874185
Epoch 4
-------------------------------
loss_train: 0.5440450710830865, loss_test: 0.5098337709903717
Epoch 5
-------------------------------
loss_train: 0.5353831554314604, loss_test: 0.5083975266326558
Epoch 6
-------------------------------
loss_train: 0.5253931848263299, loss_test: 0.4927056247537786
Epoch 7
-------------------------------
loss_train: 0.5151748481310077, loss_test: 0.5182167251001705
Epoch 8
-------------------------------
loss_train: 0.5104886864622434, loss_test: 0.5021070038730447
Epoch 9
-------------------------------
loss_train: 0.5083806174634783, loss_test: 0.4904240136796778
Epoch 10
-------------------------------
loss_train: 0.4941555221944495, loss_test: