We can use the approach from this paper if we want to train on only one speleothem:
https://arxiv.org/pdf/1605.06065.pdf

In [33]:
import pandas as pd
from torch import nn
import torch
from torch.utils.data import Dataset, random_split, DataLoader
from sklearn.preprocessing import StandardScaler

# Data

In [34]:
class SpeleothemDating(Dataset):
    """Speleothem dating table served as ``(features, target)`` tensor pairs.

    The CSV is read into ``self.speleothem``; the columns listed in
    ``self.x_label`` become the float32 feature tensor ``self.x_train`` and
    the column in ``self.y_label`` becomes the float32 target tensor
    ``self.y_train``.
    """

    def __init__(self, annotations_file, normalize=True):
        """Load the CSV and materialise the feature/target tensors.

        Args:
            annotations_file: Path or buffer passed to ``pandas.read_csv``.
            normalize: When true, run ``normalize()`` on the loaded table
                before the tensors are built.
        """
        self.speleothem = pd.read_csv(annotations_file)

        # The column lists are set first because the tensors are (re)built
        # from them, including inside normalize().
        self.x_label = ["depth_dating", "latitude", "longitude", "elevation", "entity_id_site"]
        self.y_label = ["corr_age"]

        if normalize:
            self.normalize()
        else:
            self._build_tensors()

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.x_train[idx], self.y_train[idx]

    def normalize(self):
        """Standardise every column of the table and refresh the tensors.

        The scaler is fitted on the whole DataFrame, so every column must be
        numeric. The tensors are rebuilt afterwards; previously a call made
        after construction changed only the DataFrame and left
        ``x_train``/``y_train`` stale.
        """
        std_scaler = StandardScaler()
        self.speleothem = pd.DataFrame(std_scaler.fit_transform(self.speleothem), columns=self.speleothem.columns)
        self._build_tensors()

    def _build_tensors(self):
        """Copy the feature and target columns into float32 tensors."""
        x = self.speleothem.loc[:, self.x_label].values
        y = self.speleothem.loc[:, self.y_label].values

        self.x_train = torch.tensor(x, dtype=torch.float32)
        self.y_train = torch.tensor(y, dtype=torch.float32)


In [35]:
# The constructor already standardises the table (normalize=True is the
# default), so no separate normalize() call is needed afterwards.
speleothem = SpeleothemDating("training_data.csv")

In [36]:
# 80/20 random split of the samples into training and held-out subsets.
train_size = int(0.8 * len(speleothem))
test_size = len(speleothem) - train_size
train_dataset, test_dataset = random_split(speleothem, [train_size, test_size])

BATCH_SIZE = 16

# Only the training loader shuffles. The evaluation loader keeps a fixed order
# so the batches (and the averaged per-batch loss) are the same on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [37]:
# Pull one mini-batch from the training loader to eyeball the tensors.
first_batch = next(iter(train_dataloader))
train_features, train_labels = first_batch
(train_features, train_labels)

(tensor([[-0.6689, -1.9249,  1.7576, -0.8928,  0.2248],
         [-0.6480,  0.9289, -0.2998,  1.7421, -1.0538],
         [-0.6613,  1.2524, -2.0357, -0.8928,  0.9454],
         [-0.5985, -0.1049, -1.4917, -0.2323,  0.5037],
         [ 1.3698, -1.2136,  0.1790, -0.9448, -1.3211],
         [-0.0553,  0.3425, -1.6055, -0.5752, -1.1003],
         [-0.3296,  0.8161, -1.5272, -0.5049,  0.5153],
         [-0.7085,  1.0446, -0.3320, -0.7522, -1.5071],
         [-0.6463, -2.0125,  1.7877, -0.1620,  1.6195],
         [-0.6740, -2.2351,  1.6668, -0.6398,  0.1783],
         [-0.7123, -1.1026, -0.9076, -0.0496, -0.9724],
         [-0.3481,  0.9885, -0.1435, -0.5302,  1.5149],
         [ 0.3339,  0.1154,  0.3095,  0.1190,  0.0737],
         [ 1.0739, -2.0747,  1.7257, -0.8928,  1.6428],
         [-0.2170, -0.1049, -1.4917, -0.2323,  0.5037],
         [-0.1446, -1.1120, -0.9004, -0.2182,  0.9803]]),
 tensor([[-0.4995],
         [-0.5379],
         [-0.6690],
         [-0.6707],
         [-0.2432],
  

# Model

In [None]:
class MultipleRegression(nn.Module):
    """Fully connected regressor with a single output value per sample.

    Layout: ``num_features -> 16 -> 32 -> 16 -> 1``, with a ReLU after each
    of the three hidden layers and no activation on the output layer.
    """

    def __init__(self, num_features):
        """Create the layers.

        Args:
            num_features: Size of the last dimension of the input tensor.
        """
        super().__init__()

        # Hidden stack.
        self.layer_1 = nn.Linear(num_features, 16)
        self.layer_2 = nn.Linear(16, 32)
        self.layer_3 = nn.Linear(32, 16)
        # Linear head producing the regression value.
        self.layer_out = nn.Linear(16, 1)

        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Return the prediction for ``inputs`` (last dimension of size 1)."""
        hidden = inputs
        for layer in (self.layer_1, self.layer_2, self.layer_3):
            hidden = self.relu(layer(hidden))
        return self.layer_out(hidden)

In [None]:
# The input width must match the number of feature columns the dataset serves
# (five); a hard-coded 1 fails with a shape mismatch on the first forward pass.
model = MultipleRegression(num_features=len(speleothem.x_label))

In [None]:
# The target is a continuous value, so use a regression loss. CrossEntropyLoss
# is a classification loss; with a single output column it is constantly zero
# and yields no gradient.
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The train/test loops move every batch to `device`, so the model has to live
# there too; otherwise the forward pass fails on machines with CUDA.
# NOTE(review): Module.to() is expected to move parameters in place, keeping
# the optimizer created above valid — confirm for the installed torch version.
model.to(device)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Every batch is moved to the module-level ``device``, scored with
    ``loss_fn`` against the model output and used for one ``optimizer`` step.
    The running loss is printed on every 100th batch, starting with the first.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches; its
            ``dataset`` length is used for the progress counter.
        model: Module being trained; switched to training mode.
        loss_fn: Callable taking ``(prediction, target)``.
        optimizer: Optimizer holding the model parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    for batch, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, backpropagate, update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        if batch % 100 != 0:
            continue
        loss = batch_loss.item()
        current = (batch + 1) * len(inputs)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print regression metrics.

    The model is put in eval mode and run without gradient tracking. The
    loss is averaged over the batches and the mean absolute error over all
    target values is reported alongside it. The previous argmax-based
    "accuracy" was dropped: the model emits one value per sample, so argmax
    is always 0 and the comparison with the targets is meaningless.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches.
        model: Module to evaluate.
        loss_fn: Callable taking ``(prediction, target)`` and returning a
            scalar tensor.

    Returns:
        The average per-batch loss as a float (0.0 for an empty dataloader).
    """
    num_batches = len(dataloader)
    model.eval()
    test_loss, abs_error, num_targets = 0.0, 0.0, 0
    with torch.no_grad():
        for X, y in dataloader:
            # `device` is the module-level torch.device defined in the notebook.
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            # Align the target shape with the prediction so the difference is
            # element-wise rather than broadcast.
            abs_error += (pred - y.reshape(pred.shape)).abs().sum().item()
            num_targets += pred.numel()
    # Guard the divisions so an empty dataloader reports zeros instead of raising.
    test_loss /= max(num_batches, 1)
    mae = abs_error / max(num_targets, 1)
    print(f"Test Error: \n MAE: {mae:>8f}, Avg loss: {test_loss:>8f} \n")
    return test_loss

In [None]:
# Alternate one training pass and one evaluation pass per epoch.
epochs = 5
for t in range(epochs):
    banner = f"Epoch {t+1}\n-------------------------------"
    print(banner)
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")