In [1]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
# atomspheric features for training model

dataframe = pd.read_csv("/content/drive/My Drive/Data/Features.csv")

In [3]:
#converting the dataframe into a dict
dataframe = dataframe.set_index("filename")
dataframe = dataframe.T.to_dict()

In [4]:
#loading the slope embeddings
slope_embeddings = torch.load("/content/drive/My Drive/Data/Slope_embeddings.pt")

In [5]:
#loading the elevation embeddings
elevation_embeddings = torch.load("/content/drive/My Drive/Data/Elevation_embeddings.pt")

In [6]:
for key in elevation_embeddings:
    matrix_min = elevation_embeddings[key].min()
    matrix_max = elevation_embeddings[key].max()
    elevation_embeddings[key] = (elevation_embeddings[key] - matrix_min) / (matrix_max - matrix_min)

for key in slope_embeddings:
    matrix_min = slope_embeddings[key].min()
    matrix_max = slope_embeddings[key].max()
    slope_embeddings[key] = (slope_embeddings[key] - matrix_min) / (matrix_max - matrix_min)


In [7]:
#concat the slope and elevation embeddings generated using encoder decoder
combined_dict = {}
for key in slope_embeddings.keys():
    combined_dict[key] = torch.cat((slope_embeddings[key], elevation_embeddings[key]), dim=1)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder1 = nn.Sequential(
            nn.Linear(2000, 1000),
            nn.ReLU(),
            nn.Linear(1000,500),
            nn.ReLU()
            )
        self.encoder2 = nn.Sequential(
            nn.Linear(508, 250),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(250, 508),
            nn.ReLU(),
            nn.Linear(508, 1000),
            nn.ReLU(),
            nn.Linear(1000, 2000),
            nn.Sigmoid()
            )


    def forward(self, x, features):
        x = self.encoder1(x)
        #print("Before squeeze: ", x.shape, features.shape)
        x = x.squeeze(1)
        features = features.squeeze(1)
        #print("After squeeze: ", x.shape, features.shape)
        x = torch.cat((x, features), dim=1)
        x = self.encoder2(x)
        x = self.decoder(x)
        return x


In [9]:
class Dataset(Dataset):
  def __init__(self, data, feature_map):
    self.filename = list(data.keys())
    self.data = data
    self.feature_map = feature_map

  def __len__(self):
    return len(self.filename)

  def __getitem__(self, index):
    features = self.feature_map[self.filename[index]]
    features = torch.FloatTensor([features[key] for key in features]).unsqueeze(0)
    return [self.data[self.filename[index]].detach(), features]



In [15]:
data = Dataset(combined_dict, dataframe)
batch_size = 4
train_size = 35
test_size = 7
val_size = 8

train_dataset, test_dataset, val_dataset = random_split(data, [train_size, test_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [16]:
model = AutoEncoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
n_epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

AutoEncoder(
  (encoder1): Sequential(
    (0): Linear(in_features=2000, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=500, bias=True)
    (3): ReLU()
  )
  (encoder2): Sequential(
    (0): Linear(in_features=508, out_features=250, bias=True)
    (1): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=250, out_features=508, bias=True)
    (1): ReLU()
    (2): Linear(in_features=508, out_features=1000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1000, out_features=2000, bias=True)
    (5): Sigmoid()
  )
)

In [17]:
n_epochs = 1000
patience = 10

# track the validation loss from the previous epoch
best_valid_loss = float('inf')
counter = 0

for epoch in range(n_epochs):
    for batch in train_loader:
        inputs, features = batch

        # Move inputs to the device if using GPU
        inputs = inputs.squeeze(1).to(device)
        features = features.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs, features)

        # Compute the loss
        loss = criterion(outputs, inputs)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

    # Compute the validation loss
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():
        for val_batch_data in val_loader:
            inputs, features = val_batch_data
            #val_batch_data = val_batch_data.to(device)
            val_inputs = inputs.squeeze(1).to(device)
            val_features = features.to(device)
            val_outputs = model(val_inputs, val_features)
            val_loss += criterion(val_outputs, val_inputs).item()

    val_loss /= len(val_loader)

    if val_loss < best_valid_loss:
        best_valid_loss = val_loss
        counter = 0  # reset counter
    else:
        counter += 1  # increment counter if validation loss has not improved

    print('Epoch [{}/{}], Loss: {:.4f}, Validation Loss: {:.4f}'
          .format(epoch+1, n_epochs, loss.item(), val_loss))

    # If the validation loss hasn't improved in `patience` epochs, stop training early
    if counter == patience:
        print("Early stopping")
        break


Epoch [1/1000], Loss: 0.2233, Validation Loss: 0.2087
Epoch [2/1000], Loss: 0.1949, Validation Loss: 0.1923
Epoch [3/1000], Loss: 0.1855, Validation Loss: 0.1772
Epoch [4/1000], Loss: 0.1700, Validation Loss: 0.1630
Epoch [5/1000], Loss: 0.1490, Validation Loss: 0.1493
Epoch [6/1000], Loss: 0.1362, Validation Loss: 0.1358
Epoch [7/1000], Loss: 0.1343, Validation Loss: 0.1224
Epoch [8/1000], Loss: 0.1094, Validation Loss: 0.1091
Epoch [9/1000], Loss: 0.1034, Validation Loss: 0.0959
Epoch [10/1000], Loss: 0.0890, Validation Loss: 0.0831
Epoch [11/1000], Loss: 0.0668, Validation Loss: 0.0708
Epoch [12/1000], Loss: 0.0707, Validation Loss: 0.0596
Epoch [13/1000], Loss: 0.0540, Validation Loss: 0.0495
Epoch [14/1000], Loss: 0.0463, Validation Loss: 0.0408
Epoch [15/1000], Loss: 0.0353, Validation Loss: 0.0335
Epoch [16/1000], Loss: 0.0313, Validation Loss: 0.0276
Epoch [17/1000], Loss: 0.0291, Validation Loss: 0.0231
Epoch [18/1000], Loss: 0.0188, Validation Loss: 0.0195
Epoch [19/1000], Lo

In [18]:
model.eval()  # set the model to evaluation mode
test_loss = 0.0

with torch.no_grad():
    for test_batch_data in test_loader:
        test_inputs, features = test_batch_data
        test_inputs = test_inputs.squeeze(1).to(device)
        features = features.squeeze(1).to(device)
        test_outputs = model(test_inputs, features)
        test_loss += criterion(test_outputs, test_inputs).item()

test_loss /= len(test_loader)

print('Test Loss: {:.4f}'.format(test_loss))

Test Loss: 0.0058


In [19]:
torch.save(model, "/content/drive/My Drive/Thesis/Model/LinearAutoEncoder.pt")