## Plan of action.
Naive approach:

We have too little data to train a model dircetly on the data. It will either be too stupid or terribly overfit.

Ideas:
1. don't use pooling but stride to reduce amount of parameters.
2. normailze
3. batch norm
4. dropout

In [98]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


### get data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import shutil
shutil.unpack_archive("drive/MyDrive/SolarEnergyMaterials/task4.zip", "/content/data")
shutil.unpack_archive("data/task4_hr35z9/pretrain_features.csv.zip", "/content/data")
shutil.unpack_archive("data/task4_hr35z9/pretrain_labels.csv.zip", "/content/data")
shutil.unpack_archive("data/task4_hr35z9/train_features.csv.zip", "/content/data")
shutil.unpack_archive("data/task4_hr35z9/train_labels.csv.zip", "/content/data")

In [5]:
import random
import numpy as np
import pandas as pd
import torch
EPSILON = 1e-10
def load_pretrain_data(batch_size = 64):
    batch_size = 64

    random.seed(17)
    test_ind = set()

    pre_train_size = 50000

    while len(test_ind) < 10000: 
        test_ind.add(random.randint(0, pre_train_size-1))

    features =[]
    labels = []

    with open("data/pretrain_features.csv", 'r') as f:
        for row in f:
            features.append(row)

    with open("data/pretrain_labels.csv", 'r') as f:
        for row in f:
            labels.append(row)

    # remove header
    features = features[1:]
    labels = labels[1:]

    # first try to note use representation of the molecules, only the extracted features
    features = [list(map(float,row.split(',')[2:])) for row in features]
    labels = [float(row.split(',')[1]) for row in labels]

    train_features = []
    train_labels = []
    test_features = []
    test_labels = []


    for i in range(len(features)):
        if i in test_ind:
            test_features.append(features[i])
            test_labels.append(labels[i])
        else:
            train_features.append(features[i])
            train_labels.append(labels[i])

    # does not seem to make sense to normalize the data since it is very sparse
    # normalize train_features
    # train_features = (train_features - np.mean(train_features, axis=0)) / (np.std(train_features, axis=0)+EPSILON)

    # normalize test_features
    # test_features = (test_features - np.mean(test_features, axis=0)) / (np.std(test_features, axis=0)+EPSILON)

    # convert into tensor dataset
    train_features = torch.tensor(train_features, dtype=torch.float)
    train_labels = torch.tensor(train_labels, dtype=torch.float)
    test_features = torch.tensor(test_features, dtype=torch.float)
    test_labels = torch.tensor(test_labels, dtype=torch.float)

    train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    test_dataset = torch.utils.data.TensorDataset(test_features, test_labels) 
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [6]:
train_loader, test_loader = load_pretrain_data(batch_size = 64)

In [11]:
def load_finetune_data(batch_size = 4):
    batch_size = 4

    random.seed(17)
    test_ind = set()

    pre_train_size = 100
    while len(test_ind) < 50: 
        test_ind.add(random.randint(0, pre_train_size-1))

    features =[]
    labels = []

    with open("data/train_features.csv", 'r') as f:
        for row in f:
            features.append(row)

    with open("data/train_labels.csv", 'r') as f:
        for row in f:
            labels.append(row)

    # remove header
    features = features[1:]
    labels = labels[1:]

    # first try to note use representation of the molecules, only the extracted features
    features = [list(map(float,row.split(',')[2:])) for row in features]
    labels = [float(row.split(',')[1]) for row in labels]

    train_features = []
    train_labels = []
    test_features = []
    test_labels = []


    for i in range(len(features)):
        if i in test_ind:
            test_features.append(features[i])
            test_labels.append(labels[i])
        else:
            train_features.append(features[i])
            train_labels.append(labels[i])

    # does not seem to make sense to normalize the data since it is very sparse
    # normalize train_features
    # train_features = (train_features - np.mean(train_features, axis=0)) / (np.std(train_features, axis=0)+EPSILON)

    # normalize test_features
    # test_features = (test_features - np.mean(test_features, axis=0)) / (np.std(test_features, axis=0)+EPSILON)

    # convert into tensor dataset
    train_features = torch.tensor(train_features, dtype=torch.float)
    train_labels = torch.tensor(train_labels, dtype=torch.float)
    test_features = torch.tensor(test_features, dtype=torch.float)
    test_labels = torch.tensor(test_labels, dtype=torch.float)

    train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    test_dataset = torch.utils.data.TensorDataset(test_features, test_labels) 
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader

In [12]:
finetune_train_loader, finetune_test_loader = load_finetune_data(batch_size = 4)

### train/test loop


In [102]:
# train loop
def train_model(model, data_loader, epochs, lr=0.1, optim=None, weight_decay=None, p=True):
  model.to(device)
  if optim is None:
    if weight_decay is None:
      optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
    else:
      optimizer = torch.optim.SGD(params=model.parameters(), lr=lr, weight_decay=weight_decay)
  else:
    if weight_decay is None:
      optimizer = optim(model.parameters(), lr=lr)
    else:
      optimizer = optim(model.parameters(), lr=lr, weight_decay=weight_decay)

  loss_fn = nn.MSELoss()
  epoch_loss = []
  for epoch in tqdm(range(epochs)):
    epoch_loss.append(0)
    for batch, (X, y) in enumerate(data_loader):
      X = X.to(device)
      y = y.to(device)
      y_pred = model(X)
      loss = loss_fn(y_pred, y)
      epoch_loss[-1] += loss.item()
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
    if p:
      print(f"average batch loss in {epoch+1}: {epoch_loss[-1]/len(data_loader)}")
      



# test loop
def test_model(model, data_loader):
  loss_fn = nn.MSELoss() 
  model.to(device)
  Y = torch.tensor([]).to(device)
  Y_pred = torch.tensor([]).to(device)
  with torch.no_grad():
    for batch, (X,y) in enumerate(data_loader):
      X = X.to(device)
      y = y.to(device)
      y_pred = model(X)
      Y = torch.cat((Y, y))
      Y_pred = torch.cat((Y_pred, y_pred))
    loss = torch.sqrt(loss_fn(y_pred, y))
    print(f"average batch loss: {loss.item()}")

In [100]:
class net(nn.Module):
    def __init__(self):
        torch.manual_seed = 17
        super(net, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=6, stride=2, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=6, out_channels=16, stride=2, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=120, stride=2, kernel_size=3)
        self.fc1 = nn.Linear(120*124, 84)
        self.fc2 = nn.Linear(84, 1)

    def forward(self, x):
        x = x.unsqueeze_(dim=1)
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = x.view(-1, 120*124)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.squeeze_(dim=1)
        return x
    
# using batch normalization
class normalized_net(nn.Module):
    def __init__(self):
        torch.manual_seed = 17
        super(normalized_net, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=6, stride=2, kernel_size=3)
        self.bn1 = nn.BatchNorm1d(6)
        self.conv2 = nn.Conv1d(in_channels=6, out_channels=16, stride=2, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(16)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, stride=2, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(32)
        self.conv4 = nn.Conv1d(in_channels=32, out_channels=64, stride=2, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(64)
        self.conv5 = nn.Conv1d(in_channels=64, out_channels=128, stride=2, kernel_size=3)
        self.bn5 = nn.BatchNorm1d(128)
        self.fc1 = nn.Linear(128*30, 84)
        self.fc2 = nn.Linear(84, 1)

    def forward(self, x):
        x = x.unsqueeze_(dim=1) # need the x = x.unsqueeze_(dim=1) so gradient computation works
        x = torch.relu(self.bn1(self.conv1(x)))
        x = torch.relu(self.bn2(self.conv2(x)))
        x = torch.relu(self.bn3(self.conv3(x)))
        x = torch.relu(self.bn4(self.conv4(x)))
        x = torch.relu(self.bn5(self.conv5(x)))
        x = x.view(-1, 128*30)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.squeeze_(dim=1)    
        return x


### comparing some hyperparamters

In [None]:
test = next(iter(train_loader))[0]
dev_model = normalized_net()
out = dev_model(test)

torch.Size([64, 128, 30])


In [None]:
dev_model =  normalized_net()
train_model(dev_model, train_loader, epochs=15, optim = torch.optim.Adam, lr=0.001, weight_decay=0.002)
print('---')
test_model(dev_model, train_loader)
print('---')
test_model(dev_model, test_loader)

average batch loss in 1: 0.04832211886197329
average batch loss in 2: 0.015955748527497052
average batch loss in 3: 0.0138593875028193
average batch loss in 4: 0.013848679214715958
average batch loss in 5: 0.013406210947781801
average batch loss in 6: 0.013128400990366936
average batch loss in 7: 0.013793663085997105
average batch loss in 8: 0.013128213630616664
average batch loss in 9: 0.013136810804903507
average batch loss in 10: 0.01320596416592598
average batch loss in 11: 0.013335453416407109
average batch loss in 12: 0.012644454278796912
average batch loss in 13: 0.012736681837588549
average batch loss in 14: 0.012520046799629926
average batch loss in 15: 0.012515485768765211
---
average batch loss: 1.4018318057060243e-05 | accuracy: 27602/40000 | accuracy in percent 69.005
---
average batch loss: 7.285112455771986e-05 | accuracy: 6773/10000 | accuracy in percent 67.73


In [None]:
dev_model =  normalized_net()

train_model(dev_model, train_loader, epochs=15, optim = torch.optim.Adagrad, lr=0.001, weight_decay=0.001)
print('---')
test_model(dev_model, train_loader)
print('---')
test_model(dev_model, test_loader)


average batch loss in 1: 0.052456893715262416
average batch loss in 2: 0.018803144995868205
average batch loss in 3: 0.015965714767575263
average batch loss in 4: 0.014213927049189806
average batch loss in 5: 0.013095104674994945
average batch loss in 6: 0.012271047741174698
average batch loss in 7: 0.01156871896237135
average batch loss in 8: 0.01102673379331827
average batch loss in 9: 0.01059519100189209
average batch loss in 10: 0.010167258009314537
average batch loss in 11: 0.009883010215312243
average batch loss in 12: 0.009588859386742116
average batch loss in 13: 0.009299005978554487
average batch loss in 14: 0.00901506588086486
average batch loss in 15: 0.008847360903769731
---
average batch loss: 9.972722083330155e-06 | accuracy: 29922/40000 | accuracy in percent 74.805
---
average batch loss: 8.344114016575418e-05 | accuracy: 7031/10000 | accuracy in percent 70.31


In [None]:
dev_model =  normalized_net()
train_model(dev_model, train_loader, epochs=15, lr=0.01)
print('---')
test_model(dev_model, train_loader)
print('---')
test_model(dev_model, test_loader)

average batch loss in 1: 0.4968699249774218
average batch loss in 2: 0.03491322933137417
average batch loss in 3: 0.02677759014368057
average batch loss in 4: 0.022675443471968173
average batch loss in 5: 0.01888897882774472
average batch loss in 6: 0.01708962717205286
average batch loss in 7: 0.014661124294251204
average batch loss in 8: 0.014020600125193596
average batch loss in 9: 0.013597100345045328
average batch loss in 10: 0.012233984691649675
average batch loss in 11: 0.012298735515773297
average batch loss in 12: 0.012327293568104506
average batch loss in 13: 0.01067632727175951
average batch loss in 14: 0.010132453045248985
average batch loss in 15: 0.010068597088754178
---
average batch loss: 9.880167245864868e-06 | accuracy: 30787/40000 | accuracy in percent 76.9675
---
average batch loss: 0.00010748181468362262 | accuracy: 7219/10000 | accuracy in percent 72.19


In [74]:
dev_model =  normalized_net()
train_model(dev_model, train_loader, epochs=15, lr=0.01)
print('---')
test_model(dev_model, train_loader)
print('---')
test_model(dev_model, test_loader)

average batch loss in 1: 0.4692110815644264
average batch loss in 2: 0.03254912094771862
average batch loss in 3: 0.024455007752776144
average batch loss in 4: 0.02313570466041565
average batch loss in 5: 0.020421098506450654
average batch loss in 6: 0.01878229928314686
average batch loss in 7: 0.017239975012093782
average batch loss in 8: 0.014939844016730786
average batch loss in 9: 0.013761905759572982
average batch loss in 10: 0.013486493415385485
average batch loss in 11: 0.012677794007211923
average batch loss in 12: 0.01172583431005478
average batch loss in 13: 0.011562794194370509
average batch loss in 14: 0.01144865373969078
average batch loss in 15: 0.010884522700309754
---
average batch loss: 0.1082547977566719
---
average batch loss: 0.10792825371026993


### pretrain


In [103]:
pretrained_model =  normalized_net()
train_model(pretrained_model, train_loader, epochs=30, lr=0.01)
print('---')
test_model(pretrained_model, train_loader)
print('---')
test_model(pretrained_model, test_loader)



  3%|▎         | 1/30 [00:02<01:21,  2.80s/it]

average batch loss in 1: 0.45400825462043287


  7%|▋         | 2/30 [00:05<01:10,  2.52s/it]

average batch loss in 2: 0.03324480162411928


 10%|█         | 3/30 [00:07<01:03,  2.34s/it]

average batch loss in 3: 0.026463318206369876


 13%|█▎        | 4/30 [00:09<00:59,  2.29s/it]

average batch loss in 4: 0.018085084749758243


 17%|█▋        | 5/30 [00:11<00:56,  2.25s/it]

average batch loss in 5: 0.017630003628879787


 20%|██        | 6/30 [00:13<00:54,  2.26s/it]

average batch loss in 6: 0.016621013481169938


 23%|██▎       | 7/30 [00:16<00:57,  2.48s/it]

average batch loss in 7: 0.015120367111265659


 27%|██▋       | 8/30 [00:18<00:52,  2.37s/it]

average batch loss in 8: 0.014003087665885688


 30%|███       | 9/30 [00:21<00:48,  2.30s/it]

average batch loss in 9: 0.013488452656567097


 33%|███▎      | 10/30 [00:23<00:44,  2.24s/it]

average batch loss in 10: 0.012285828217864036


 37%|███▋      | 11/30 [00:25<00:41,  2.21s/it]

average batch loss in 11: 0.011755147004872561


 40%|████      | 12/30 [00:27<00:40,  2.27s/it]

average batch loss in 12: 0.010776967500895261


 43%|████▎     | 13/30 [00:30<00:40,  2.41s/it]

average batch loss in 13: 0.010149496012181044


 47%|████▋     | 14/30 [00:32<00:37,  2.32s/it]

average batch loss in 14: 0.0113375015437603


 50%|█████     | 15/30 [00:34<00:34,  2.27s/it]

average batch loss in 15: 0.010579911043122411


 53%|█████▎    | 16/30 [00:36<00:31,  2.21s/it]

average batch loss in 16: 0.009368298490345478


 57%|█████▋    | 17/30 [00:38<00:28,  2.19s/it]

average batch loss in 17: 0.008999977856129408


 60%|██████    | 18/30 [00:41<00:28,  2.41s/it]

average batch loss in 18: 0.008866982467100024


 63%|██████▎   | 19/30 [00:44<00:27,  2.48s/it]

average batch loss in 19: 0.008766584837436676


 67%|██████▋   | 20/30 [00:46<00:23,  2.38s/it]

average batch loss in 20: 0.008470799985527992


 70%|███████   | 21/30 [00:48<00:20,  2.29s/it]

average batch loss in 21: 0.008682735307142139


 73%|███████▎  | 22/30 [00:50<00:17,  2.24s/it]

average batch loss in 22: 0.008284158957749605


 77%|███████▋  | 23/30 [00:53<00:15,  2.21s/it]

average batch loss in 23: 0.0076819450981915


 80%|████████  | 24/30 [00:55<00:13,  2.33s/it]

average batch loss in 24: 0.007597086875140667


 83%|████████▎ | 25/30 [00:58<00:11,  2.39s/it]

average batch loss in 25: 0.00751025112643838


 87%|████████▋ | 26/30 [01:00<00:09,  2.31s/it]

average batch loss in 26: 0.007210653605312109


 90%|█████████ | 27/30 [01:02<00:06,  2.26s/it]

average batch loss in 27: 0.007294732500612736


 93%|█████████▎| 28/30 [01:04<00:04,  2.25s/it]

average batch loss in 28: 0.006886829763650894


 97%|█████████▋| 29/30 [01:06<00:02,  2.22s/it]

average batch loss in 29: 0.006887848288938403


100%|██████████| 30/30 [01:09<00:00,  2.32s/it]

average batch loss in 30: 0.006635884273797274
---





average batch loss: 0.09174182265996933
---
average batch loss: 0.10016132891178131


In [76]:
torch.save(pretrained_model.state_dict(), 'drive/MyDrive/SolarEnergyMaterials/PretrainedModels/model0.pth')

### finetune


In [77]:
class pretrained_net(nn.Module):
    def __init__(self):
        torch.manual_seed = 17
        super(pretrained_net, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=6, stride=2, kernel_size=3)
        self.bn1 = nn.BatchNorm1d(6)
        self.conv2 = nn.Conv1d(in_channels=6, out_channels=16, stride=2, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(16)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=32, stride=2, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(32)
        self.conv4 = nn.Conv1d(in_channels=32, out_channels=64, stride=2, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(64)
        self.conv5 = nn.Conv1d(in_channels=64, out_channels=128, stride=2, kernel_size=3)
        self.bn5 = nn.BatchNorm1d(128)
        self.fc1 = nn.Linear(128*30, 84)
        self.fc2 = nn.Linear(84, 1)
        self.load_state_dict(torch.load('drive/MyDrive/SolarEnergyMaterials/PretrainedModels/model0.pth'),strict=True)
        # freezing pretrained layers
        for param in model.parameters():
          param.requires_grad = False
        self.fc = nn.Linear(128*30, 1)

    def forward(self, x):
        x = x.unsqueeze_(dim=1) # need the x = x.unsqueeze_(dim=1) so gradient computation works
        x = torch.relu(self.bn1(self.conv1(x)))
        x = torch.relu(self.bn2(self.conv2(x)))
        x = torch.relu(self.bn3(self.conv3(x)))
        x = torch.relu(self.bn4(self.conv4(x)))
        x = torch.relu(self.bn5(self.conv5(x)))
        x = x.view(-1, 128*30)
        x = self.fc(x)
        x = x.squeeze_(dim=1)    
        return x

In [121]:
model = pretrained_net()
train_model(model, finetune_train_loader, epochs=75, lr=0.0001, p=False)
test_model(model,finetune_train_loader)
test_model(model,finetune_test_loader)

100%|██████████| 75/75 [00:02<00:00, 27.96it/s]

average batch loss: 0.06577936559915543
average batch loss: 0.33720239996910095



