In [1]:
!pip3 install torch torchaudio torchvision torchtext torchdata

Collecting torchdata
  Downloading torchdata-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64

In [2]:
import sqlite3
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor, Lambda
import torch
import os
import numpy as np

In [3]:
# Pick the compute device once: the first CUDA GPU when one is visible,
# otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

cuda:0


In [4]:
# Input locations (presumably a mounted Google Drive in Colab -- adjust as needed).
# `files` is the directory passed to the dataset as the location of the per-run
# arrays; `database` is the SQLite file holding the run parameters.
files = "/content/drive/MyDrive/peclet_batch_one"
database = "/content/drive/MyDrive/peclet_batch_one.db"

In [5]:
# Split the runs into train/test groups *by seed* (first 80% of the distinct
# seeds -> train, remainder -> test) and build the SQL WHERE clauses that select
# each group.
def _seed_filter(seed_group):
    """Return a WHERE clause restricting ``model_param.seed`` to ``seed_group``.

    The values are rendered with ``repr`` and comma-joined by hand. For two or
    more seeds this yields the same text as ``str(tuple(seed_group))``, but a
    single seed becomes ``(x)`` instead of Python's ``(x,)``, whose trailing
    comma is a SQL syntax error.
    """
    rendered = ", ".join(repr(seed) for seed in seed_group)
    return 'WHERE "model_param.seed" in (%s)' % rendered


# The connection is intentionally left open: other cells may still reference
# the module-level `connection` / `cursor`.
connection = sqlite3.connect(database)
cursor = connection.cursor()
cursor.execute('SELECT DISTINCT "model_param.seed" FROM model_run_params')
seeds = [r[0] for r in cursor.fetchall()]

# Integer arithmetic for the 80% cut (same value as int((len(seeds)/10)*8)).
split = (len(seeds) * 8) // 10
train_seeds = seeds[:split]
test_seeds = seeds[split:]
train_filter = _seed_filter(train_seeds)
test_filter = _seed_filter(test_seeds)

In [6]:
# Regression target: the ratio streampower.k / diffuser.D of each run's parameters.
# The dataset appends its own `WHERE model_run_id = ...` clause to this query.
# NOTE(review): SQLite divides integers with truncation -- confirm that at least
# one of these columns is stored as REAL.
label_query = 'SELECT "model_param.streampower.k"/ "model_param.diffuser.D" FROM model_run_params'

In [7]:
def get_runs(database, filter_query = ""):
    """Return the ``model_run_id`` of every row in ``model_run_params``.

    Args:
        database: Path of the SQLite database file.
        filter_query: Optional SQL fragment (typically a ``WHERE ...`` clause)
            appended verbatim to the query. ``None`` or ``""`` selects all rows.
            The fragment is interpolated into the SQL text, so it must come
            from trusted code, not from user input.

    Returns:
        A list with the first column (``model_run_id``) of each matching row.
    """
    # Treat None like "no filter" instead of interpolating the text "None".
    where_clause = filter_query if filter_query else ""
    run_query = f"SELECT model_run_id FROM model_run_params {where_clause}"
    connection = sqlite3.connect(database)
    try:
        rows = connection.execute(run_query).fetchall()
    finally:
        # Always release the handle; the original leaked one per call.
        connection.close()
    return [row[0] for row in rows]

class LandlabBatchdataset(Dataset):
    """Map-style dataset pairing one ``.npz`` array per model run with a SQL label.

    Each item is ``(array, label)``: the array is read from
    ``<dataset_dir>/<run>.npz`` under the key ``<run>``, cast to float32 and
    passed through ``self.transform``; the label is the row returned by
    ``label_query`` restricted to that run, as a float32 tensor.

    Args:
        database: Path of the SQLite database containing ``model_run_params``.
        dataset_dir: Directory holding the ``<model_run_id>.npz`` files.
        label_query: ``SELECT ... FROM ...`` statement *without* a WHERE clause;
            ``WHERE model_run_id = ?`` is appended per item.
        filter_query: Optional SQL fragment (e.g. a WHERE clause) choosing which
            runs belong to this dataset. ``None`` means all runs.
    """

    def __init__(self, database, dataset_dir, label_query, filter_query=None):
        self.img_db = database
        self.dataset_directory = dataset_dir
        # The SQLite handle is opened lazily, once per process (see _get_cursor):
        # a connection must not be shared with forked DataLoader workers and
        # cannot be pickled for spawned ones.
        self.connection = None
        self.cursor = None
        self._connection_pid = None
        self.label_query = label_query
        if filter_query is not None:
            self.filter_query = filter_query
        else:
            self.filter_query = ""
        # Use the normalised filter so None is never interpolated into SQL.
        self.runs = get_runs(database, self.filter_query)
        self.transform = ToTensor()
        # Kept for compatibility; __getitem__ does not apply it to the label.
        self.target_transform = ToTensor()

    def __getstate__(self):
        """Drop the (unpicklable, process-bound) SQLite handle when pickled."""
        state = self.__dict__.copy()
        state["connection"] = None
        state["cursor"] = None
        state["_connection_pid"] = None
        return state

    def _get_cursor(self):
        """Return a cursor on a connection owned by the current process."""
        pid = os.getpid()
        if self.cursor is None or self._connection_pid != pid:
            self.connection = sqlite3.connect(self.img_db)
            self.cursor = self.connection.cursor()
            self._connection_pid = pid
        return self.cursor

    def __len__(self):
        """Number of runs selected by the filter."""
        return len(self.runs)

    def __getitem__(self, idx):
        """Load the array and label of the ``idx``-th run.

        Raises:
            LookupError: if the label query returns no row for the run.
        """
        run_name = self.runs[idx]
        data_path = os.path.join(self.dataset_directory, f"{run_name}.npz")

        # Bind the run id as a parameter instead of pasting it into the SQL text.
        cursor = self._get_cursor()
        cursor.execute(f"{self.label_query} WHERE model_run_id = ?", (run_name,))
        label = cursor.fetchone()
        if label is None:
            raise LookupError(f"no label row found for model run {run_name!r}")

        # NpzFile keeps the archive open until closed; release it right away.
        with np.load(data_path) as archive:
            data_array = archive[run_name]
        data_array = data_array.astype(np.float32)
        data_array = self.transform(data_array)
        # `label` is the fetched row (a tuple), so the tensor has one entry per column.
        label = torch.tensor(label, dtype=torch.float32)
        return data_array, label

In [8]:
class PecletPredictor(nn.Module):
  """Small CNN regressor: two conv/ReLU/max-pool stages, then three linear layers.

  The network maps a single-channel image batch to one scalar per sample.
  ``fc1`` is sized for a flattened feature map of 50 x 72 x 22. With two 5x5
  valid convolutions and two 2x2 poolings, that size is consistent with
  1 x 300 x 100 inputs ((300-4)/2 = 148, (148-4)/2 = 72; (100-4)/2 = 48,
  (48-4)/2 = 22) -- confirm against the actual data.
  """

  def __init__(self):
    super().__init__()
    # Feature extractor, stage 1: 1 -> 20 channels.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=(5, 5))
    self.relu1 = nn.ReLU()
    self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
    # Feature extractor, stage 2: 20 -> 50 channels.
    self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
    self.relu2 = nn.ReLU()
    self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
    # Regression head; the input width is tied to the input image size (see class docstring).
    self.fc1 = nn.Linear(in_features=50 * 72 * 22, out_features=800)
    self.fc2 = nn.Linear(in_features=800, out_features=100)
    self.fc3 = nn.Linear(in_features=100, out_features=1)
    self.relu3 = nn.ReLU()
    self.relu4 = nn.ReLU()

  def forward(self, x):
    """Run the forward pass and return a tensor with one value per sample."""
    stage_one = self.maxpool1(self.relu1(self.conv1(x)))
    stage_two = self.maxpool2(self.relu2(self.conv2(stage_one)))
    # Keep the batch dimension, flatten everything else for the linear head.
    flat = torch.flatten(stage_two, start_dim=1)
    hidden = self.relu3(self.fc1(flat))
    hidden = self.relu4(self.fc2(hidden))
    return self.fc3(hidden)

# Model, objective and optimiser used by the training loop:
# mean-squared-error regression trained with SGD + momentum.
# NOTE(review): the model is created on the CPU here; DEVICE is not applied.
net = PecletPredictor()
criterion = nn.MSELoss()
optimizer = optim.SGD(params=net.parameters(), lr=1e-3, momentum=0.9)


In [9]:
# Build the train/test datasets from their seed filters and wrap each in a
# DataLoader. Only the training loader shuffles.
batch_size = 4

trainset = LandlabBatchdataset(
    database=database,
    dataset_dir=files,
    label_query=label_query,
    filter_query=train_filter,
)
testset = LandlabBatchdataset(
    database=database,
    dataset_dir=files,
    label_query=label_query,
    filter_query=test_filter,
)

trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Train for a fixed number of passes over the training set, reporting the
# mean loss of every 2000 mini-batches.

# Batches are moved to whatever device the model's parameters live on, so the
# loop works unchanged whether or not the model has been moved to a GPU.
model_device = next(net.parameters()).device

for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(model_device)
        labels = labels.to(model_device)

        # zero the parameter gradients
        optimizer.zero_grad(set_to_none=False)

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        try:
            loss.backward()
        except RuntimeError:
            # Dump the offending batch, then re-raise the *original* error so
            # its message and traceback are preserved.
            print(outputs, labels)
            print([type(o) for o in outputs], [type(l) for l in labels])
            raise
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

In [None]:
# Display the loss summed over the mini-batches seen since the last periodic
# report of the final epoch (it is reset to 0.0 after each report).
running_loss

In [None]:
# Evaluate on the held-out set and print the mean loss per sample.
# Each batch loss is weighted by its batch size so that a shorter final batch
# is not over-weighted (this assumes `criterion` averages within a batch, as
# nn.MSELoss does by default).
eval_device = next(net.parameters()).device  # keep batches on the model's device

loss = 0.0        # running sum of (batch mean loss * batch size)
n_samples = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = images.to(eval_device)
        labels = labels.to(eval_device)
        outputs = net(images)
        batch_n = labels.shape[0]
        loss += criterion(outputs, labels).item() * batch_n
        n_samples += batch_n

# Guard against an empty test set instead of dividing by zero.
print(loss / n_samples if n_samples else float("nan"))