In [85]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torch
import torchvision
import torchvision.transforms as transforms

from tqdm.notebook import tqdm

import sysc
import os
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd()))+"\\MySQL Database")
from CreateDB import CreateDB

from io import BytesIO
from PIL import Image

import numpy as np

from sklearn.model_selection import train_test_split

In [5]:
db = CreateDB('root', 'Liverpool11*')

In [145]:
query = """
SELECT image
FROM images
"""

labels_query = """
SELECT HBondDonorCount
FROM properties
"""

properties_query = """
SELECT 
"""

images = db.fetch_query('pubchem_database', query)

labels = db.fetch_query('pubchem_database', labels_query)

img_list = []

for i in range(len(images)):
    
    img_single = list(Image.open(BytesIO(images[i][0])).getdata())
    
    img_list.append(img_single)
    
img_array = np.array(img_list).reshape(-1, 1, 100, 100)

img_tensor = torch.from_numpy(img_array)

labels_array = np.array([x[0] for x in labels]).reshape(-1, 1)

labels_tensor = torch.from_numpy(labels_array)

X_train, X_test, y_train, y_test = train_test_split(img_tensor, labels_tensor, train_size=0.8, random_state=0)

train_dataset = torch.utils.data.TensorDataset(X_train, y_train)

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

In [148]:
batch_size = 4

train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True, 
                                           num_workers=2)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=2)

In [196]:
class Net(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.dropout1 = nn.Dropout(p=0.2)
        self.conv1 = nn.Conv2d(1, 3, 5)
        self.batchnorm1 = nn.BatchNorm2d(3)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout2 = nn.Dropout(p=0.2)
        self.conv2 = nn.Conv2d(3, 5, 5)
        self.batchnorm2 = nn.BatchNorm2d(5)
        self.fc1 = nn.Linear(5*22*22, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 1)
        
    def forward(self, x):
        x = self.dropout1(x)
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.dropout2(x)
        x = self.conv2(x)
        x = self.batchnorm2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [197]:
net = Net()

In [198]:
def train_net(num_epochs, train_loader):
    
    net.train()
    
    criterion = nn.MSELoss()
    
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    
    losses_over_time = []
    
    for epoch in range(num_epochs):

        running_loss = 0.0

        for batch_i, data in enumerate(train_loader, 0):

            inputs, labels = data

            optimizer.zero_grad()

            outputs = net(inputs.float())
            
            #print(f'input shape: {labels.size()}')
            #print(f'output shape: {outputs.size()}')

            loss = criterion(outputs, labels.float())

            loss.backward()

            optimizer.step()

            running_loss += loss.item()

            if batch_i % 2000 == 1999:
                print(f'[{epoch + 1}, {batch_i + 1:5d}] loss: {running_loss/2000:.3f}')
                losses_over_time.append(running_loss)
                running_loss = 0.0
    
    return losses_over_time
                
    print('Finished Training')

In [156]:
losses_over_time = train_net(10, train_loader)

[1,  2000] loss: 3.351
[1,  4000] loss: 1.620
[2,  2000] loss: 1.451
[2,  4000] loss: 1.866
[3,  2000] loss: 1.253
[3,  4000] loss: 1.241
[4,  2000] loss: 1.495
[4,  4000] loss: 1.364
[5,  2000] loss: 1.209
[5,  4000] loss: 0.947
[6,  2000] loss: 1.037
[6,  4000] loss: 1.062
[7,  2000] loss: 1.002
[7,  4000] loss: 1.377
[8,  2000] loss: 0.851
[8,  4000] loss: 1.022
[9,  2000] loss: 1.042
[9,  4000] loss: 0.973
[10,  2000] loss: 0.873
[10,  4000] loss: 0.828


In [157]:
def eval(test_loader):
    
    net.eval()
    
    test_loss_total = 0
    
    for data in test_loader:
        
        test_image, test_label = data
    
        y_pred = net(test_image.float())

        test_loss = criterion(y_pred, test_label.float())
        
        test_loss_total += test_loss.item()

    print('Average test loss is {}'.format(test_loss_total/(len(test_loader) * test_loader.batch_size)))
    
    return test_loss_total/(len(test_loader) * test_loader.batch_size)

In [158]:
test_loss = eval(test_loader)

Average test loss is 0.22547994496533647
