In [3]:
import torch
import numpy as np
import torch.nn as nn
import pandas as pd
import re
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score
import torchvision.transforms as transforms
from torchvision.io import read_image
from torch.utils.data import Dataset
from sklearn.model_selection import KFold
import random

In [None]:
#Random states for splits
[[42,12],[23,5],[37,51]]

In [46]:
gen_path = 'Desktop/Gala_Salts/'
path_to_data = gen_path + 'full_data/'
path_to_out = gen_path + 'split_2/'
Y = np.array(pd.read_csv(path_to_data+'Y_ions.csv', sep=';'))

#Form 3 data splits for cross-validation

Y_trn, Y_30 = train_test_split(Y, test_size=0.3, random_state=23)
Y_vld, Y_tst = train_test_split(Y_30, test_size = 0.3333, random_state=5)

a = ['sample_number','Cu','Ni','Cr','NO3']
pd.DataFrame(Y_trn).to_csv(path_to_out + 'Y_trn.csv',sep=',', header = a)
pd.DataFrame(Y_vld).to_csv(path_to_out + 'Y_vld.csv',sep=',', header = a)
pd.DataFrame(Y_tst).to_csv(path_to_out + 'Y_tst.csv',sep=',', header = a)

In [15]:
#Customized dataset class

class CDS_Dataset(Dataset):
    def __init__(self, annotations_file, spec_dir, transform=None, target_transform=None):
        self.spec_labels = pd.read_csv(annotations_file, sep=',').iloc[:,1:]
        self.spec_number = pd.read_csv(annotations_file, sep=',').iloc[:,0]
        self.spec_dir = spec_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.spec_labels)

    def __getitem__(self, idx):
        label = self.spec_labels.iloc[idx]
        spec = torch.from_numpy(np.array(pd.read_csv(self.spec_dir + str(self.spec_number[idx])+'.csv').iloc[1:,1:-1], dtype='float32')).unsqueeze(0)
        if self.transform:
            spec = self.transform(spec)
        if self.target_transform:
            label = self.target_transform(label)
        return spec, torch.from_numpy(np.array(label, dtype='float32'))

In [57]:
#Import data to calculate mean and std
training_data = CDS_Dataset(path_to_out+'Y_trn.csv',path_to_data)
validation_data = CDS_Dataset(path_to_out+'Y_vld.csv',path_to_data)
test_data = CDS_Dataset(path_to_out+'Y_tst.csv',path_to_data)

from torch.utils.data import DataLoader

train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [None]:
#Calculate mean and std
mean = 0.0
for specs, _ in train_dataloader:
    batch_samples = specs.size(0) 
    specs = specs.view(batch_samples, specs.size(1), -1)
    mean += specs.mean(2).sum(0)
mean = mean / len(train_dataloader.dataset)

var = 0.0
for specs, _ in train_dataloader:
    batch_samples = specs.size(0)
    specs = specs.view(batch_samples, specs.size(1), -1)
    var += ((specs - mean.unsqueeze(1))**2).sum([0,2])
std = torch.sqrt(var / (len(train_dataloader.dataset)*500*41))

print('mean:', mean, 'std:', std)

In [16]:
#Import and Normalize data
training_data = CDS_Dataset(path_to_out+'Y_trn.csv',path_to_data, transform= transforms.Normalize((mean), (std)) )
validation_data = CDS_Dataset(path_to_out+'Y_vld.csv',path_to_data, transform= transforms.Normalize((mean), (std)) )
test_data = CDS_Dataset(path_to_out+'Y_tst.csv',path_to_data, transform= transforms.Normalize((mean), (std)) )

train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
validation_dataloader = DataLoader(validation_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

In [23]:
#2D CNN class
class TwoLayerCNN(nn.Module):

    def __init__(self):
      
        super().__init__() # since Python 3.0
        

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        self.fc1 = nn.Linear(16 * 244 * 14, 160)
        self.fc2 = nn.Linear(160, 4)

    def forward(self, x):

        x = self.pool(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [24]:
#Model instance
N = TwoLayerCNN()
print(N)

TwoLayerCNN(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=54656, out_features=160, bias=True)
  (fc2): Linear(in_features=160, out_features=4, bias=True)
)


In [25]:
import torch
import torch.optim as optim
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)


# Training

import torch.optim as optim

loss_function = torch.nn.MSELoss().cuda()
optimizer = optim.Adam(N.parameters(), lr=0.001)

test_stop = 100
max_val_loss = 10000.0
for epoch_step in range(0, 1000, test_stop):
    if epoch_step!=0:
        N = TwoLayerCNN()
        optimizer = optim.Adam(N.parameters(), lr=0.001)

        checkpoint = torch.load(path_to_out+'model.pth')
        N.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        last_best_epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        N.train()
        
        if last_best_epoch + test_stop > ep:
            for ep in range(epoch_step, epoch_step+test_stop):
                for _, data in enumerate(train_dataloader, 0): # get bacth
                    inputs, labels = data # parse batch
                    optimizer.zero_grad() # sets the gradients of all optimized tensors to zero.
                    outputs = N(inputs) # get outputs
                    loss = loss_function(outputs, labels) # calculate loss
                    loss.backward() # calculate gradients
                    optimizer.step() # performs a single optimization step (parameter update).

                for specs, labels in validation_dataloader: val_loss = loss_function(N(specs),labels)
                if val_loss.item() <= max_val_loss:
                    torch.save({'epoch': ep,
                                'model_state_dict': N.state_dict(),
                                'optimizer_state_dict': optimizer.state_dict(),
                                'loss': loss}, path_to_out+'model.pth')
                    max_val_loss = val_loss.item()
                print(f"Epoch={ep} loss={loss:.4f} val_loss={val_loss:.4f}")
        else: 
            continue 

    if epoch_step==0:
        for ep in range(epoch_step, test_stop):
            for _, data in enumerate(train_dataloader, 0): # get bacth
                inputs, labels = data # parse batch
                optimizer.zero_grad() # sets the gradients of all optimized tensors to zero.
                outputs = N(inputs) # get outputs
                loss = loss_function(outputs, labels) # calculate loss
                loss.backward() # calculate gradients
                optimizer.step() # performs a single optimization step (parameter update).
            
            for specs, labels in validation_dataloader: val_loss = loss_function(N(specs),labels)
            if val_loss.item() <= max_val_loss:
                torch.save({'epoch': ep,
                            'model_state_dict': N.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss}, path_to_out+'model.pth')
                max_val_loss = val_loss.item()
            print(f"Epoch={ep} loss={loss:.4f}")
    
   

Using device: cpu
Epoch=0 loss=16.0266
Epoch=1 loss=7.8551
Epoch=2 loss=3.9673
Epoch=3 loss=3.6440
Epoch=4 loss=4.0431
Epoch=5 loss=2.8213
Epoch=6 loss=3.2827
Epoch=7 loss=2.7267
Epoch=8 loss=3.1312
Epoch=9 loss=3.0848
Epoch=10 loss=2.8178
Epoch=11 loss=2.4554
Epoch=12 loss=2.8846
Epoch=13 loss=2.2139
Epoch=14 loss=3.1007
Epoch=15 loss=3.5549
Epoch=16 loss=2.8327
Epoch=17 loss=2.2032
Epoch=18 loss=2.3103
Epoch=19 loss=2.1975
Epoch=20 loss=2.3770
Epoch=21 loss=2.2277
Epoch=22 loss=2.4514
Epoch=23 loss=2.0719
Epoch=24 loss=2.3634
Epoch=25 loss=2.4054
Epoch=26 loss=2.8007
Epoch=27 loss=2.4998
Epoch=28 loss=2.1727
Epoch=29 loss=2.0390
Epoch=30 loss=2.1908
Epoch=31 loss=2.3135
Epoch=32 loss=1.4218
Epoch=33 loss=1.6513
Epoch=34 loss=2.1037
Epoch=35 loss=1.6589
Epoch=36 loss=1.5670
Epoch=37 loss=2.4620
Epoch=38 loss=1.9223
Epoch=39 loss=2.2418
Epoch=40 loss=2.0712
Epoch=41 loss=1.9443
Epoch=42 loss=1.4474
Epoch=43 loss=1.7702
Epoch=44 loss=1.8785
Epoch=45 loss=1.5434
Epoch=46 loss=1.4556
Epoc

Epoch=261 loss=0.2175 val_loss=0.4906
Epoch=262 loss=0.3163 val_loss=0.5647
Epoch=263 loss=0.2528 val_loss=0.6403
Epoch=264 loss=0.2896 val_loss=0.5949
Epoch=265 loss=0.2023 val_loss=0.8346
Epoch=266 loss=0.3244 val_loss=1.3161
Epoch=267 loss=0.4328 val_loss=0.4861
Epoch=268 loss=0.2153 val_loss=0.8139
Epoch=269 loss=0.2423 val_loss=1.9095
Epoch=270 loss=0.3192 val_loss=0.5862
Epoch=271 loss=0.2109 val_loss=1.0847
Epoch=272 loss=0.2667 val_loss=0.8853
Epoch=273 loss=0.3636 val_loss=0.8413
Epoch=274 loss=0.2780 val_loss=0.6094
Epoch=275 loss=0.1596 val_loss=0.6169
Epoch=276 loss=0.1855 val_loss=0.7514
Epoch=277 loss=0.1146 val_loss=0.4959
Epoch=278 loss=0.2319 val_loss=0.5868
Epoch=279 loss=0.2698 val_loss=0.5645
Epoch=280 loss=0.2646 val_loss=0.4939
Epoch=281 loss=0.1603 val_loss=0.1703
Epoch=282 loss=0.1198 val_loss=0.5997
Epoch=283 loss=0.1987 val_loss=0.5987
Epoch=284 loss=0.1701 val_loss=0.5183
Epoch=285 loss=0.1990 val_loss=0.8463
Epoch=286 loss=0.1705 val_loss=0.5450
Epoch=287 lo

In [26]:
#Get Statistics

import torch.optim as optim
import torch.nn.functional as F

N = TwoLayerCNN()
optimizer = optim.Adam(N.parameters(), lr=0.001)

checkpoint = torch.load(path_to_out+'model.pth')
N.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
last_best_epoch = checkpoint['epoch']
loss = checkpoint['loss']
N.eval()

y_ae_test = np.zeros((1,4))
for specs, labels in test_dataloader:
    outputs = N(specs)
    outputs[outputs<0]=0
    ae = torch.abs(outputs-labels).detach().numpy()
    np.concatenate((y_ae_test, ae), axis=0)
    y_ae_test = np.concatenate((y_ae_test, ae), axis=0)
    
y_ae_trn = np.zeros((1,4))
for specs, labels in train_dataloader:
    outputs = N(specs)
    outputs[outputs<0]=0
    ae = torch.abs(outputs-labels).detach().numpy()
    np.concatenate((y_ae_trn, ae), axis=0)
    y_ae_trn = np.concatenate((y_ae_trn, ae), axis=0)
    
y_ae_vld = np.zeros((1,4))
for specs, labels in validation_dataloader:
    outputs = N(specs)
    outputs[outputs<0]=0
    ae = torch.abs(outputs-labels).detach().numpy()
    np.concatenate((y_ae_vld, ae), axis=0)
    y_ae_vld = np.concatenate((y_ae_vld, ae), axis=0)

In [27]:
#Write Network's answers

a = ['Cu','Ni','Cr','NO3']

pd.DataFrame(y_ae_test).to_csv(path_to_out+ 'Y_out_tst.csv',sep=',', header = a)
pd.DataFrame(y_ae_trn).to_csv(path_to_out + 'Y_out_trn.csv',sep=',', header = a)
pd.DataFrame(y_ae_vld).to_csv(path_to_out + 'Y_out_vld.csv',sep=',', header = a)
