# Imports & Setup

In [None]:
# Colab only: mount Google Drive at /content/gdrive. The data, model and CSV
# paths used further down all live under "/content/gdrive/My Drive/...".
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import numpy as np
import torch.optim as optim
import time
import pandas as pd
import glob
from math import sqrt
import numpy as np
from tqdm import tqdm

# Neural Net

In [None]:
def split(batches, size):
  """Cut a paired (inputs, targets) collection into a leading and a trailing part.

  Args:
    batches: Indexable pair; element 0 holds the inputs and element 1 the
      targets. Both must support slicing, and element 0 must expose ``.shape``.
    size: Fraction of the first axis of the inputs that goes into the leading
      part (the cut index is truncated towards zero).

  Returns:
    ``[[inputs_head, targets_head], [inputs_tail, targets_tail]]``.
  """
  inputs, targets = batches[0], batches[1]
  cut = int(inputs.shape[0] * size)
  head = [inputs[:cut], targets[:cut]]
  tail = [inputs[cut:], targets[cut:]]
  return [head, tail]

def test(net, batches, loss_func):
  total_loss = 0 
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
  for b in range(len(batches[0])):
    x = batches[0][b]
    y = batches[1][b]
    #print(x.shape, y.shape)
    batch_x = x.to(device).float()
    batch_y = y.to(device).float()
    #print("batches")
    #print(type(batch_x))
    outputs = net(batch_x)
    loss = loss_func(outputs, batch_y)
    total_loss += loss.item()
  
  total_loss /= len(batches[0])
  return total_loss

def train_net(net, batches, lr = 0.01, epoch = 1000):
    """Train ``net`` with Adam on an MSE objective and report its test loss.

    ``batches`` is divided in order (no shuffling) with ``split``: the first
    60% of the batches are used for training and the remainder is halved into
    validation and test batches. Every fifth epoch a progress line with the
    training, validation and running-average losses is printed.

    Args:
        net: ``nn.Module`` to optimise. It is moved (in place) to the CUDA
            device when one is available, otherwise it stays on the CPU.
        batches: Pair ``(inputs, targets)`` indexed by batch along the first
            axis.
        lr: Adam learning rate.
        epoch: Number of passes over the training batches.

    Returns:
        ``(net, test_loss)`` where ``test_loss`` is the value returned by
        ``test`` on the held-out test batches.

    Note:
        The per-epoch and running averages divide by the number of training
        batches/steps, so an empty training split raises ``ZeroDivisionError``
        at the first progress report.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Every batch is sent to `device` below, so the model must live there as
    # well; move it before the optimizer captures its parameters.
    net = net.to(device)
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay = 0.00003)
    loss_func = nn.MSELoss()
    start_time = time.time()
    net.train()

    running_loss = 0.0  # sum of every batch loss seen so far
    steps = 0           # number of optimizer steps taken so far

    train_batches, rest_batches = split(batches, 0.6)
    val_batches, test_batches = split(rest_batches, 0.5)
    print(train_batches[0].shape, train_batches[1].shape)
    print(val_batches[0].shape, val_batches[1].shape)
    print(test_batches[0].shape, test_batches[1].shape)
    for ep in range(epoch):
        cur_loss = 0
        for b in range(len(train_batches[0])):
            optimizer.zero_grad()

            batch_x = train_batches[0][b].to(device).float()
            batch_y = train_batches[1][b].to(device).float()
            outputs = net(batch_x)
            loss = loss_func(outputs, batch_y)
            loss.backward()
            optimizer.step()
            steps += 1
            # Read the scalar once; each .item() forces a device sync.
            batch_loss = loss.item()
            running_loss += batch_loss
            cur_loss += batch_loss
        if ep % 5 == 0:
            time_lapse = time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time))
            val_loss = test(net, val_batches, loss_func)
            epoch_loss = cur_loss / len(train_batches[0])
            print(f'Epoch:{ep:2d} |Time: {time_lapse} | Train Loss: {epoch_loss:.4f} | Validation Loss: {val_loss:.4f} | Average Loss: {running_loss / steps:.4f}')
    return net, test(net, test_batches, loss_func)

In [None]:
# Train one model for a data file, save it next to the file, and log its test error.
def train_save_model(data, file, csv_path):
  """Train a model on ``data``, save it next to ``file`` and log its test error.

  The model is written to the directory of ``file`` under the file's name with
  everything from the first ``.`` replaced by ``.pt``. A row
  ``{Name, Test_Error, Dataset_size}`` is then appended to the CSV at
  ``csv_path``. Nothing is trained when the CSV already lists the file.

  Args:
    data: Samples handed to ``Dataset``; ``data.shape[0]`` is logged as the
      dataset size.
    file: Path of the source data file, using ``/`` as separator.
    csv_path: Existing CSV file with at least a ``Name`` column.

  Returns:
    None.
  """
  path_parts = file.split("/")
  file_name = path_parts[-1].split(".")[0]
  dest_path = "/".join(path_parts[:-1]) + "/" + file_name + ".pt"

  df = pd.read_csv(csv_path, index_col = False)
  # NOTE(review): this is a substring match against every recorded name, so a
  # short stem can also match a longer, different file name -- confirm that
  # this is intended before relying on it for similarly named files.
  if any(file_name in x for x in df['Name'].tolist()):
    print("ALREADY DONE!! :)", file_name)
    return

  dataset = Dataset(data, batch_size = 64)
  batches = dataset.generate_batch()
  print(batches[0].shape)
  input_dim = batches[0].shape[2]
  net = myNet(input_dim, layer_num=2)
  net, test_error = train_net(net, batches, epoch = 1000)

  print(file)
  print(dest_path)
  # Pickles the whole module object (not just a state_dict), so loading it
  # later requires the model class to be importable.
  torch.save(net, dest_path)

  # DataFrame.append was removed in pandas 2.0; concat with a one-row frame
  # is the supported equivalent.
  new_row = pd.DataFrame([{'Name': file, 'Test_Error' : test_error, 'Dataset_size' : data.shape[0] }])
  df = pd.concat([df, new_row], ignore_index=True)
  df.to_csv(csv_path, index=False)

In [None]:

import torch
import torch.nn as nn


class myNet(nn.Module):
    """Fully connected network whose output has the same width as its input.

    With ``layer_num == 2`` the model is
    ``Linear(input_dim, neuron_num) -> Sigmoid -> Linear(neuron_num, input_dim)``.
    Any other ``layer_num`` yields a single ``Linear(input_dim, input_dim)``.

    Args:
        input_dim: Size of the last dimension of both inputs and outputs.
        layer_num: ``2`` selects the hidden-layer variant; every other value
            selects the single linear layer.
        neuron_num: Width of the hidden layer; a negative value (the default)
            means "use ``input_dim``". Ignored by the single-layer variant.
    """

    def __init__(self,
                 input_dim: int,
                 layer_num: int = 2,
                 neuron_num: int = -1
                 ):
        super().__init__()
        # Preferred device, stored for callers; the constructor itself does
        # not move the module to it.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self._layer_num = layer_num
        if neuron_num < 0:
            neuron_num = input_dim
        if layer_num == 2:
            self.linear_predictor = nn.Sequential(
                nn.Linear(input_dim, neuron_num),
                nn.Sigmoid(),
                nn.Linear(neuron_num, input_dim),
            )
        else:
            self.linear_predictor = nn.Sequential(
                nn.Linear(input_dim, input_dim),
            )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Cast ``inputs`` to float32 and run them through the layer stack."""
        return self.linear_predictor(inputs.float())



In [None]:
class Dataset:
    """Groups ``(x, y)`` samples into fixed-size batches.

    Args:
        data: Iterable of ``(x, y)`` pairs.
        batch_size: Number of samples per batch.
    """

    def __init__(self, data, batch_size=32):
        self.batch_size = batch_size
        self.data = data

    def generate_batch(self):
        """Build the batched input and target tensors.

        Samples are taken in iteration order. Only complete batches are kept:
        a trailing group with fewer than ``batch_size`` samples is dropped so
        that the result stays rectangular.

        Returns:
            ``(batches_x, batches_y)``, two tensors whose first axis indexes
            the batch and whose second axis indexes the sample in the batch.
        """
        batches_x = []
        batches_y = []
        current_batch_x = []
        current_batch_y = []
        for x, y in self.data:
            current_batch_x.append(x)
            current_batch_y.append(y)
            # Flush as soon as the batch is full. Waiting for the next sample
            # to trigger the flush would lose the final batch whenever the
            # number of samples is an exact multiple of batch_size.
            if len(current_batch_x) == self.batch_size:
                batches_x.append(current_batch_x)
                batches_y.append(current_batch_y)
                current_batch_x = []
                current_batch_y = []

        return torch.tensor(batches_x), torch.tensor(batches_y)


# Main

In [None]:
# Experiment: test error as a function of the hidden-layer width.
def neuron_exp(data, file, csv_path = "/content/gdrive/My Drive/CSE 534/Project/FCN_data/Nodes_vs_Neurons.csv", neuron_counts = (10, 25, 50, 100, 150, 225)):
  """Train one model per hidden-layer width and log each test error to a CSV.

  For every width in ``neuron_counts`` a fresh ``myNet`` is trained on the same
  batches and a row ``{Name, Test_Error, Neurons}`` is appended to the CSV.
  The CSV is re-read and re-written after every model so finished results
  are already on disk if a later run fails.

  Args:
    data: Samples handed to ``Dataset``.
    file: Identifier stored in the ``Name`` column of each row.
    csv_path: Existing CSV file the results are appended to.
    neuron_counts: Hidden-layer widths to try, in order.

  Returns:
    None.
  """
  dataset = Dataset(data, batch_size = 64)
  batches = dataset.generate_batch()
  print(batches[0].shape)
  input_dim = batches[0].shape[2]
  for neurons in neuron_counts:
    net = myNet(input_dim, neuron_num = neurons)
    net, test_error = train_net(net, batches, epoch = 1000)

    df = pd.read_csv(csv_path, index_col = False)
    # DataFrame.append was removed in pandas 2.0; concat with a one-row frame
    # is the supported equivalent.
    new_row = pd.DataFrame([{'Name': file, 'Test_Error' : test_error, 'Neurons' : neurons}])
    df = pd.concat([df, new_row], ignore_index=True)
    df.to_csv(csv_path, index=False)

In [None]:
def _parse_matrix_pairs(lines):
  """Turn every text line into an ``[x, y]`` pair of square matrices.

  The last whitespace-separated token of each line is discarded; the rest must
  be ``2 * n * n`` numbers: the first half is reshaped to the ``n x n`` matrix
  ``x`` and the second half to ``y``. Every line has to use the same ``n`` and
  both matrices must have an all-zero diagonal.

  Returns:
    The list of ``[x, y]`` pairs (one per line), or ``None`` as soon as one
    line breaks any of the rules above.
  """
  pairs = []
  expected_shape = None
  for line in lines:
    try:
      values = [float(token) for token in line.split()[:-1]]
    except ValueError:
      # Non-numeric content: treat the file as unusable instead of aborting
      # the whole directory scan.
      return None
    value_count = len(values)
    if value_count % 2 != 0:
      return None
    side = sqrt(value_count / 2)
    if not side.is_integer():
      return None
    side = int(side)
    x = np.reshape(np.array(values[:side ** 2]), (side, side))
    y = np.reshape(np.array(values[side ** 2:]), (side, side))
    if expected_shape is not None and x.shape != expected_shape:
      return None
    expected_shape = x.shape
    if np.diagonal(x).any() or np.diagonal(y).any():
      return None
    pairs.append([x, y])
  return pairs


def read_and_create_models(directory = "/content/gdrive/My Drive/CSE 534/Project/FCN_data/saturation_1", csv_path = "/content/gdrive/My Drive/CSE 534/Project/FCN_data/errors_saturation.csv", min_lines = 500):
  """Train and save one model for every usable ``.txt`` data file under ``directory``.

  The directory is searched recursively. Paths containing ``readme`` or
  ``README`` are ignored, files with fewer than ``min_lines`` lines are
  reported and skipped, and a file is only used when every one of its lines
  parses into a valid matrix pair. Each usable file is passed to
  ``train_save_model`` together with ``csv_path``. The running count of usable
  files is printed after each processed path.

  Args:
    directory: Root directory to scan.
    csv_path: Results CSV forwarded to ``train_save_model``.
    min_lines: Minimum number of lines a file needs to be considered.

  Returns:
    None.
  """
  pathname = directory + "/**/*.txt"
  files = glob.glob(pathname, recursive=True)

  count = 0
  for file in tqdm(files):
    if "readme" not in file and "README" not in file:
      with open(file, "r") as f:
        lines = f.readlines()
      if len(lines) < min_lines:
        print("SMALL!",len(lines), file)
        continue
      pairs = _parse_matrix_pairs(lines)
      if pairs is not None:
        count += 1
        data = np.array(pairs)
        print("data shape", data.shape, "lines", len(lines), file)
        train_save_model(data, file, csv_path)
    print(count)

In [None]:
# Entry point: scan the default data directory and train/save a model for
# every usable data file found there.
read_and_create_models()