In [1]:
import torch
from torch.utils.data import Dataset, DataLoader    # dataset representation and loading
import torch.nn as nn                     # neural networks
import torch.nn.functional as F           # layers, activations and more
import torch.optim as optim               # optimizers e.g. gradient descent, ADAM, etc.
import numpy as np
import csv
import pdb
from sklearn.metrics import accuracy_score

In [2]:
device = None
if torch.cuda.is_available():
    print("Using the GPU")
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
    print("NOTE: Using CPU")

NOTE: Using CPU


In [3]:
class CSVDataset(Dataset): 
    # 9 feats: trip count, num ops, num operands, num mem ops, num fops, num branches, est resmii, frequent path length, depth of loop

    def __init__(self, csv_path):
        csv_reader = csv.reader(open(csv_path, 'r'), delimiter=',')

        # line = [prob_name, f1, ... , f9, rank1, ..., rank8]
        self.csv_lines = []
        for line in csv_reader:
            row = [float(item) for item in line[1:10]]
            row.append(int(line[10]))
            self.csv_lines.append(row)
    
    def __len__(self):
        return len(self.csv_lines)
    
    def __getitem__(self, idx):
        line = self.csv_lines[idx]
        X = torch.FloatTensor(line[:-1])
        y = torch.zeros(8)
        y[line[-1] - 1] = 1
        return X, y

In [7]:
class MLP(nn.Module):
    def __init__(self,input_size,hidden_dim_1,hidden_dim_2,hidden_dim_3, hidden_dim_4, num_classes,drop_prob=0.5):
      super(MLP, self).__init__()
      #Put GeneModel architecture here (WITHOUT THE FINAL LAYERS)
      self.hidden_dim_1=hidden_dim_1
      self.hidden_dim_2=hidden_dim_2
      self.num_classes=num_classes
      
      #self.flantten=torch.flatten()
      self.fc_1=nn.Linear(input_size,hidden_dim_1)
      self.fc_2=nn.Linear(hidden_dim_1,hidden_dim_2)
      self.fc_3=nn.Linear(hidden_dim_2,hidden_dim_3)
      self.fc_4=nn.Linear(hidden_dim_3,hidden_dim_4)
      self.fc_5=nn.Linear(hidden_dim_4,num_classes)

      self.batchnorm1 = nn.BatchNorm1d(hidden_dim_1)
      self.batchnorm2 = nn.BatchNorm1d(hidden_dim_2)
      self.batchnorm3 = nn.BatchNorm1d(hidden_dim_3)
      self.batchnorm4 = nn.BatchNorm1d(hidden_dim_4)

      self.relu = nn.ReLU()
      self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
      #Put GeneModel architecture here (WITHOUT THE FINAL LAYERS)
      x=x.float()
      x=self.batchnorm1(self.fc_1(x))
      x=self.dropout(x)
      x=self.relu(x)
      x=self.batchnorm2(self.fc_2(x))
      x=self.dropout(x)
      x=self.relu(x)
      x=self.batchnorm3(self.fc_3(x))
      x=self.dropout(x)
      x=self.relu(x)
      x=self.batchnorm4(self.fc_4(x))
      x=self.dropout(x)
      x=self.relu(x)
      x=self.fc_5(x)
      return x
    
    def train_model(self, train_loader, optimizer, criterion):
      train_loss = 0.0
      for X, y in train_loader:
        # Setup 
        X = X
        y = y
        optimizer.zero_grad()

        # Send batch through 
        pred = mlp(X)
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        #print(accuracy_score(torch.max(y.cpu(), dim=1)[1], torch.max(pred.cpu(), dim=1)[1]))

      return train_loss

    def validate_model(self, val_loader, criterion):
      val_loss_sum = 0.0
      true = []
      predlist = []
      for X, y in val_loader:
        X = X
        y = y
        
        pred = mlp(X)
        true += y.tolist()
        predlist += pred.tolist()
        #print(pred)
        #print(y)
        ##print(torch.max(pred, dim=1)[1], torch.max(y, dim=1)[1])
        val_loss = criterion(pred, y)
        val_loss_sum += val_loss.item()
      #print(torch.max(torch.tensor(true), dim=1)[1], torch.max(torch.tensor(predlist), dim=1)[1])
      return val_loss_sum / len(val_loader), accuracy_score(torch.max(torch.tensor(true), dim=1)[1], torch.max(torch.tensor(predlist), dim=1)[1])

In [5]:
# Setup model and hyper parameters

batch_size = 32


# Setup dataloaders
train_csv = "new_train.csv"
val_csv = "new_val.csv"
train_dataset = CSVDataset(train_csv)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = CSVDataset(val_csv)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [8]:
torch.manual_seed(42)
features = range(9)

#Try different neuron configurations (625 total)

##################################################
#
#What you need to modify:
#MLP Class
#MLP definition
#for loop
#...
##################################################

results = {}
best_results = ("", 0, 0)
h_dim1 = [8, 16, 32, 4, 2]
for h_d1 in [8]:
  for h_d2 in h_dim1:
    for h_d3 in h_dim1:
      for h_d4 in h_dim1:
        mlp = MLP(input_size=9, hidden_dim_1=h_d1, hidden_dim_2=h_d2, hidden_dim_3=h_d3, hidden_dim_4=h_d4,num_classes=8, drop_prob=0.0)
        lr = 0.001
        optimizer = optim.Adam(mlp.parameters(), lr=lr)
        epochs = 300
        class_weights = torch.Tensor([0.7054, 1.1513, 1.2625, 1.585, 1.6872, 0.6032, 0.8136, 1.2327])
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)] = (0, 0)
        for i, epoch in enumerate(range(epochs)):
            print(f"EPOCH {i} / {epochs - 1}")
            
            # Set to train() mode and run one epoch
            mlp.train()
            t_loss = mlp.train_model(train_loader, optimizer, criterion)
            print("Training loss =", t_loss / len(train_loader))

            # Set to eval() mode and run one epoch
            with torch.no_grad():
              mlp.eval()
              v_loss, accuracy = mlp.validate_model(val_loader, criterion)
              if results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)][1] < accuracy:
                results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)] = (i, accuracy)
            print("Validation loss =", v_loss , accuracy)
        if (results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)][1] > best_results[2]):
            best_results = (str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4), 
                            results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)][0], 
                            results[str(h_d1) + " " + str(h_d2) + " " +  str(h_d3) + " " + str(h_d4)][1])
        print(best_results)
print(results)
print(best_results)

    

EPOCH 0 / 299
Training loss = 2.202341710743697
Validation loss = 2.1873093791629956 0.1262002743484225
EPOCH 1 / 299
Training loss = 2.1881135067214137
Validation loss = 2.179130414257879 0.12208504801097393
EPOCH 2 / 299
Training loss = 2.181497363940529
Validation loss = 2.185459313185319 0.10973936899862825
EPOCH 3 / 299
Training loss = 2.1730149414228355
Validation loss = 2.1804068088531494 0.1056241426611797
EPOCH 4 / 299
Training loss = 2.1754024883975154
Validation loss = 2.1764230209848154 0.13580246913580246
EPOCH 5 / 299
Training loss = 2.1710263710954916
Validation loss = 2.1748850034630816 0.10973936899862825
EPOCH 6 / 299
Training loss = 2.1692332495813784
Validation loss = 2.181378825851109 0.11934156378600823
EPOCH 7 / 299
Training loss = 2.1684489340885826
Validation loss = 2.172219234964122 0.10973936899862825
EPOCH 8 / 299
Training loss = 2.1692424934843313
Validation loss = 2.1739296653996343 0.11385459533607682
EPOCH 9 / 299
Training loss = 2.1684403302876847
Valid

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Training loss = 2.129271938748982
Validation loss = 2.2007933440415757 0.11522633744855967
EPOCH 187 / 299
Training loss = 2.133053335158721
Validation loss = 2.2047671649767007 0.11385459533607682
EPOCH 188 / 299
Training loss = 2.136389990215716
Validation loss = 2.2051727616268657 0.11659807956104253
EPOCH 189 / 299
Training loss = 2.1318147312039915
Validation loss = 2.211862641832103 0.09876543209876543
EPOCH 190 / 299
Training loss = 2.137327887441801
Validation loss = 2.2147042647652 0.11934156378600823
EPOCH 191 / 299
Training loss = 2.1360903369343798
Validation loss = 2.202455945636915 0.10425240054869685
EPOCH 192 / 299
Training loss = 2.1350734985393025
Validation loss = 2.1979000309239263 0.11248285322359397
EPOCH 193 / 299
Training loss = 2.130066206921702
Validation loss = 2.2069231790045034 0.11659807956104253
EPOCH 194 / 299
Training loss = 2.1294180232545603
Validation loss = 2.2120585234268852 0.10425240054869685
EPOCH 195 / 299
Training loss = 2.1377937223600303
Val

In [None]:
# Optional save notebook to 