<a href="https://colab.research.google.com/github/simpleParadox/Private-RE/blob/main/project_622.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers --quiet
!pip install pyvacy --quiet

[K     |████████████████████████████████| 5.5 MB 18.1 MB/s 
[K     |████████████████████████████████| 7.6 MB 49.0 MB/s 
[K     |████████████████████████████████| 163 kB 31.8 MB/s 
[?25h  Building wheel for pyvacy (setup.py) ... [?25l[?25hdone


In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import torch.optim as optim

# Import the transformers library for the retrieving the BERT embeddings.
import transformers
from transformers import BertModel, BertTokenizer


# Import pyvacy for privacy preserving optimizers.
from pyvacy import optim as private_optim, analysis

# Import scikit-learn packages.
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.utils import gen_batches


# Import scientific computing python packages.
import pandas as pd
import numpy as np      
import matplotlib.pyplot as plt

# Additional packages.
from google.colab import drive
from tqdm import tqdm
import csv
from typing import List


# Using gpu if available.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda:0'

## Read in Erin's tabular data and preprocess it.

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# relations_path = '/content/drive/MyDrive/CMPUT 622 project/data/tabular_data/Input_all_29_relation.tsv'
relations_path = '/content/drive/MyDrive/CMPUT 622 project/data/tabular_data/Input_500_29_relation.tsv'

train_data = pd.read_csv(relations_path, encoding='utf-8', sep = '\t')

print(train_data.shape[0])

589617


In [None]:
train_data.isnull().sum()

table_caption          564370
table_section_title      8792
headers                   482
entity pair                 0
relation                    0
dtype: int64

In [None]:
train_data.head()

Unnamed: 0,table_caption,table_section_title,headers,entity pair,relation
0,Recipients,Recipients,Name of the recipient,Nishan-e-Haider Saif Ali Janjua Saif Ali Janj...,award.award_nominated_work.award_nominations.....
1,Recipients,Recipients,Name of the recipient,Nishan-e-Haider Raja Muhammad Sarwar Raja Muh...,award.award_nominated_work.award_nominations.....
2,Recipients,Recipients,Name of the recipient,Nishan-e-Haider Tufail Mohammad Tufail Mohammad,award.award_nominated_work.award_nominations.....
3,Recipients,Recipients,Name of the recipient,Nishan-e-Haider Raja Aziz Bhatti Raja Aziz Bh...,award.award_nominated_work.award_nominations.....
4,Recipients,Recipients,Name of the recipient,Nishan-e-Haider Rashid Minhas Rashid Minhas,award.award_nominated_work.award_nominations.....


In [None]:
train_data.fillna("", inplace = True)

# Shuffle data so that there is a higher chance of the train and test data being from the same distribution.
train_data = shuffle(train_data, random_state = 1)


labels = train_data.iloc[:,-1].values
sentences = train_data.iloc[:,:-1].values.tolist()

sentences = [' '.join(sent).strip() for sent in sentences]

label = preprocessing.LabelEncoder()
y = label.fit_transform(train_data['relation'])
label_mappings = integer_mapping = {i: l for i, l in enumerate(label.classes_)}

In [None]:
train_data[:2]

Unnamed: 0,table_caption,table_section_title,headers,entity pair,relation
580310,,Scorers and assistants,Position Name,DF Marko Lomić Marko Lomić,soccer.football_position.players
128294,,Short films,Title Genre,Bramadero Bramadero Erotic,film.film.genre


In [None]:
sentences[:2]

['Scorers and assistants Position Name DF  Marko Lomić Marko Lomić',
 'Short films Title Genre  Bramadero Bramadero  Erotic']

In [None]:
label_mappings

{0: 'None',
 1: 'award.award_nominated_work.award_nominations..award.award_nomination.award_nominee',
 2: 'book.author.works_written',
 3: 'book.book.genre',
 4: 'business.company.industry',
 5: 'education.educational_institution.students_graduates..education.educational_institution.students_graduates',
 6: 'film.actor.film..film.performance.character',
 7: 'film.director.film',
 8: 'film.film.country',
 9: 'film.film.genre',
 10: 'film.film.language',
 11: 'film.film.music',
 12: 'film.film.production_companies',
 13: 'film.performance.actor..film.performance.film',
 14: 'film.producer.film',
 15: 'film.writer.film',
 16: 'government.political_party.politicians_in_this_party',
 17: 'location.location.contains',
 18: 'music.artist.album',
 19: 'music.artist.origin',
 20: 'people.deceased_person.place_of_death',
 21: 'people.person.nationality',
 22: 'people.person.parents',
 23: 'people.person.place_of_birth',
 24: 'people.person.profession',
 25: 'people.person.religion',
 26: 'people

In [None]:
train_data.isnull().sum()

table_caption          0
table_section_title    0
headers                0
entity pair            0
relation               0
dtype: int64

## **Read Sententence-level Data**

In [None]:
train_directory_path = '/content/drive/MyDrive/CMPUT 622 project/data/semeval/train.txt'
test_directory_path = '/content/drive/MyDrive/CMPUT 622 project/data/semeval/test.txt'

In [None]:
relation_to_id = [
    "other", 
    "Entity-Destination(e1,e2)",
    "Cause-Effect(e2,e1)",        
    "Member-Collection(e2,e1)",      
    "Entity-Origin(e1,e2)",        
    "Message-Topic(e1,e2)",        
    "Component-Whole(e2,e1)",       
    "Component-Whole(e1,e2)",       
    "Instrument-Agency(e2,e1)",     
    "Product-Producer(e2,e1)",     
    "Content-Container(e1,e2)",     
    "Cause-Effect(e1,e2)",          
    "Product-Producer(e1,e2)",       
    "Content-Container(e2,e1)",    
    "Entity-Origin(e2,e1)",          
    "Message-Topic(e2,e1)",        
    "Instrument-Agency(e1,e2)",       
    "Member-Collection(e1,e2)",      
    "Entity-Destination(e2,e1)"]    

In [None]:
def convertText_csv(path):
  output: List[List[str]] = []

  with open(path) as file:
    lines = file.read()
    lines =  lines.splitlines()

  for line in lines:
    line = line.strip()
    input = line.split(sep="\t")
    entity1 = input[0]
    entity2 = input[1]
    relation = input[2]
    sentence = input[3]

    sentence = sentence.replace('<e1>', '')
    sentence = sentence.replace('<e2>', '')
    sentence = sentence.replace('</e1>', '')
    sentence = sentence.replace('</e2>', '')
    
    output.append([sentence, entity1, entity2, relation])
  return output

In [None]:
def writeOutput(output, path):
  with open(path, 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(["sentence", "entity1", "entity2", "relation"])
    for i in output:
      writer.writerow(i)

In [None]:
def loadSemEvalDateset(inputFilename, outputFilename):
  writeOutput(convertText_csv(inputFilename), outputFilename)
  data = pd.read_csv(outputFilename, encoding='utf-8', sep = '\t')

  data = shuffle(data, random_state = 1) 
  #print(data[:1])

  labels = data.iloc[:,-1].values
  features = data.iloc[:,:-1].values.tolist()
  sentences = [' '.join(i).strip() for i in features]

  label = preprocessing.LabelEncoder()
  y = label.fit_transform(data['relation'])
  label_mappings = integer_mapping = {i: l for i, l in enumerate(label.classes_)}
  return sentences, y, label_mappings

In [None]:
x_train, y_train, map = loadSemEvalDateset(train_directory_path, "train.tsv")

In [None]:
len(y_train)

8000

In [None]:
x_train[0]

'An  invoice  is a commercial document indicating the  products  , quantities , and agreed prices for products or services the seller has provided the buyer . invoice products'

In [None]:
x_test, y_test, map = loadSemEvalDateset(test_directory_path, "test.tsv")

In [None]:
x_test[0] # probably test and train files have same data

'Police found in front of one of the bombed establishments an improvised  bomb  inside a papaya  fruit  left under a table at Delecta Bakeshop Friday by two teenage suspects . bomb fruit'

In [None]:
y_test.shape

(2717,)

In [None]:
y_train = torch.tensor(y_train)
y_train

tensor([14, 13, 15,  ...,  0, 16, 13])

## Initialize the pretrained BERT model (uncased) and the respective tokenizer.

### NOTE: We might need to tokenize and encode everything before running the model.

### Get BERT embeddings

In [None]:
def get_bert_embeds_from_tokens(bert_model, encoded_inputs):
    all_bert_embeds = []
    bert_model = bert_model.to(device)  # Put the bert_model on the GPU.
    for i in tqdm(range(len(encoded_inputs))):
        encoded_input = encoded_inputs[i]
        encoded_input = encoded_input.to(device)  # Put the encoded input on the GPU.
        # print("encoded input: ", type(encoded_input))
        outputs = bert_model(**encoded_input)
        hidden_states = outputs['last_hidden_state']
        # pooler_output = outputs['pooler_output'][0]  # We don't need this for our analysis.
        all_bert_embeds.append(hidden_states)
        # all_bert_embeds.append(np.squeeze(pooler_output.cpu().detach().numpy()))
    return all_bert_embeds

def bert_tokenize(texts, tokenizer):
    all_encoded_inputs = []
    # bert_model = bert_model.to(device)
    
    for i in tqdm(range(len(texts))):
        text = texts[i]
        encoded_input = tokenizer(text, return_tensors='pt', padding="max_length", max_length=50, truncation=True)
        all_encoded_inputs.append(encoded_input)
        
    return all_encoded_inputs

In [None]:
def get_labels(y_pred):
    y_pred_label = np.zeros((len(y_pred),1))
    print(y_pred_label.shape)
    for index in range(len(y_pred)):
        y_pred_label[index] = np.argmax(y_pred[index])
    return y_pred_label

TESTING

### Putting the tokenizer into a function.

In [None]:
# def bert_tokenize_and_get_embeds(texts, bert_model, tokenizer):
#     all_encoded_inputs = []
#     bert_model = bert_model.to(device)
    
#     for i in tqdm(range(len(texts))):
#         text = texts[i]
#         encoded_input = tokenizer(text, return_tensors='pt', padding="max_length", max_length=50, truncation=True).to(device)
#         outputs = bert_model(**encoded_input)
#         hidden_states = outputs['last_hidden_state']
#         all_encoded_inputs.append(np.squeeze(hidden_states.cpu().detach().numpy()))
        
#     return all_encoded_inputs

### Define the BertTokenizer and the BertModel from the transformers library.

In [None]:
# Define the BertModel and the BertTokenizer
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', model_max_length=50, padding_side='right')
bert_model = BertModel.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Encode the inputs and store them so that we don't have re-encode everytime we run the model.

In [None]:
seeds = [0]   # Change the actual seed value here.
all_train_last_hidden_states = []
all_test_last_hidden_states = []
# NOTE: Since colab is running out of memory, you can process this in batches and then concatenate the results. See if this works. If not, then move to Compute Canada.
for seed in seeds:
    X_train_texts, X_test_texts, y_train_classes, y_test_classes = train_test_split(sentences[:1000], y[:1000], random_state=seed, test_size=0.2)

    # slices = gen_batches(len(X_train_texts), 1000)
    # for batch_num, s in enumerate(slices):
        # print("Batch num: ", batch_num)

        # Now do the tokenization and the encoding process.
    train_tokens = bert_tokenize(X_train_texts, bert_tokenizer)

    test_tokens = bert_tokenize(X_test_texts, bert_tokenizer)

    # # Now get the encodings from BERT. NOTE: The get_bert_embeds_from_tokens function only returns the last_hidden_state vector for the input.
    last_hidden_states_train = get_bert_embeds_from_tokens(bert_model, train_tokens)
    del train_tokens
    last_hidden_states_test = get_bert_embeds_from_tokens(bert_model, test_tokens)
    del test_tokens

    # # Store the hidden states
    all_train_last_hidden_states.append(last_hidden_states_train)
    all_test_last_hidden_states.append(last_hidden_states_test)


    # np.savez_compressed(f"embeds/tabular_data/{dataset_size}/train_embeds_pooler_output_seed_{seed}.npz", all_train_last_hidden_states)
    # np.savez_compressed(f"embeds/tabular_data/{dataset_size}/test_embeds_pooler_output_seed_{seed}.npz", all_test_last_hidden_states)

100%|██████████| 800/800 [00:00<00:00, 1799.39it/s]
100%|██████████| 200/200 [00:00<00:00, 1727.95it/s]
  0%|          | 0/800 [00:00<?, ?it/s]


RuntimeError: ignored

In [None]:
len(all_train_last_hidden_states[0])

In [None]:
y_train_classes.shape

## Model definition and training




### Implement the model

In [16]:
class erin_model(nn.Module):
    def __init__(self, in_size=768, hidden_size: int = 1, num_relations: int = 29):
        super(erin_model,self).__init__()

        # Just add one LSTM unit as the model followed by a fully connected layer and then a softmax.

        self.lstm = nn.LSTM(input_size=in_size, hidden_size=hidden_size, num_layers=1)
        self.fc = nn.Linear(hidden_size, num_relations)

    def forward(self, x):
        x, (h_n, c_n) = self.lstm(x)
        # print("LSTM output: ", x)
        # print("LSTM output: ", x.size())
        # x = x.view(x.size(0), -1)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        output = F.softmax(x)
        # print("model output: ", output)
        return output

### Convert numpy to PyTorch TensorDataset and then into DataLoader

In [None]:
# Load the preprocessed data from google drive.
sentences = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/tabular_data/small/train_embeds_pooler_output_seed_0.npz", allow_pickle=True)['arr_0'][0]
labels = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/tabular_data/small/train_labels_seed_0.npz", allow_pickle=True)['arr_0']

In [None]:
# Load test data and labels.
test_sentences = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/tabular_data/small/test_embeds_pooler_output_seed_0.npz", allow_pickle=True)['arr_0'][0]
test_labels = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/tabular_data/small/test_labels_seed_0.npz", allow_pickle=True)['arr_0']

In [None]:
len(sentences)
len(labels)

471693

In [None]:
# Do this separetely inside for training and test data (both features and labels)
# Confirm if you need to convert the discrete values into one hot vectors or not. I think we need to but double check it to make sure.
# x_data_train = np.array(all_train_last_hidden_states[0])
batch_size = 16
tensor_x_train = torch.Tensor(sentences)
tensor_y_train = torch.Tensor(labels)
train_dataset = TensorDataset(tensor_x_train, tensor_y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size)



In [None]:
tensor_x_test = torch.Tensor(test_sentences)
tensor_y_test = torch.Tensor(test_labels)
test_dataset = TensorDataset(tensor_x_test, tensor_y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
model = erin_model()

In [None]:
print(model)

erin_model(
  (lstm): LSTM(768, 1)
  (fc): Linear(in_features=1, out_features=29, bias=True)
)


### Implement training loop

In [None]:
model = erin_model()  # Enter input size
model.to(device)

# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.DPSGD(params=model.parameters(), **training_parameters)  # Define training parameters.

# epsilon = analysis.epsilon(**training_parameters)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        labels = labels.type(torch.LongTensor)

        inputs = inputs.to(device)
        labels = labels.to(device)  # Put the data on the gpu if available.

        # Reset the optimizer to have zero-gradients.
        optimizer.zero_grad()

        # Forward pass.
        outputs = model(inputs)
        
        
        # Calculate loss.
        loss = criterion(outputs, labels)
        
        # Calculate gradients.
        loss.backward()

        # Update weights.
        optimizer.step()


        # Calculate loss for debugging.
        # running_loss = loss.item()
        # if epoch % 10 == 0:
        #     print(f'[{epoch + 1}] loss: {running_loss:.3f}')
        running_loss += loss.item()
        if i % 100 == 99:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0




[1,   100] loss: 0.168
[1,   200] loss: 0.168
[1,   300] loss: 0.168
[1,   400] loss: 0.168
[1,   500] loss: 0.168
[1,   600] loss: 0.168
[1,   700] loss: 0.167
[1,   800] loss: 0.167
[1,   900] loss: 0.167
[1,  1000] loss: 0.167
[1,  1100] loss: 0.166
[1,  1200] loss: 0.166
[1,  1300] loss: 0.166
[1,  1400] loss: 0.166
[1,  1500] loss: 0.166
[1,  1600] loss: 0.166
[1,  1700] loss: 0.165
[1,  1800] loss: 0.166
[1,  1900] loss: 0.165
[1,  2000] loss: 0.165
[1,  2100] loss: 0.165
[1,  2200] loss: 0.165
[1,  2300] loss: 0.165
[1,  2400] loss: 0.164
[1,  2500] loss: 0.164
[1,  2600] loss: 0.165
[1,  2700] loss: 0.164
[1,  2800] loss: 0.164
[1,  2900] loss: 0.163
[1,  3000] loss: 0.164
[1,  3100] loss: 0.164
[1,  3200] loss: 0.164
[1,  3300] loss: 0.164
[1,  3400] loss: 0.163
[1,  3500] loss: 0.163
[1,  3600] loss: 0.164
[1,  3700] loss: 0.163
[1,  3800] loss: 0.162
[1,  3900] loss: 0.163
[1,  4000] loss: 0.163
[1,  4100] loss: 0.163
[1,  4200] loss: 0.163
[1,  4300] loss: 0.162
[1,  4400] 

In [None]:
""

### Evaluating model performance on test data.

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        labels = labels.type(torch.LongTensor)

        inputs = inputs.to(device)
        labels = labels.to(device)  # Put the data on the gpu if available.
        # calculate outputs by running images through the network
        outputs = model(inputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        print(outputs, predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
         1.2770e-10, 1.4348e-10, 1.4787e-10, 5.9074e-04, 1.0687e-10],
        [1.0585e-09, 6.9522e-10, 7.8111e-10, 6.3864e-10, 9.2222e-10, 1.0887e-09,
         9.2500e-10, 7.0726e-10, 3.5667e-09, 9.9414e-10, 1.0604e-15, 4.2068e-03,
         4.5740e-04, 7.9670e-10, 6.2660e-10, 8.1899e-10, 8.5451e-10, 6.9420e-10,
         6.0485e-10, 4.7555e-10, 1.1071e-09, 6.1618e-01, 7.7260e-10, 1.2586e-09,
         1.1239e-09, 7.0780e-10, 1.0835e-09, 3.7915e-01, 7.4175e-10],
        [3.1738e-10, 2.5284e-10, 2.0567e-10, 2.3505e-10, 2.0662e-10, 2.0853e-10,
         2.2719e-10, 2.2839e-10, 5.0462e-10, 2.4616e-10, 2.6903e-04, 8.0609e-01,
         1.7441e-08, 2.0506e-10, 2.3914e-10, 1.9562e-10, 2.5162e-10, 3.1787e-10,
         2.4194e-10, 5.8913e-10, 3.0722e-10, 1.9140e-01, 2.0345e-10, 3.6755e-10,
         2.8639e-10, 2.9975e-10, 3.2427e-10, 2.2406e-03, 2.3280e-10],
        [1.2609e-09, 8.5279e-10, 9.1222e-10, 7.8481e-10, 1.0509e-09, 1.2114e-

### Calculate F1 of the results

In [None]:
# Use all_labels and all_predictions from the previous cell.
score = f1_score(labels.cpu().data, predicted.cpu(), average='micro')
score

0.25

In [None]:
labels

tensor([20, 21, 16, 12], device='cuda:0')

In [None]:
predicted

tensor([21, 27, 21, 12], device='cuda:0')

In [None]:



import torch
import torchvision
import torchvision.transforms as transforms


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


dataiter = iter(trainloader)
images, labels = next(dataiter)
print(labels)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
tensor([9, 7, 1, 5])


In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # print("Inputs: ", inputs)
        print("Lables: ", labels)
        break

        # zero the parameter gradients
        # optimizer.zero_grad()

        # # forward + backward + optimize
        # outputs = net(inputs)
        # loss = criterion(outputs, labels)
        # loss.backward()
        # optimizer.step()

        # # print statistics
        # running_loss += loss.item()
        # if i % 2000 == 1999:    # print every 2000 mini-batches
        #     print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
        #     running_loss = 0.0

print('Finished Training')

Lables:  tensor([2, 7, 6, 8])
Lables:  tensor([1, 9, 7, 1])
Finished Training


**testing semeval**

In [5]:
device

'cuda:0'

In [6]:
# Load the preprocessed data from google drive.
sentences = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/semeval/train_embeds_seed_0.npz", allow_pickle=True)['arr_0'][0]
labels = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/semeval/train_labels_seed_0.npz", allow_pickle=True)['arr_0']

In [7]:
# Load test data and labels.
test_sentences = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/semeval/test_embeds_seed_0.npz", allow_pickle=True)['arr_0'][0]
test_labels = np.load("/content/drive/MyDrive/CMPUT 622 project/data/embeds/semeval/test_labels_seed_0.npz", allow_pickle=True)['arr_0']

In [12]:
print(len(sentences), len(labels))


8000 8000


In [13]:
batch_size = 20
tensor_x_train = torch.Tensor(sentences)
tensor_y_train = torch.Tensor(labels)
train_dataset = TensorDataset(tensor_x_train, tensor_y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

In [14]:
tensor_x_test = torch.Tensor(test_sentences)
tensor_y_test = torch.Tensor(test_labels)
test_dataset = TensorDataset(tensor_x_test, tensor_y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [19]:
model = erin_model(num_relations = 19)
model

erin_model(
  (lstm): LSTM(768, 1)
  (fc): Linear(in_features=1, out_features=19, bias=True)
)

In [20]:
model = erin_model()  # Enter input size
model.to(device)

# optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.DPSGD(params=model.parameters(), **training_parameters)  # Define training parameters.

# epsilon = analysis.epsilon(**training_parameters)
criterion = nn.CrossEntropyLoss()

In [21]:
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        labels = labels.type(torch.LongTensor)

        inputs = inputs.to(device)
        labels = labels.to(device)  # Put the data on the gpu if available.

        # Reset the optimizer to have zero-gradients.
        optimizer.zero_grad()

        # Forward pass.
        outputs = model(inputs)
        
        
        # Calculate loss.
        loss = criterion(outputs, labels)
        
        # Calculate gradients.
        loss.backward()

        # Update weights.
        optimizer.step()


        # Calculate loss for debugging.
        # running_loss = loss.item()
        # if epoch % 10 == 0:
        #     print(f'[{epoch + 1}] loss: {running_loss:.3f}')
        running_loss += loss.item()
        if i % 100 == 99:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0




[1,   100] loss: 0.168
[1,   200] loss: 0.168
[1,   300] loss: 0.167
[1,   400] loss: 0.167
[2,   100] loss: 0.167
[2,   200] loss: 0.166
[2,   300] loss: 0.166
[2,   400] loss: 0.165
[3,   100] loss: 0.165
[3,   200] loss: 0.164
[3,   300] loss: 0.164
[3,   400] loss: 0.164
[4,   100] loss: 0.164
[4,   200] loss: 0.163
[4,   300] loss: 0.163
[4,   400] loss: 0.163
[5,   100] loss: 0.164
[5,   200] loss: 0.163
[5,   300] loss: 0.163
[5,   400] loss: 0.163
[6,   100] loss: 0.163
[6,   200] loss: 0.163
[6,   300] loss: 0.163
[6,   400] loss: 0.162
[7,   100] loss: 0.163
[7,   200] loss: 0.163
[7,   300] loss: 0.163
[7,   400] loss: 0.162
[8,   100] loss: 0.163
[8,   200] loss: 0.162
[8,   300] loss: 0.162
[8,   400] loss: 0.162
[9,   100] loss: 0.163
[9,   200] loss: 0.162
[9,   300] loss: 0.162
[9,   400] loss: 0.162
[10,   100] loss: 0.163
[10,   200] loss: 0.162
[10,   300] loss: 0.162
[10,   400] loss: 0.162


In [26]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        labels = labels.type(torch.LongTensor)

        inputs = inputs.to(device)
        labels = labels.to(device)  # Put the data on the gpu if available.
        # calculate outputs by running images through the network
        outputs = model(inputs)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        #print(outputs, predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 16 %




In [23]:
score = f1_score(labels.cpu().data, predicted.cpu(), average='micro')
score

0.23529411764705882

In [24]:
predicted

tensor([18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18],
       device='cuda:0')

In [25]:
labels

tensor([14, 18, 13,  0, 18, 11,  8, 18,  2,  6,  4, 13, 18, 13, 11,  1,  0],
       device='cuda:0')