In [2]:
# copyright @ Ziming Li
# version 1.0
# This code was written solely by Ziming Li, a Ph.D. student at Tsinghua University, China.
# This code is for the final project of my summer internship at Yunzhixin'an Technology Co., Ltd., Zhengzhou, China.
# If you have any questions, please contact me by email: lzm22@mails.tsinghua.edu.cn

In [73]:
import torch
import pickle
import numpy as np
import csv
import copy

In [74]:
# load the trained model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location="cpu": without it, a checkpoint that was saved from a GPU fails to load
# on a machine without CUDA, or is restored onto CUDA while the test tensors built
# later in this notebook (via torch.from_numpy) live on the CPU. Call
# `trained_model.to(device)` afterwards if GPU inference is wanted.
# weights_only=False: the checkpoint is used as a whole model object (its .cell/.linear
# attributes are accessed later), which newer PyTorch releases refuse to unpickle by default.
# NOTE: this executes pickled code, so only load checkpoints from a trusted source.
trained_model = torch.load(
    './trained_model/checkpoint_run.pth',
    map_location="cpu",
    weights_only=False,
)

In [75]:
def predict_with_model(model, batched_data, device):
    """Predict one class index per sequence in a batch.

    The recurrent cell is stepped over the time axis; the cell output of the final
    time step is projected by ``model.linear`` and the arg-max over dim 1 is returned.

    Args:
        model: Module exposing ``eval()``, ``to()``, ``num_units``,
            ``cell(input, state) -> (output, new_state)`` and ``linear(output)``.
            It is moved to ``device`` as a side effect.
        batched_data: 3-D tensor unpacked as ``(batch_size, seqlen, features)``.
        device: ``torch.device`` on which the model, inputs and state are placed.

    Returns:
        Tensor of arg-max indices along dim 1 of the last-step logits
        (presumably shape ``(batch_size,)`` -- assumes ``model.linear`` returns 2-D logits).

    Raises:
        ValueError: If ``batched_data`` has a zero-length time axis.
    """
    # Keep the model, the inputs and the recurrent state on the same device.
    model = model.to(device)
    model.eval()
    sent_data = batched_data.to(device)

    batch_size, seqlen, _ = sent_data.shape
    if seqlen == 0:
        raise ValueError("batched_data must contain at least one time step")

    # Inference only: never build an autograd graph, even if the caller forgot no_grad().
    with torch.no_grad():
        # NOTE(review): the initial state is drawn at random, so repeated calls may give
        # different predictions unless the RNG is seeded. Confirm this matches how the
        # model was trained (an all-zero initial state is the more common choice).
        now_state = torch.rand(batch_size, model.num_units, device=device)

        for step in range(seqlen):
            incoming, now_state = model.cell(sent_data[:, step], now_state)

        # Only the final step's output is classified, so project it once after the loop.
        logits = model.linear(incoming)

    return torch.argmax(logits, dim=1)

In [76]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# artifacts that were produced by a trusted pipeline.
mapper = _load_pickle('./data/mapper_between_index_and_label.pkl')

raw_test_strings = _load_pickle('./data/raw_test_strings.pkl')
test_set = _load_pickle('./data/test_set.pkl')
test_labels = _load_pickle('./data/test_labels.pkl')

In [77]:
# Pack the unpickled samples into a float32 ndarray, then wrap that array as a tensor.
test_array = np.array(test_set, dtype=np.float32)
test_set = torch.from_numpy(test_array)

In [78]:
def choose_one_for_test(index):
    """Classify the ``index``-th test sample and return it next to its labels.

    Args:
        index: Position of the sample in ``test_set`` / ``raw_test_strings`` / ``test_labels``.

    Returns:
        Tuple ``(raw_string, true_label, predicted_label)`` where both labels are
        looked up in ``mapper``.
    """
    with torch.no_grad():
        # .clone() copies just this sample; deepcopy on a tensor view duplicates the
        # view's whole backing storage. unsqueeze(0) adds the batch axis of size 1.
        single_batch = test_set[index].clone().unsqueeze(0)
        ans = predict_with_model(trained_model, single_batch, device)

    # The batch holds exactly one sample, so reduce the 1-element tensor to a Python int.
    # Indexing `mapper` with the tensor itself fails for dicts (tensors hash by identity)
    # and yields a 1-element array instead of a label for ndarrays.
    predicted_index = int(ans.item())

    return raw_test_strings[index], mapper[test_labels[index]], mapper[predicted_index]

# Collect (raw string, true label, predicted label) for the first test samples.
raw_strings = []
true_labels = []
predicted_labels = []
# Ask for 50 examples, but never more than the data provides; otherwise
# choose_one_for_test would raise IndexError on a smaller test set.
example_number = min(50, len(raw_test_strings), len(test_set), len(test_labels))

for i in range(example_number):
    raw_string, true_label, predicted_label = choose_one_for_test(i)
    raw_strings.append(raw_string)
    true_labels.append(true_label)
    predicted_labels.append(predicted_label)

In [79]:
# Write the collected examples to a CSV file: one header row, then one row per example.
header = ['raw_string', 'true_label', 'predicted_label']
with open('example_results.csv', 'w', newline='') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerow(header)
    csv_writer.writerows(
        [raw_strings[row], true_labels[row], predicted_labels[row]]
        for row in range(example_number)
    )

In [1]:
# Final status report: one message per line.
print(
    'the task of data classification is done!',
    'the results are saved as example_results.csv',
    sep='\n',
)

the task of data classification is done!
the results are saved as example_results.csv
