In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the generated samples. The CSV has no header row, so the two columns
# are named here: "Label" and "Content".
gen_data = pd.read_csv(
    "item.csv",
    names=["Label", "Content"],
    header=None,
)
gen_data.head()

### Perplexity

In [None]:
!pip install pytorch_pretrained_bert

In [None]:
!pip install spacy ftfy

In [None]:
from pytorch_pretrained_bert import OpenAIGPTTokenizer, OpenAIGPTModel, OpenAIGPTLMHeadModel

# Tokenizer (vocabulary) for the pre-trained 'openai-gpt' checkpoint
tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')

# Pre-trained LM-head model (weights) for the same checkpoint, put in eval mode
model = OpenAIGPTLMHeadModel.from_pretrained('openai-gpt').eval()

In [None]:
import math
import torch

In [None]:
def score(sentence):
    """
    Compute the perplexity-style score of a sentence with the loaded language model.

    The sentence is tokenized with the module-level ``tokenizer``, converted to
    vocabulary ids, and passed to the module-level ``model`` as both the input
    and ``lm_labels``. The value the model returns is treated as the loss and
    exponentiated.

    :param sentence: str, the text to score
    :return: float, ``exp(loss)`` for the sentence
    """
    tokens = tokenizer.tokenize(sentence)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    # Wrap the ids in an outer list so the model receives a batch of one sequence.
    tensor_input = torch.tensor([token_ids])
    # Scoring is pure evaluation: disable autograd so no graph is built (and
    # kept alive) for every sentence that is scored.
    with torch.no_grad():
        loss = model(tensor_input, lm_labels=tensor_input)
    # Convert the single-element result to a Python float explicitly.
    return math.exp(float(loss))

In [None]:
# Score every sentence, separately for each of the two labels.
liberal_contents = gen_data.loc[gen_data["Label"] == "Liberal", "Content"]
conservative_contents = gen_data.loc[gen_data["Label"] == "Conservative", "Content"]

liberal_perplexity = np.array(list(map(score, liberal_contents)))
conservative_perplexity = np.array(list(map(score, conservative_contents)))

In [None]:
# Mean score per label, rounded to two decimals: (Liberal, Conservative).
liberal_mean = liberal_perplexity.mean()
conservative_mean = conservative_perplexity.mean()
round(liberal_mean, 2), round(conservative_mean, 2)

In [None]:
# Mean score over every row of gen_data (all labels), rounded to two decimals.
overall_perplexity = np.array([score(text) for text in gen_data["Content"]])
round(overall_perplexity.mean(), 2)

### Dist-n (distinct n-gram ratio)

In [None]:
def count_ngram(sentences, n):
    """
    Count the number of unique n-grams
    :param sentences: list, a list of responses, each response being a list of tokens
    :param n: int, n-gram
    :return: the number of unique n-grams in sentences; None (after printing an
        error message) when sentences is empty or its first element is not a list
    """
    if len(sentences) == 0:
        print("ERROR, count_ngram get empty input")
        return

    # Only the first element is inspected, as a cheap sanity check on the input.
    if not isinstance(sentences[0], list):
        print("ERROR, count_ngram takes in a list of <class 'list'>, get a list of {} instead".format(
            type(sentences[0])))
        return

    ngram = set()
    for resp in sentences:
        # The range is empty when the response has fewer than n tokens.
        for i in range(len(resp) - n + 1):
            # A tuple keeps token boundaries, so tokens that themselves contain
            # spaces cannot make two different n-grams look identical.
            ngram.add(tuple(resp[i: i + n]))
    return len(ngram)


def eval_distinct(sentences):
    """
    compute distinct scores for the sentences

    Every sentence is encoded with the module-level ``tokenizer``; the number of
    unique 1-, 2- and 3-grams (from ``count_ngram``) is then divided by the
    total number of tokens over all sentences.

    :param sentences: iterable of str, the hyps responses
    :return: tuple (dist1, dist2, dist3), each rounded to 2 decimals; None
        (after printing an error message) when the input contains no tokens
    """
    # Each sentence becomes a list of its encoded tokens rendered as strings.
    sentences = [list(map(str, tokenizer.encode(sent))) for sent in sentences]
    num_tokens = sum(len(tokens) for tokens in sentences)

    # Covers both an empty input and sentences that all encode to nothing;
    # without this guard the divisions below would raise ZeroDivisionError.
    if num_tokens == 0:
        print("ERROR, eval_distinct get empty input")
        return

    dist1 = count_ngram(sentences, 1) / float(num_tokens)
    dist2 = count_ngram(sentences, 2) / float(num_tokens)
    dist3 = count_ngram(sentences, 3) / float(num_tokens)

    return round(dist1, 2), round(dist2, 2), round(dist3, 2)

In [None]:
# Distinct scores for the rows labelled "Conservative".
conservative_texts = gen_data.loc[gen_data["Label"] == "Conservative", "Content"]
eval_distinct(conservative_texts)

In [None]:
# Distinct scores for the rows labelled "Liberal".
liberal_texts = gen_data.loc[gen_data["Label"] == "Liberal", "Content"]
eval_distinct(liberal_texts)

In [None]:
# Distinct scores over every row, regardless of label.
all_texts = gen_data["Content"]
eval_distinct(all_texts)