# 1. Basic Setting
## 1.1 Import Python Libraries

In [10]:
pip install transformers



In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
%cd drive/My\ Drive/data

[Errno 2] No such file or directory: 'drive/My Drive/data'
/content/drive/My Drive/data


# 2. Sentiment Analysis with LLM (without Fine-Tuning)

In [13]:
import os
import random
import pandas as pd

from sklearn.metrics import classification_report

import torch
import torch.nn.functional as F

from tqdm.notebook import tqdm

from transformers import AutoModelForCausalLM, AutoTokenizer

In [14]:
class LM_Prediction:
    def __init__(self, lm_name='gpt2'):
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        self.model = AutoModelForCausalLM.from_pretrained("gpt2")
        self.positive_ids = self.tokenizer.encode('positive', return_tensors='pt')[0]
        self.negative_ids = self.tokenizer.encode('negative', return_tensors='pt')[0]
        return


    def run_model(self, question=None, tokens=None):
        if tokens is None:
            tokens = self.tokenizer.encode(question, return_tensors='pt')[:, :252]
        logits = self.model(tokens).logits
        ans = 2 if F.cross_entropy(logits[:, -1, :], self.positive_ids) < F.cross_entropy(logits[:, -1, :], self.negative_ids) else 1
        return ans

In [15]:
lm = LM_Prediction()

In [16]:
df = pd.read_csv('reduced_test.csv')

In [17]:
n_sample = 100

In [18]:
pred = [
    lm.run_model(question=question)
    for question in tqdm(list(df[:n_sample]['review']))
]

  0%|          | 0/100 [00:00<?, ?it/s]

In [19]:
print(classification_report(df['label'][:n_sample], pred))

              precision    recall  f1-score   support

           1       0.96      0.54      0.69        48
           2       0.70      0.98      0.82        52

    accuracy                           0.77       100
   macro avg       0.83      0.76      0.75       100
weighted avg       0.83      0.77      0.76       100



# 3. Few-shot prompting implementation

In [20]:
class MyPrompt:
    def __init__(self, sample_df, lm_name='gpt2'):
        self.tokenizer = AutoTokenizer.from_pretrained(lm_name)
        self.sample_df = sample_df
        self.sample_num = len(sample_df)
        self.set_options()
        return

    def set_options(self, k=1, no_question=False, no_answer=False, is_random_answer=False):
        self.k = k
        self.no_question = no_question
        self.no_answer = no_answer
        self.is_random_answer = is_random_answer
        return

    def get_prompt(self, question):
        output = []
        for idx in random.sample(range(self.sample_num), self.k):
            if not self.no_question:
                output.append(
                    self.tokenizer.encode(self.sample_df['review'][idx], return_tensors='pt')[:, :252]
                )
            if not self.no_answer:
                ans = (
                    random.choice(['positive', 'negative'])
                    if self.is_random_answer else
                    (
                        'positive' if self.sample_df['label'][idx] == 2 else 'negative'
                    )
                )
                output.append(
                    self.tokenizer.encode('\n'+ans+'\n\n', return_tensors='pt')
                )
        output.append(self.tokenizer.encode(question + "\n", return_tensors='pt')[:, :252])
        return torch.cat(output, dim=-1)

In [21]:
p = MyPrompt(pd.read_csv('reduced_train.csv'))

In [22]:
pred = [
    lm.run_model(tokens=p.get_prompt(question))
    for question in tqdm(list(df[:n_sample]['review']))
]

  0%|          | 0/100 [00:00<?, ?it/s]

In [23]:
print(classification_report(df['label'][:n_sample], pred))

              precision    recall  f1-score   support

           1       0.59      0.33      0.43        48
           2       0.56      0.79      0.66        52

    accuracy                           0.57       100
   macro avg       0.58      0.56      0.54       100
weighted avg       0.58      0.57      0.55       100



In [24]:
p.set_options(k=3)

In [25]:
pred = [
    lm.run_model(tokens=p.get_prompt(question))
    for question in tqdm(list(df[:n_sample]['review']))
]

  0%|          | 0/100 [00:00<?, ?it/s]

In [26]:
print(classification_report(df['label'][:n_sample], pred))

              precision    recall  f1-score   support

           1       0.52      0.50      0.51        48
           2       0.56      0.58      0.57        52

    accuracy                           0.54       100
   macro avg       0.54      0.54      0.54       100
weighted avg       0.54      0.54      0.54       100

