# Aspect-Based Sentiment Analysis using Generative LM with Streamlit

## Installation

In [None]:
# transformers is pinned to the commit this notebook was last run against (see the
# "Resolved ... to commit" line in the recorded output), so a fresh runtime installs
# the same code instead of whatever `main` happens to be that day.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install regex sentencepiece==0.1.94 git+https://github.com/huggingface/transformers.git@9924df9eb234b979ee3fb3f5463e039b9e9623dd streamlit

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-nosp_mhq
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-nosp_mhq
  Resolved https://github.com/huggingface/transformers.git to commit 9924df9eb234b979ee3fb3f5463e039b9e9623dd
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel
import os
import random
import numpy as np
import torch

import transformers
import logging
import re

In [None]:
def set_global_logging_level(level=logging.ERROR, prefices=[""]):
    """
    Override logging levels of different modules based on their name as a prefix.
    It needs to be invoked after the modules have been loaded so that their loggers have been initialized.

    Args:
        - level: desired level. e.g. logging.INFO. Optional. Default is logging.ERROR
        - prefices: list of one or more str prefices to match (e.g. ["transformers", "torch"]). Optional.
          Default is `[""]` to match all active loggers.
          The match is a case-sensitive, literal `module_name.startswith(prefix)`;
          characters such as `.` or `+` in a prefix have no special meaning.
          An empty list behaves like the default and matches every logger.

    Returns:
        None. The matching loggers are modified in place.
    """
    # Literal prefix matching: building a regex out of the raw prefixes made "my.pkg"
    # also match "myxpkg" (and broke on prefixes like "c++").
    # The default list is only read, never mutated, so sharing it between calls is safe.
    # `or ("",)` keeps the historical meaning of an empty collection: match everything.
    prefixes = tuple(prefices) or ("",)
    for name in logging.root.manager.loggerDict:
        if name.startswith(prefixes):
            logging.getLogger(name).setLevel(level)

# Raise the level to ERROR for every already-registered logger whose name starts with
# one of these library prefixes, so their lower-severity messages are not emitted.
set_global_logging_level(logging.ERROR, ["transformers", "nlp", "tensorflow", "tensorboard", "wandb"])

In [None]:
# Colab-only: mount Google Drive at /content/drive. The Streamlit app written below
# reads its model checkpoints from a folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Create a streamlit app

In [None]:
%%writefile app.py
from transformers import AutoModel, AutoTokenizer, GPT2LMHeadModel
import os
import random
import numpy as np
import torch

import transformers
import logging
import re
import streamlit as st

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
GPU = 0  # CUDA device index used when a GPU is available
ckpt_dir = '/content/drive/MyDrive/NLP_Project/checkpoint'
MAX_LENGTH = 128  # `max_length` handed to generate(); it counts the prompt tokens too

# Special tokens that frame the review inside the prompt.
TARGET_END_TOKEN = '<|endoftext|>'
REVIEW_TOKEN = '<|review|>'
REVIEW_END_TOKEN = '<|endofreview|>'

# Checkpoint folder (relative to ckpt_dir) of the model fine-tuned for each subtask.
# The key order is also the order of the options in the "Subtask" select box.
CKPT_PATHS = {
    'aspect_term': 'aspect_term/semeval16_restaurants_sb1_aspect_term_train/gpt2/gpt2_block128_batch8_lr5e-5_warm0_epochs20_log1000_seed42/checkpoint-3000',
    'aspect_category': 'aspect_category/semeval16_restaurants_sb1_aspect_category_train/gpt2/gpt2_block128_batch8_lr5e-5_warm0_epochs20_log1000_seed42/checkpoint-4000',
    'single_term_polarity': 'single_term_polarity/semeval16_restaurants_sb1_aspect_term_single_train/gpt2/gpt2_block128_batch8_lr5e-5_warm0_epochs20_log1000_seed42/checkpoint-4000',
    'single_category_polarity': 'single_category_polarity/semeval16_restaurants_sb1_aspect_category_single_train/gpt2/gpt2_block128_batch8_lr5e-5_warm0_epochs20_log1000_seed42/checkpoint-5000',
    'aspect_term_aspect_category': 'aspect_term_aspect_category/semeval16_restaurants_sb1_aspect_term_aspect_category_train/gpt2/gpt2_block128_batch8_lr5e-5_warm0_epochs20_log1000_seed42/checkpoint-4000',
}

# (widget label, start marker, end marker) of each field read back from the generated text.
TERM_FIELD = ('Term-polarity: ', '<|term|>', '<|endofterm|>')
CATEGORY_FIELD = ('Category-polarity: ', '<|category|>', '<|endofcategory|>')

# Fields shown for each subtask, in display order.
OUTPUT_FIELDS = {
    'aspect_term': (TERM_FIELD,),
    'aspect_category': (CATEGORY_FIELD,),
    'single_term_polarity': (TERM_FIELD,),
    'single_category_polarity': (CATEGORY_FIELD,),
    'aspect_term_aspect_category': (TERM_FIELD, CATEGORY_FIELD),
}


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
@st.cache_resource
def load_model(model_path):
    """Load the fine-tuned GPT-2 model and its tokenizer from `model_path`.

    Streamlit re-executes this whole script on every widget interaction; caching the
    result means each checkpoint is read from disk once per server process instead of
    on every rerun.

    Args:
        model_path: directory of the checkpoint to load.

    Returns:
        (model, tokenizer, device): the model already moved to `device`, which is
        CUDA device `GPU` when CUDA is available and the CPU otherwise.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda", index=GPU)
    else:
        device = torch.device("cpu")
    model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.to(device)
    return model, tokenizer, device


def extract_field(generated, start_token, end_token):
    """Return the text between the last `start_token` and the next `end_token`.

    If `start_token` does not occur, the search for `end_token` starts at the beginning
    of `generated`; if `end_token` does not occur, everything up to the end is returned.
    """
    return generated.split(start_token)[-1].split(end_token)[0]


# ---------------------------------------------------------------------------
# Page
# ---------------------------------------------------------------------------
st.title('Aspect-Based Sentiment Analysis using Generative LM')

st.subheader('Input', divider='gray')
subtask = st.selectbox('Subtask', tuple(CKPT_PATHS))

# Named `review_text` rather than `input` so the builtin is not shadowed.
review_text = st.text_area('Review Sentence', 'it was pleasantly uncrowded, the service was delightful, the garden adorable, the food (from appetizers to entrees) was delectable.')
submit = st.button("Submit", type="primary")

model, tokenizer, device = load_model(os.path.join(ckpt_dir, CKPT_PATHS[subtask]))
# Show which device the model lives on. This used to be a bare `model.device`
# expression that only appeared on the page through Streamlit's "magic" rendering.
st.write(model.device)

if submit:
    if not review_text.strip():
        st.warning('Please enter a review sentence.')
        st.stop()

    prompt = f"{TARGET_END_TOKEN} {REVIEW_TOKEN} {review_text} {REVIEW_END_TOKEN}"
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # The prompt counts towards `max_length`, so a prompt that already fills the
    # budget leaves no room for any generated token.
    if input_ids.shape[-1] >= MAX_LENGTH:
        st.error(f'The review is too long: the prompt is {input_ids.shape[-1]} tokens, the limit is {MAX_LENGTH}.')
        st.stop()

    # do_sample=True: the output is sampled, so repeated submissions may differ.
    generated_ids = model.generate(input_ids, max_length=MAX_LENGTH, do_sample=True)
    out = tokenizer.decode(generated_ids[0])

    st.subheader('Output', divider='gray')
    for label, start_token, end_token in OUTPUT_FIELDS[subtask]:
        st.text_input(label, extract_field(out, start_token, end_token))


Overwriting app.py


## Install localtunnel

In [None]:
# Install the localtunnel npm package; it is run further down with `npx localtunnel`.
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
[K[?25h+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.718s

3 packages are looking for funding
  run `npm fund` for details

found 2 [93mmoderate[0m severity vulnerabilities
  run `npm audit fix` to fix them, or `npm audit` for details


## Run streamlit in background

In [None]:
# Start the app as a background job (trailing `&`) so the notebook stays usable;
# `&>` sends both stdout and stderr of the server to /content/logs.txt.
!streamlit run /content/app.py &>/content/logs.txt &

Click the `url` shown in the output of the localtunnel cell below.

A `logs.txt` file will be created in `/content`; it contains the output of the Streamlit server.

Copy the **External URL** from `logs.txt`, remove the port so that only the IP address is left, and paste it into the **Endpoint IP** input box on the page that opens.


In [None]:
# Open a public localtunnel URL that forwards to local port 8501.
# NOTE(review): 8501 is presumably the port the Streamlit server above listens on — confirm in logs.txt.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 3.556s
your url is: https://eleven-toys-hammer.loca.lt
/root/.npm/_npx/25403/lib/node_modules/localtunnel/bin/lt.js:81
    throw err;
    ^

Error: connection refused: localtunnel.me:37837 (check your firewall settings)
    at Socket.<anonymous> (/root/.npm/_npx/25403/lib/node_modules/[4mlocaltunnel[24m/lib/TunnelCluster.js:52:11)
[90m    at Socket.emit (events.js:315:20)[39m
[90m    at emitErrorNT (internal/streams/destroy.js:106:8)[39m
[90m    at emitErrorCloseNT (internal/streams/destroy.js:74:3)[39m
[90m    at processTicksAndRejections (internal/process/task_queues.js:80:21)[39m
