In [4]:
import spacy
import torch
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load the spaCy "en_core_web_sm" pipeline; preprocess() relies on it for
# stop-word flags (token.is_stop) and lemmas (token.lemma_).
nlp = spacy.load("en_core_web_sm")

In [6]:
# Load the shared BERT tokenizer and two separate 2-label sequence classifiers:
# one used for aspect extraction, one used for sentiment analysis.
# NOTE: the load-time warning printed by this cell reports that some weights of
# BertForSequenceClassification were not initialized from the
# 'bert-base-uncased' checkpoint and are newly initialized (presumably the
# classification head - confirm against the full warning). Until both models
# are fine-tuned, or replaced by fine-tuned checkpoints, their predicted labels
# should not be treated as meaningful.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
aspect_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
sentiment_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████| 48.0/48.0 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
vocab.txt: 100%|█████████████████████████████████████████████████████████████████████| 232k/232k [00:00<00:00, 373kB/s]
tokenizer.json: 100%|████████████████████████████████████████████████████████████████| 466k/466k [00:00<00:00, 749kB/s]
config.json: 100%|████████████████████████████████████████████████████████████████████████████| 570/570 [00:00<?, ?B/s]
model.safetensors: 100%|████████████████████████████████████████████████████████████| 440M/440M [00:14<00:00, 30.7MB/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initial

In [7]:
# Text normalization helper built on the spaCy pipeline `nlp`
def preprocess(text):
    """Lowercase ``text``, drop stop words and return the remaining lemmas.

    Args:
        text: Raw input string.

    Returns:
        A single string made of the lemmas of every token that spaCy does not
        flag as a stop word, joined by one space each.
    """
    # NOTE(review): spaCy's stop list may include negations such as "not";
    # confirm that removing them is intended before using the result for
    # sentiment analysis.
    kept_lemmas = []
    for tok in nlp(text.lower()):
        if tok.is_stop:
            continue
        kept_lemmas.append(tok.lemma_)
    return " ".join(kept_lemmas)

In [8]:
# Function to extract aspects/problems using BERT
def extract_aspects(text):
    """Predict an aspect class index for ``text`` with ``aspect_model``.

    Args:
        text: Text to classify. It is tokenized with the module-level
            ``tokenizer`` (truncation and padding enabled).

    Returns:
        torch.Tensor: for each input sequence, the index of the largest logit
        produced by ``aspect_model`` (argmax over dim 1).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only: disable autograd so the forward pass does not build and
    # retain a computation graph. The predicted indices are unaffected.
    with torch.no_grad():
        logits = aspect_model(**inputs).logits
    return torch.argmax(logits, dim=1)

In [9]:
# Function to perform sentiment analysis using BERT
def analyze_sentiment(text):
    """Predict a sentiment class index for ``text`` with ``sentiment_model``.

    Args:
        text: Text to classify. It is tokenized with the module-level
            ``tokenizer`` (truncation and padding enabled).

    Returns:
        torch.Tensor: for each input sequence, the index of the largest logit
        produced by ``sentiment_model`` (argmax over dim 1).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    # Inference only: disable autograd so the forward pass does not build and
    # retain a computation graph. The predicted indices are unaffected.
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits
    return torch.argmax(logits, dim=1)

In [10]:
# Example review used to exercise the pipeline end to end; it mixes a problem
# report (missing tyre, delay) with positive feedback about the garage.
review = "One tyre went missing, so there was a delay to get the two tyres fitted. The way garage dealt with it was fantastic."

In [11]:
# Preprocess review: lowercase, drop stop words, keep lemmas (see preprocess)
cleaned_review = preprocess(review)

In [12]:
# Extract aspects/problems: class indices predicted by aspect_model for the
# preprocessed review text
aspects = extract_aspects(cleaned_review)

In [13]:
# Analyze sentiment: class indices predicted by sentiment_model for the
# preprocessed review text
sentiments = analyze_sentiment(cleaned_review)

In [14]:
# Pair each aspect prediction with the sentiment prediction at the same position
results = [(aspect, sentiment) for aspect, sentiment in zip(aspects, sentiments)]
print(results)

[(tensor(1), tensor(0))]
