In [1]:
import pandas as pd
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification

In [2]:
# Read the English board-game comments CSV into a DataFrame.
file_path = '/Users/negarakhgar/Desktop/nlp project/data/english_boardgames_comments.csv'
english_comments_df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Both the classifier weights and the tokenizer files are loaded from the
# same local checkpoint directory.
model_1_dir = '/Users/negarakhgar/Desktop/nlp project/model'
model_1 = AlbertForSequenceClassification.from_pretrained(model_1_dir)
tokenizer_1 = AlbertTokenizer.from_pretrained(model_1_dir)

In [4]:
# Put the model in evaluation mode before running inference. Because this is
# the last expression in the cell, the returned module's repr is echoed below.
model_1.eval()

AlbertForSequenceClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertSdpaAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=

In [5]:
# The sentiment analysis function
def analyze_sentiment_model_1(text):
    """Classify ``text`` with Model 1 and return ``(label, score)``.

    The text is tokenized with ``tokenizer_1`` (truncated to at most 512
    tokens) and run through ``model_1`` without gradient tracking.

    Args:
        text: The comment to classify. ``.item()`` is called on the argmax
            of the logits, so exactly one prediction per call is expected.

    Returns:
        A tuple ``(label, score)`` where ``label`` is ``'negative'`` for
        class 0 or ``'positive'`` for class 1, and ``score`` is the largest
        softmax probability over the logits, as a Python float.
    """
    encoded = tokenizer_1(text, padding=True, truncation=True, max_length=512, return_tensors='pt')

    # Use the GPU when one is available, otherwise stay on the CPU; the model
    # and every input tensor must live on the same device.
    if torch.cuda.is_available():
        target_device = torch.device('cuda')
    else:
        target_device = torch.device('cpu')
    model_1.to(target_device)
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model_1(**encoded).logits
        class_index = torch.argmax(logits, dim=1).item()
        probabilities = torch.softmax(logits, dim=1)
        confidence = probabilities.max().item()

    # Map the predicted class index to its sentiment label.
    sentiment_names = {0: 'negative', 1: 'positive'}
    return sentiment_names[class_index], confidence

In [6]:
# Applying sentiment analysis using Model 1.
# Each comment yields a (label, score) tuple. The tuples are collected first
# and turned into one two-column DataFrame, instead of building a separate
# pd.Series for every row inside .apply. Reusing the frame's own index keeps
# each result aligned with the comment it came from.
model_1_columns = ['sentiment_model_1', 'score_model_1']
model_1_results = [analyze_sentiment_model_1(comment) for comment in english_comments_df['value']]
english_comments_df[model_1_columns] = pd.DataFrame(
    model_1_results,
    columns=model_1_columns,
    index=english_comments_df.index,
)

In [7]:
# Write the frame, now including the Model 1 columns, to CSV without the
# row index, then report where it was written.
sentiment_analysis_model_1_file = '/Users/negarakhgar/Desktop/nlp project/boardgames_sentiment_analysis_model_1.csv'
english_comments_df.to_csv(path_or_buf=sentiment_analysis_model_1_file, index=False)

print(f"Sentiment analysis using Model 1 completed and results saved to {sentiment_analysis_model_1_file}")

Sentiment analysis using Model 1 completed and results saved to /Users/negarakhgar/Desktop/nlp project/boardgames_sentiment_analysis_model_1.csv


In [8]:
# Show the first ten rows of the identifier, comment text, and Model 1 outputs.
print("\nSample of the results:")
sample_columns = ['boardgame_id', 'value', 'sentiment_model_1', 'score_model_1']
print(english_comments_df[sample_columns].iloc[:10])


Sample of the results:
   boardgame_id                                              value  \
0        224517  SLEEVED[IMG]https://cf.geekdo-static.com/mbs/m...   
1        224517  Great game, full controllo of your strategy th...   
2        224517                                      Location: MSK   
3        224517  Very clever game, enjoyable overall.  Plus poi...   
4        224517  Brilliant!  Fits right into my wheelhouse all ...   
5        224517  Absolutely brilliant!  I never played the orig...   
6        224517  I prefer old school Brass or AoI. I do like th...   
7        224517  The game itself is not interesting enough to l...   
8        224517                               "You can't do that."   
9        224517  This is a near-perfect board game because...  ...   

  sentiment_model_1  score_model_1  
0          positive       0.974834  
1          positive       0.998138  
2          negative       0.745414  
3          positive       0.996099  
4          positive 