## Method 1
### **`Using BERT-type models`**

In [1]:
import numpy as np
import pandas as pd 
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download, list_repo_files
from colorama import Style, Fore, Back
import torch
import os
import time

import logging
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)

  from .autonotebook import tqdm as notebook_tqdm


### GET Model

In [2]:
repo_id = "tabularisai/multilingual-sentiment-analysis"
local_dir = "/mnt/d/Desktop/HuggingFaceModels/Multi_SA"

### DO NOT RUN AGAIN

In [3]:
# filenames = list_repo_files(repo_id)
# for filename in filenames:
#     print(hf_hub_download(repo_id = repo_id, local_dir = local_dir, filename=f"{filename}"))

In [4]:
tokenizer = AutoTokenizer.from_pretrained(local_dir)
model = AutoModelForSequenceClassification.from_pretrained(local_dir)

def predict_sentiment(text):
    input_text = tokenizer(text, return_tensors = "pt", \
                           padding = "max_length", truncation = True, max_length = 128)
    logger.debug(f'{Style.BRIGHT}{Fore.BLUE}{input_text}{Style.RESET_ALL}')
    with torch.no_grad():
        output_ = model(**input_text)
        logger.debug(f'{Style.BRIGHT}{Fore.GREEN}{output_}{Style.RESET_ALL}')
    prob = torch.nn.functional.softmax(output_.logits, dim = -1)
    sentiment_map = {0: "VERY BAD", 1: "BAD", 2: "NEUTRAL", 3: "GOOD", 4: "VERY GOOD"}
    return [sentiment_map[p] for p in torch.argmax(prob, dim = -1).tolist()]

In [5]:
def pretty_print(sentence, sentiment):
    logger.info(f"{Style.BRIGHT}Verdict: {sentiment}{Style.RESET_ALL}")
    logger.info(f"{Style.BRIGHT}{Fore.BLUE}{sentence}{Style.RESET_ALL}")
    logger.info(f'\n')

### RUN 1

In [8]:
start_time = time.time()
with open("5G_logs_v1.txt", "r") as file:
    for sentence in file:
        sentiment = predict_sentiment(sentence)
        pretty_print(sentence, sentiment)
end_time = time.time()
logger.info(f'{Style.BRIGHT}{Fore.GREEN}Total Time: {(end_time - start_time)}{Style.RESET_ALL}')
logger.info(f'\n')

INFO:root:[1mVerdict: ['VERY GOOD'][0m
INFO:root:[1m[34mUplink Throughput: 1024 kbps
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 1 - RACH Process: success
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 0 - Connection Status: connected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUplink Throughput: 2048 kbps
[0m
INFO:root:

INFO:root:[1mVerdict: ['BAD'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 2 - RACH Process: unsuccessful 
[0m
INFO:root:

INFO:root:[1mVerdict: ['BAD'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['BAD'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mDownlink Throughput: 1024 kbps
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUT

## Method 2: caching results

### **`Building cache`**

In [9]:
import glob
import json
from collections import defaultdict

filenames = glob.glob("*.txt")
versions = []
prefix = "5G_logs_v"
for filename in filenames:
    versions.append(int(filename.split(".")[0][len(prefix):]))
versions.sort()
latest = versions[-1]
versions.pop(-1)

1

### RUN 1

In [18]:
start_time = time.time()
cache = {}
try:
    with open("cache", "r") as cfile:
        cache = json.load(cfile)
except: pass 

for filename in filenames:
    fv = int(filename.split(".")[0][len(prefix):])
    if fv == latest:
        with open(f"{filename}", "r") as file:
            for sentence in file:
                hash_key, is_new = str(hash(sentence)), True
                if hash_key in cache:
                    for sentenceX, sentiment in cache[hash_key]:
                        if sentence == sentenceX: 
                            pretty_print(sentenceX, sentiment)
                            is_new = False
                            break
                if is_new: 
                    if hash_key not in cache: cache[hash_key] = []
                    cache[hash_key].append([sentence, sentiment])
                    sentiment = predict_sentiment(sentence)
                    pretty_print(sentence, sentiment)
end_time = time.time()
logger.info(f'{Style.BRIGHT}{Fore.GREEN}Total Time: {(end_time - start_time)}{Style.RESET_ALL}')
logger.info(f'\n')

with open("cache", "w") as cfile:
    json.dump(cache, cfile, indent = 5)

INFO:root:[1mVerdict: ['VERY GOOD'][0m
INFO:root:[1m[34mUplink Throughput: 1024 kbps
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 1 - RACH Process: success
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 0 - Connection Status: connected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUplink Throughput: 2048 kbps
[0m
INFO:root:

INFO:root:[1mVerdict: ['BAD'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 2 - RACH Process: unsuccessful 
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mUE 1 - Connection Status: disconnected
[0m
INFO:root:

INFO:root:[1mVerdict: ['NEUTRAL'][0m
INFO:root:[1m[34mDownlink Throughput: 1024 kbps
[0m
INFO:root:

INFO:root:[1mVerdict