* This Notebook describes how to use the trained model to check which hashtags are accessible and which are not.

* At the end, it computes the overall percentage of accessible and inaccessible hashtags.

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from transformers import BertForSequenceClassification, BertTokenizer
import torch
import pandas as pd
from collections import Counter
import ast
from transformers import pipeline

In [None]:
# Load the fine-tuned model and tokenizer, then wrap them in a pipeline.
# In Google Colab: upload the files saved during training into a new folder
# and put that folder's path below.
model_dir = "path to the folder"

model = BertForSequenceClassification.from_pretrained(model_dir)
tokenizer = BertTokenizer.from_pretrained(model_dir)

# Query CUDA availability once and reuse the answer for both the model
# placement and the pipeline's device argument.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
model.to(device)

# Prediction pipeline: device index 0 when CUDA is available, otherwise -1
# (the value the original code passes for the CPU case).
pipeline_device = 0 if use_gpu else -1
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=pipeline_device,
)

In [None]:
# Quick manual check: classify a handful of hand-picked hashtags and print
# the readable label together with the model's score for each one.
label_map = {
    "LABEL_0": "Inaccessible",
    "LABEL_1": "Accessible",
}

hashtags = [
    "#ThisIsAccessible",
    "#thisisnotaccessible",
    "#Καλοκαίρι2024",
    "#καλοκαιρι",
    "#καλοκαίρινησί",
    "#ΚαλοκαίριΝησί",
]

for tag in hashtags:
    # The classifier returns a list for a single input; its first entry
    # carries the 'label' and 'score' keys read below.
    result = classifier(tag)[0]
    readable_label = label_map[result["label"]]
    confidence = result["score"]
    print(f"{tag:30} → {readable_label} ({confidence:.2f})")

In [None]:
# Classify every hashtag found in a CSV file and print the overall split
# between accessible and inaccessible hashtags.
# Each non-empty cell of the "hashtags" column is parsed with
# ast.literal_eval and is used only when it evaluates to a Python list,
# e.g. "['#OneTag', '#anothertag']".
df = pd.read_csv("the CSV file")

# Map model labels to human-readable form
label_map = {"LABEL_0": "Inaccessible", "LABEL_1": "Accessible"}

all_hashtags = []

# Parse hashtags from column
for row in df["hashtags"].dropna():
    # Keep the try body minimal and catch only what literal_eval is
    # documented to raise, so unrelated bugs are not silently swallowed.
    try:
        parsed = ast.literal_eval(row)
    except (
        ValueError,
        SyntaxError,
        TypeError,
        MemoryError,
        RecursionError,
    ) as e:
        print(f"Skipping row due to error: {e}")
        continue

    # Cells that parse to something other than a list are ignored.
    if not isinstance(parsed, list):
        continue

    for item in parsed:
        if isinstance(item, str):
            all_hashtags.append(item)
        else:
            # A non-string entry cannot be a hashtag; skip it instead of
            # handing it to the text classifier.
            print(f"Skipping non-string hashtag entry: {item!r}")

# Classify and store results.
# Each distinct hashtag is sent to the model only once; the cached label is
# reused for repeated occurrences (assumes the classifier returns the same
# label for the same input text).
label_by_tag = {}
for tag in all_hashtags:
    if tag not in label_by_tag:
        result = classifier(tag)[0]
        label_by_tag[tag] = label_map[result["label"]]

# One label per hashtag occurrence, so duplicates still weigh in the totals.
classification_results = [label_by_tag[tag] for tag in all_hashtags]

# Count and calculate percentages
counts = Counter(classification_results)
total = sum(counts.values())
print("\n--- Summary ---")
for label in ["Accessible", "Inaccessible"]:
    count = counts[label]
    # Guard against division by zero when no hashtag was collected.
    percent = 100 * count / total if total else 0
    print(f"{label}: {count} ({percent:.2f}%)")