# Hosting vLLM on the Server

```
python -m vllm.entrypoints.openai.api_server --model google/gemma-3n-E4B-it --dtype bfloat16 --gpu-memory-utilization 0.95 --max-model-len 1024 --port 9999 --tensor-parallel-size 1

```

In [1]:
import os
from tqdm import tqdm
import pandas as pd

from src.genai import GenAIClassifier

✓ All random seeds set to 42


In [2]:
# Input/output locations, relative to the notebook's working directory.
data_root = "../data/multipride_data/"
figures_root = "../figures/"
results_root = "../results/train/"
os.makedirs(figures_root, exist_ok=True)
os.makedirs(results_root, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so the concatenated
# training frame has the same row order on every run. Predictions are later
# written next to previously saved rows by position, so the order must be stable.
train_files = sorted(
    file for file in os.listdir(data_root) if (file.endswith(".csv") and ("train" in file))
)
train_files

['train_en.csv', 'train_es.csv', 'train_it.csv']

In [3]:
# Read every training split and stack them into one frame.
# The per-file frames are collected first and concatenated once, instead of
# re-concatenating a growing frame (starting from an empty DataFrame) per file.
language_frames = []

for file in train_files:
    temp_df = pd.read_csv(os.path.join(data_root, file))
    # The English split gets an empty "bio" column, as before; the check now
    # matches the "_en.csv" suffix rather than any "en" substring in the name.
    # Any other split lacking "bio" is filled the same way so schemas agree.
    if file.endswith("_en.csv") or "bio" not in temp_df.columns:
        temp_df["bio"] = None
    language_frames.append(temp_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# training files were found.
train_df = pd.concat(language_frames, ignore_index=True) if language_frames else pd.DataFrame()

print(f"Total training samples: {train_df.shape[0]}")

Total training samples: 2988


In [4]:
# Preview the first five rows of the combined training frame.
train_df.head(n=5)

Unnamed: 0,id,text,label,lang,bio
0,en_1021,"I've never heard anyone use the word ""faggot"" ...",0,en,
1,en_1496,So you don't see the slighest problem of someb...,0,en,
2,en_1312,"And to be fair, getting triggered by slurs is ...",1,en,
3,en_469,"I kinda feel like it's saying ""the faggot comm...",0,en,
4,en_565,"Homophobia, racism, and the resulting endless ...",0,en,


In [5]:
# Model under evaluation. Exactly one `model_name` assignment should be active;
# the commented-out alternatives are other candidates kept for quick switching.
# NOTE(review): the launch command at the top of the notebook serves
# google/gemma-3n-E4B-it — presumably the active name must match the model the
# server is hosting; confirm against GenAIClassifier.
# model_name = "HuggingFaceTB/SmolLM3-3B"
# model_name = "microsoft/Phi-3.5-mini-instruct"
# model_name = "tiiuae/Falcon3-3B-Instruct"
# model_name = "Qwen/Qwen2.5-Omni-7B"
model_name = "google/gemma-3n-E4B-it"

# OOM Models (noted by the author as running out of memory)
# model_name = "Qwen/Qwen2.5-7B-Instruct"
# model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# `model_name` is also used later as the column name under which this model's
# predictions are stored in the results CSV.
genai_classifier = GenAIClassifier(model_name=model_name)

In [6]:
# Distinct language codes present in the training data.
set(train_df["lang"])

{'en', 'es', 'it'}

In [7]:
# Language code (as found in the "lang" column) -> language name passed to the classifier.
language_mapper = dict(en="English", es="Spanish", it="Italian")

In [8]:
# First training example together with its language code.
(train_df["text"][0], train_df["lang"][0])

('I\'ve never heard anyone use the word "faggot" and not sound like an insecure 13 year old. ',
 'en')

# Sample Test Query

In [9]:
# Prompt variant flag; it is reused by the later cells that name the result
# file and run the full classification pass.
use_advanced = True

# Classify the first training example as a quick smoke test.
sample_text = train_df.text[0]
sample_language = language_mapper[train_df.lang[0]]
result = genai_classifier.classify(sample_text, sample_language, use_advanced=use_advanced)
result

{'tweet': 'I\'ve never heard anyone use the word "faggot" and not sound like an insecure 13 year old. ',
 'language': 'English',
 'classification': 'NOT_RECLAMATORY',
 'classification_label': 0,
 'raw_response': 'NOT_RECLAMATORY',
 'model': 'google/gemma-3n-E4B-it'}

# Running for All Training Samples

In [10]:

# Results file name depends on which prompt variant (`use_advanced`) is active.
result_file = "train_cot_prompt.csv" if use_advanced else "train_simple_prompt.csv"
    
    

In [11]:
predicted_labels = []
for row in tqdm(train_df.itertuples(index=False), total=len(train_df), desc="Classifying"):
    result = genai_classifier.classify(row.text, language_mapper[row.lang], use_advanced=use_advanced)
    predicted_labels.append(result["classification_label"])


Classifying: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2988/2988 [26:04<00:00,  1.91it/s]


In [12]:
# Persist this model's predictions as a column named after the model, next to
# the id/lang/label columns. If the results file already exists, its contents
# are kept and the column is added to them (or overwritten for a re-run of the
# same model).
results_write_path = os.path.join(results_root, result_file)

if os.path.exists(results_write_path):
    init_df = pd.read_csv(results_write_path)
    # Predictions are attached by position, so the saved rows must be the same
    # samples in the same order as train_df; otherwise labels would be silently
    # written against the wrong ids.
    if init_df["id"].astype(str).tolist() != train_df["id"].astype(str).tolist():
        raise ValueError(
            f"Row ids in {results_write_path} do not match train_df; "
            "refusing to append misaligned predictions."
        )
else:
    # Explicit copy: adding a column to a bare column selection of train_df
    # triggers pandas' SettingWithCopyWarning.
    init_df = train_df[["id", "lang", "label"]].copy()

init_df[model_name] = predicted_labels

init_df.to_csv(results_write_path, index=False)