In [None]:
pip install simpletransformers pandas torch

In [None]:
pip install simpletransformers pandas scikit-learn torch

In [32]:
import pandas as pd
import torch
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [33]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

Using device: cpu


In [34]:
sentence_df = pd.read_csv("sentence-level-annotation.csv")

In [35]:
print("Dataset Preview:")
display(sentence_df.head())

Dataset Preview:


Unnamed: 0.1,Unnamed: 0,Sentence,Hate_speech,Sentiment,Humor,Billing or price,Customer service,Data,Network,Package,Service or product,None
0,0,Ammage Adarayta❤️Eka Dawasak Madi Neda❤️🙏❤️,Not offensive,Negative,Non-humorous,0,0,0,0,0,0,1
1,1,We need IPL Champions leak data offers ..pleas...,Not offensive,Neutral,Non-humorous,0,0,1,0,0,0,0
2,2,#VPN #ummmaaa #proud_be,Not offensive,Neutral,Non-humorous,0,0,0,0,0,0,1
3,3,chandimal.. uuu thama mulu tem ekama kaaa gaha...,Not offensive,Positive,Non-humorous,0,0,0,0,0,0,1
4,4,Batzgo,Not offensive,Neutral,Non-humorous,0,0,0,0,0,0,1


In [36]:
sentence_df = sentence_df[['Sentence', 'Sentiment']]
sentence_df.dropna(inplace=True)

In [38]:
sentence_df.columns = sentence_df.columns.str.strip()

In [39]:
sentence_df["Sentiment"] = sentence_df["Sentiment"].astype(str).str.strip()

In [40]:
label_map = {"Negative": 0, "Neutral": 1, "Positive": 2}
sentence_df["Sentiment"] = sentence_df["Sentiment"].map(label_map)

In [41]:
sentence_df = sentence_df.dropna()

In [42]:
sentence_df["Sentiment"] = sentence_df["Sentiment"].astype(int)

In [43]:
print("\nDataset After Preprocessing:")
display(sentence_df.head())


Dataset After Preprocessing:


Unnamed: 0,Sentence,Sentiment
0,Ammage Adarayta❤️Eka Dawasak Madi Neda❤️🙏❤️,0
1,We need IPL Champions leak data offers ..pleas...,1
2,#VPN #ummmaaa #proud_be,1
3,chandimal.. uuu thama mulu tem ekama kaaa gaha...,2
4,Batzgo,1


In [44]:
print("\nData Types:")
print(sentence_df.dtypes)


Data Types:
Sentence     object
Sentiment     int32
dtype: object


In [None]:
train_df, test_df = train_test_split(sentence_df, test_size=0.2, random_state=42)
test_df, eval_df = train_test_split(test_df, test_size=0.1, random_state=42)

In [None]:
print(f"Training Samples: {len(train_df)}")
print(f"Testing Samples: {len(test_df)}")

Training Samples: 10715
Testing Samples: 2679


In [47]:
model_args = ClassificationArgs()
model_args.num_train_epochs = 3
model_args.train_batch_size = 16
model_args.eval_batch_size = 16
model_args.learning_rate = 2e-5
model_args.overwrite_output_dir = True
model_args.save_best_model = True
model_args.evaluate_during_training = True
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.output_dir = "xlm-roberta-sentiment-model"
model_args.best_model_dir = "xlm-roberta-best-model"

In [48]:
model = ClassificationModel(
    "xlmroberta", "xlm-roberta-base",
    num_labels=3,
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

print("\n✅ Model Loaded Successfully!")

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



✅ Model Loaded Successfully!




In [49]:
print("\n🚀 Training Started...")
model.train_model(train_df, eval_df=eval_df)



🚀 Training Started...




Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/670 [00:00<?, ?it/s]



  0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/670 [00:00<?, ?it/s]



  0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 3 of 3:   0%|          | 0/670 [00:00<?, ?it/s]



  0%|          | 0/5 [00:00<?, ?it/s]



  0%|          | 0/5 [00:00<?, ?it/s]

(2010,
 defaultdict(list,
             {'global_step': [670, 1340, 2000, 2010],
              'train_loss': [0.27041223645210266,
               0.45960161089897156,
               0.15427254140377045,
               0.7333008050918579],
              'mcc': [0.5487675161125251,
               0.5764989153800335,
               0.5914864715003111,
               0.5908460381288406],
              'eval_loss': [0.5489174436245646,
               0.5584702179101961,
               0.5434878821529093,
               0.5435469022153744]}))

In [None]:
print("\n🔍 Evaluating Model...")
result, model_outputs, wrong_predictions = model.eval_model(test_df, acc=accuracy_score)


🔍 Evaluating Model...




  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/168 [00:00<?, ?it/s]

In [51]:
print(f"\nModel Accuracy: {result['acc']:.4f}")


Model Accuracy: 0.7947


In [52]:
predictions, raw_outputs = model.predict(eval_df["Sentence"].tolist())
print("\n📊 Classification Report:\n")
print(classification_report(eval_df["Sentiment"], predictions))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/168 [00:00<?, ?it/s]


📊 Classification Report:

              precision    recall  f1-score   support

           0       0.72      0.75      0.73       734
           1       0.84      0.86      0.85      1708
           2       0.68      0.49      0.57       237

    accuracy                           0.79      2679
   macro avg       0.75      0.70      0.72      2679
weighted avg       0.79      0.79      0.79      2679



In [None]:
sample_texts = ["Fiber ඉල්ලලා දැන් අව්රුද්දක් විතර😒 blloh😒"]
predictions, raw_outputs = model.predict(sample_texts)

0it [00:00, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [63]:
reverse_label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
predictions = [reverse_label_map[p] for p in predictions]

In [64]:
print("\n💡 Sample Predictions:")
for text, pred in zip(sample_texts, predictions):
    print(f"📝 Text: {text} --> Predicted Sentiment: {pred}")


💡 Sample Predictions:
📝 Text: Fiber à¶‰à¶½à·Šà¶½à¶½à· à¶¯à·à¶±à·Š à¶…à·€à·Šà¶»à·”à¶¯à·Šà¶¯à¶šà·Š à·€à·’à¶­à¶»ðŸ˜’ bllohðŸ˜’ --> Predicted Sentiment: Neutral


In [61]:
model.save_model("my_simpletransformers_model")
