In [2]:
import numpy as np
import pandas as pd
import tweepy as x

In [None]:
from textblob import TextBlob

input_file = "grok-tweet.txt"
output_file = "grok-sentiment.json"

with open(input_file, "r", encoding="utf-8") as f:
    lines = f.read().splitlines()

annotated_data = []

for line in lines:
    if not line.strip():
        continue
    
    blob = TextBlob(line)
    polarity = blob.sentiment.polarity

    if polarity > 0.1:
        sentiment = "positive"
    elif polarity < -0.1:
        sentiment = "negative"
    else:
        sentiment = "neutral"
    
    annotated_data.append({
        "text": line,
        "polarity": polarity,
        "sentiment": sentiment
    })

with open(output_file, "w", encoding="utf-8") as out:
    json.dump(annotated_data, out, indent=2, ensure_ascii=False)



In [None]:
import json

with open("grok-sentiment-cleaned.json", "r", encoding="utf-8") as f:
    data = json.load(f)

df = pd.DataFrame(data)

def get_final_label(row):
    corr = str(row["corrected_sentiment"]).strip().lower()
    if corr in ["positive", "negative", "neutral"]:
        return corr
    else:
        return str(row["sentiment"]).strip().lower()

df["final_sentiment"] = df.apply(get_final_label, axis=1)
df.to_csv("grok-sentiment-final.csv", index=False, encoding="utf-8")

In [5]:
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)

df = pd.read_csv("Dataset/grok-sentiment-final.csv")  

dataset = Dataset.from_pandas(df)

dataset = dataset.train_test_split(test_size=0.1)

labels = ["negative", "neutral", "positive"] 
def encode_label(example):
    example["labels"] = labels.index(example["final_sentiment"])
    return example

dataset = dataset.map(encode_label)

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(batch):
    return tokenizer(batch["text"], truncation=True, padding=True)

dataset = dataset.map(tokenize_function, batched=True)

dataset = dataset.remove_columns(["text", "final_sentiment"])

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(labels)
)

data_collator = DataCollatorWithPadding(tokenizer)

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",        
    num_train_epochs=4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
)

trainer.train()


Map:   0%|          | 0/401 [00:00<?, ? examples/s]

Map:   0%|          | 0/45 [00:00<?, ? examples/s]

Map:   0%|          | 0/401 [00:00<?, ? examples/s]

Map:   0%|          | 0/45 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,No log,0.443168
2,No log,0.360411
3,No log,0.385632
4,No log,0.422951


TrainOutput(global_step=204, training_loss=0.42245008431228936, metrics={'train_runtime': 1163.1679, 'train_samples_per_second': 1.379, 'train_steps_per_second': 0.175, 'total_flos': 15355108158696.0, 'train_loss': 0.42245008431228936, 'epoch': 4.0})

In [6]:
metrics = trainer.evaluate()
print(metrics)

{'eval_loss': 0.42295122146606445, 'eval_runtime': 7.63, 'eval_samples_per_second': 5.898, 'eval_steps_per_second': 0.786, 'epoch': 4.0}


In [7]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='macro')
    acc = accuracy_score(labels, predictions)
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [8]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics  # <--- important
)
trainer.train()
eval_results = trainer.evaluate()
print(eval_results)

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.564383,0.866667,0.537538,0.590741,0.562059
2,No log,0.602619,0.866667,0.537538,0.590741,0.562059
3,No log,0.702848,0.844444,0.537037,0.581481,0.557239
4,No log,0.738256,0.844444,0.537037,0.581481,0.557239


{'eval_loss': 0.7382557392120361, 'eval_accuracy': 0.8444444444444444, 'eval_precision': 0.5370370370370371, 'eval_recall': 0.5814814814814815, 'eval_f1': 0.5572390572390572, 'eval_runtime': 3.3765, 'eval_samples_per_second': 13.327, 'eval_steps_per_second': 1.777, 'epoch': 4.0}


In [27]:
test_texts = [
    "Grok 3 is helpful",
    "idk what i feel about this ai called grok.",
    "grok is not better than gpt.",
    "grok. that's it. that's the tweet.",
]

test_df = pd.DataFrame({"text": test_texts})
test_dataset = Dataset.from_pandas(test_df)

test_dataset = test_dataset.map(lambda x: tokenizer(x["text"], truncation=True, padding=True), batched=True)

test_dataset = test_dataset.remove_columns(["text"])

predictions = trainer.predict(test_dataset)
pred_classes = np.argmax(predictions.predictions, axis=1)
for text, pred in zip(test_texts, pred_classes):
    print(text, "->", labels[pred])


Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Grok 3 is helpful -> positive
idk what i feel about this ai called grok. -> neutral
grok is not better than gpt. -> negative
grok. that's it. that's the tweet. -> positive
