In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
import re

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
                AutoTokenizer,
                AutoModelForCausalLM,
                Trainer,
                TrainingArguments,
                DataCollatorForLanguageModeling
                )
from torchinfo import summary
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder


In [3]:
!kaggle datasets download -d abhi8923shriv/sentiment-analysis-dataset


Dataset URL: https://www.kaggle.com/datasets/abhi8923shriv/sentiment-analysis-dataset
License(s): CC0-1.0
Downloading sentiment-analysis-dataset.zip to /kaggle/working
 94%|████████████████████████████████████▌  | 51.0M/54.4M [00:00<00:00, 115MB/s]
100%|███████████████████████████████████████| 54.4M/54.4M [00:00<00:00, 132MB/s]


In [4]:
from zipfile import ZipFile

zip_file_name = '/kaggle/working/sentiment-analysis-dataset.zip'

with ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall('/kaggle/working/data')

In [5]:
data_train =  pd.read_csv('/kaggle/working/data/train.csv',encoding='ISO-8859-1')

In [6]:
data_train.head(4)

Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²)
0,cb774db0d1,"I`d have responded, if I were going","I`d have responded, if I were going",neutral,morning,0-20,Afghanistan,38928346,652860.0,60
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,negative,noon,21-30,Albania,2877797,27400.0,105
2,088c60f138,my boss is bullying me...,bullying me,negative,night,31-45,Algeria,43851044,2381740.0,18
3,9642c003ef,what interview! leave me alone,leave me alone,negative,morning,46-60,Andorra,77265,470.0,164


In [7]:
# I want to clean the text to proceed further :
# And i want only text and sentiment column , i am gonna select those two.

# data_train_selected = data_train.drop(columns=['textID', 'text', 'Time of Tweet', 'Age of User', 'Country', 'Population -2020', 'Land Area (Km²)', 'Density (P/Km²)'], axis=1)

# data_train_selected = data_train.loc[:, [ 'selected_text', 'sentiment']]

data_train_selected = data_train.filter(items=['selected_text', 'sentiment'])


In [8]:
data_train_selected.head(4)

Unnamed: 0,selected_text,sentiment
0,"I`d have responded, if I were going",neutral
1,Sooo SAD,negative
2,bullying me,negative
3,leave me alone,negative


In [9]:
data_train_selected['selected_text'] = data_train_selected['selected_text'].astype(str)


In [10]:
def clean_text(text):
    # Remove URLs and HTML tags
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    text = re.sub(r'<.*?>', '', text)
    
    # Remove punctuation and special characters
    text = re.sub(r'[^\w\s]', '', text)
    
    # Lowercase the text
    text = text.lower()
    
    return text

data_train_selected['cleaned_text'] = data_train_selected['selected_text'].apply(clean_text)

In [11]:
data_train_selected.head(5)

Unnamed: 0,selected_text,sentiment,cleaned_text
0,"I`d have responded, if I were going",neutral,id have responded if i were going
1,Sooo SAD,negative,sooo sad
2,bullying me,negative,bullying me
3,leave me alone,negative,leave me alone
4,"Sons of ****,",negative,sons of


In [12]:
data_train_selected['sentiment'].value_counts()

sentiment
neutral     11118
positive     8582
negative     7781
Name: count, dtype: int64

In [13]:
# Configuration

model_name = 'google-bert/bert-base-uncased'
output_dir = '/kaggle/working/fine_tuned_model'
max_length = 512
batch_size = 8
learning_rate = 1e-4
# epochs = 3


In [14]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load model and tokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)  # Change num_labels as needed

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [16]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 109484547


In [17]:
# Trainable parameters:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable parameters: {trainable_params}')

Trainable parameters: 109484547


In [18]:
summary(model)
    

Layer (type:depth-idx)                                       Param #
BertForSequenceClassification                                --
├─BertModel: 1-1                                             --
│    └─BertEmbeddings: 2-1                                   --
│    │    └─Embedding: 3-1                                   23,440,896
│    │    └─Embedding: 3-2                                   393,216
│    │    └─Embedding: 3-3                                   1,536
│    │    └─LayerNorm: 3-4                                   1,536
│    │    └─Dropout: 3-5                                     --
│    └─BertEncoder: 2-2                                      --
│    │    └─ModuleList: 3-6                                  85,054,464
│    └─BertPooler: 2-3                                       --
│    │    └─Linear: 3-7                                      590,592
│    │    └─Tanh: 3-8                                        --
├─Dropout: 1-2                                               --
├─L

In [19]:
# for param in model.bert.embeddings.parameters():
#     param.requires_grad = False

# # Trainable parameters:
# trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f'Trainable parameters: {trainable_params}')

# for layer in model.bert.encoder.layer[:6]:  # Freeze layers 0 to 5
#     for param in layer.parameters():
#         param.requires_grad = False

# # Trainable parameters:
# trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f'Trainable parameters: {trainable_params}')

# # Freeze the entire BERT Model(Except the Head)
# for param in model.bert.parameters():
#     param.requires_grad = False
# # Trainable parameters:
# trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# print(f'Trainable parameters: {trainable_params}')

# # Unfreeze the classification Head:
# for param in model.cls.parameters():
#     param.requires_grad = True


In [20]:
# # Detailed parameter breakdown:
# for name, param in model.named_parameters():
#     print(f"{name}: {param.numel()} parameters (Trainable: {param.requires_grad})")

In [21]:
#Freeze the entire BERT Model(Except the Head)
for param in model.bert.parameters():
    param.requires_grad = True
# Trainable parameters:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable parameters: {trainable_params}')

#Unfreeze the classification Head:
for param in model.classifier.parameters():
    param.requires_grad = True

# Trainable parameters:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable parameters: {trainable_params}')

Trainable parameters: 109484547
Trainable parameters: 109484547


In [22]:
data_test = pd.read_csv('/kaggle/working/data/test.csv',encoding='ISO-8859-1')

In [23]:
data_test

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,textID,text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²)
0,f87dea47db,Last session of the day http://twitpic.com/67ezh,neutral,morning,0-20,Afghanistan,38928346.0,652860.0,60.0
1,96d74cb729,Shanghai is also really exciting (precisely -...,positive,noon,21-30,Albania,2877797.0,27400.0,105.0
2,eee518ae67,"Recession hit Veronique Branquinho, she has to...",negative,night,31-45,Algeria,43851044.0,2381740.0,18.0
3,01082688c6,happy bday!,positive,morning,46-60,Andorra,77265.0,470.0,164.0
4,33987a8ee5,http://twitpic.com/4w75p - I like it!!,positive,noon,60-70,Angola,32866272.0,1246700.0,26.0
...,...,...,...,...,...,...,...,...,...
4810,,,,,,,,,
4811,,,,,,,,,
4812,,,,,,,,,
4813,,,,,,,,,


In [24]:
data_test_selected = data_test.filter(items=['text', 'sentiment'])

In [25]:
data_test_selected['sentiment'].value_counts()

sentiment
neutral     1430
positive    1103
negative    1001
Name: count, dtype: int64

In [26]:
data_test_selected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4815 entries, 0 to 4814
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       3534 non-null   object
 1   sentiment  3534 non-null   object
dtypes: object(2)
memory usage: 75.4+ KB


In [27]:
data_test_selected.dropna(inplace=True)

In [28]:
data_test_selected['text'] = data_test_selected['text'].astype(str)

def clean_text(text):
    # Remove URLs and HTML tags
    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    text = re.sub(r'<.*?>', '', text)
    
    # Remove punctuation and special characters
    text = re.sub(r'[^\w\s]', '', text)
    
    # Lowercase the text
    text = text.lower()
    
    return text

data_test_selected['cleaned_text'] = data_test_selected['text'].apply(clean_text)


In [29]:
data_test_selected

Unnamed: 0,text,sentiment,cleaned_text
0,Last session of the day http://twitpic.com/67ezh,neutral,last session of the day
1,Shanghai is also really exciting (precisely -...,positive,shanghai is also really exciting precisely s...
2,"Recession hit Veronique Branquinho, she has to...",negative,recession hit veronique branquinho she has to ...
3,happy bday!,positive,happy bday
4,http://twitpic.com/4w75p - I like it!!,positive,i like it
...,...,...,...
3529,"its at 3 am, im very tired but i can`t sleep ...",negative,its at 3 am im very tired but i cant sleep bu...
3530,All alone in this old house again. Thanks for...,positive,all alone in this old house again thanks for ...
3531,I know what you mean. My little dog is sinkin...,negative,i know what you mean my little dog is sinking...
3532,_sutra what is your next youtube video gonna b...,positive,_sutra what is your next youtube video gonna b...


In [30]:
test_texts = data_test_selected['cleaned_text'].tolist()
test_labels = data_test_selected['sentiment'].tolist()

In [31]:
label_encoder = LabelEncoder()

test_labels_encoded = label_encoder.fit_transform(test_labels)

In [32]:
# Prepare the Dataset Class:
class SentimentDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = tokenizer(
                             self.texts[idx],
                             truncation=True, 
                             padding='max_length',
                             max_length=128
                            )
        # Convert everything to tensors
        item = {key: torch.tensor(val) for key, val in encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
        # return {**encoding, 'labels':self.labels[idx]}
        
        

In [33]:
test_dataset = SentimentDataset(test_texts, test_labels_encoded)
test_loader = DataLoader(test_dataset, batch_size=16)

In [34]:
train_texts = data_train_selected['cleaned_text'].tolist()
train_labels = data_train_selected['sentiment'].tolist()

In [35]:
label_encoder = LabelEncoder()

train_labels_encoded = label_encoder.fit_transform(train_labels)

In [36]:
train_dataset = SentimentDataset(train_texts, train_labels_encoded)


In [37]:
# Check if GPU is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # Move model to the appropriate device
print(device)

cuda


## Lets test pretrained model on test data, ad evaluate the scores

In [38]:
# model.eval()
# predictions, true_labels = [], []

# with torch.no_grad():
#     for batch in test_loader:
#         # Move batch to the appropriate device
#         for key in batch.keys():
#             batch[key] = batch[key].to(device)

#         outputs = model(**batch)
#         logits = outputs.logits
#         preds = torch.argmax(logits, dim=-1)
#         predictions.extend(preds.cpu().numpy())  # Move predictions back to CPU for metric calculation
#         true_labels.extend(batch['labels'].cpu().numpy())  # Move true labels back to CPU

# # Calculate accuracy and other metrics
# accuracy = accuracy_score(true_labels, predictions)
# report = classification_report(true_labels, predictions, target_names=label_encoder.classes_)

# print(f"Accuracy: {accuracy}")
# print(report)

from tqdm import tqdm  # Import tqdm for progress bar
import torch
from sklearn.metrics import accuracy_score, classification_report

model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating", unit="batch"):
        # Move batch to the appropriate device
        for key in batch.keys():
            batch[key] = batch[key].to(device)

        outputs = model(**batch)
        
        logits = outputs.logits
        
        preds = torch.argmax(logits, dim=-1)
        
        predictions.extend(preds.cpu().numpy())  # Move predictions back to CPU for metric calculation
        true_labels.extend(batch['labels'].cpu().numpy())  # Move true labels back to CPU

# Calculate accuracy and other metrics
accuracy = accuracy_score(true_labels, predictions)
report = classification_report(true_labels, predictions, target_names=label_encoder.classes_)

print(f"Accuracy: {accuracy:.4f}")
print(report)


Evaluating: 100%|██████████| 221/221 [00:24<00:00,  8.84batch/s]

Accuracy: 0.4046
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00      1001
     neutral       0.40      1.00      0.58      1430
    positive       0.00      0.00      0.00      1103

    accuracy                           0.40      3534
   macro avg       0.13      0.33      0.19      3534
weighted avg       0.16      0.40      0.23      3534




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## So the bert model shows poor performance on the test data, so lets the fine tune it , and check the models performance

In [39]:
# Configuration

model_name = 'google-bert/bert-base-uncased'
output_dir = '/kaggle/working/fine_tuned_model'
max_length = 128
batch_size = 8
learning_rate = 1e-4
epochs = 10

In [40]:
# Training Arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=100,
    save_total_limit=2,
    report_to="tensorboard",
    fp16=torch.cuda.is_available(),
    load_best_model_at_end=True
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    # data_collator=data_collator

)

# # To resume from the latest checkpoint
# trainer.train(resume_from_checkpoint=True)

  trainer = Trainer(


In [41]:
# Train the model
trainer.train()

# Evaluate the model on the test dataset after training
eval_results = trainer.evaluate()
print(eval_results)



Epoch,Training Loss,Validation Loss
1,0.4158,0.987391
2,0.3042,1.122791
3,0.3032,1.189343
4,0.3098,2.594054
5,0.5537,5.190302
6,0.5674,5.769426
7,0.393,5.713596
8,0.4867,5.751124
9,0.2598,6.327115
10,0.2542,5.986694




{'eval_loss': 0.9873910546302795, 'eval_runtime': 21.5298, 'eval_samples_per_second': 164.144, 'eval_steps_per_second': 10.265, 'epoch': 10.0}


In [42]:
from tqdm import tqdm  # Import tqdm for progress bar
import torch
from sklearn.metrics import accuracy_score, classification_report

model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating", unit="batch"):
        # Move batch to the appropriate device
        for key in batch.keys():
            batch[key] = batch[key].to(device)

        outputs = model(**batch)
        
        logits = outputs.logits
        
        preds = torch.argmax(logits, dim=-1)
        
        predictions.extend(preds.cpu().numpy())  # Move predictions back to CPU for metric calculation
        true_labels.extend(batch['labels'].cpu().numpy())  # Move true labels back to CPU

# Calculate accuracy and other metrics
accuracy = accuracy_score(true_labels, predictions)
report = classification_report(true_labels, predictions, target_names=label_encoder.classes_)

print(f"Accuracy: {accuracy:.4f}")
print(report)

Evaluating: 100%|██████████| 221/221 [00:29<00:00,  7.42batch/s]

Accuracy: 0.6095
              precision    recall  f1-score   support

    negative       0.82      0.29      0.42      1001
     neutral       0.51      0.89      0.65      1430
    positive       0.85      0.54      0.66      1103

    accuracy                           0.61      3534
   macro avg       0.73      0.57      0.58      3534
weighted avg       0.70      0.61      0.59      3534






In [43]:
# Save the trained model
trainer.save_model()  # This saves the model to output_dir specified in TrainingArguments

# Save the tokenizer
tokenizer.save_pretrained(training_args.output_dir)  # This saves the tokenizer to the same directory

('/kaggle/working/fine_tuned_model/tokenizer_config.json',
 '/kaggle/working/fine_tuned_model/special_tokens_map.json',
 '/kaggle/working/fine_tuned_model/vocab.txt',
 '/kaggle/working/fine_tuned_model/added_tokens.json',
 '/kaggle/working/fine_tuned_model/tokenizer.json')

In [44]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Define the path where the model is saved
model_path = "/kaggle/working/fine_tuned_model"

# Load the tokenizer
tokenizer_finetuned = AutoTokenizer.from_pretrained(model_path)

# Load the model
model_finetuned = AutoModelForSequenceClassification.from_pretrained(model_path)


## Evaluate test data by loading the saved finetuned model

In [45]:
# # Check if GPU is available and set device accordingly
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model_finetuned.to(device)  # Move model to the appropriate device
# print(device)

cuda


In [46]:
# # Prepare the Dataset Class:
# class SentimentDataset(Dataset):
#     def __init__(self, texts, labels):
#         self.texts = texts
#         self.labels = labels

#     def __len__(self):
#         return len(self.texts)

#     def __getitem__(self, idx):
#         encoding = tokenizer_finetuned(
#                              self.texts[idx],
#                              truncation=True, 
#                              padding='max_length',
#                              max_length=128
#                             )
#         # Convert everything to tensors
#         item = {key: torch.tensor(val) for key, val in encoding.items()}
#         item['labels'] = torch.tensor(self.labels[idx])
#         return item
#         # return {**encoding, 'labels':self.labels[idx]}
        
        

In [47]:
# test_dataset_finetuned_tokenized = SentimentDataset(test_texts, test_labels_encoded)
# test_loader_finetuned = DataLoader(test_dataset, batch_size=16)

In [48]:
# from tqdm import tqdm  # Import tqdm for progress bar
# import torch
# from sklearn.metrics import accuracy_score, classification_report

# model_finetuned.eval()
# predictions, true_labels = [], []

# with torch.no_grad():
#     for batch in tqdm(test_loader_finetuned, desc="Evaluating", unit="batch"):
#         # Move batch to the appropriate device
#         for key in batch.keys():
#             batch[key] = batch[key].to(device)

#         outputs = model_finetuned(**batch)
        
#         logits = outputs.logits
        
#         preds = torch.argmax(logits, dim=-1)
        
#         predictions.extend(preds.cpu().numpy())  # Move predictions back to CPU for metric calculation
#         true_labels.extend(batch['labels'].cpu().numpy())  # Move true labels back to CPU

# # Calculate accuracy and other metrics
# accuracy = accuracy_score(true_labels, predictions)
# report = classification_report(true_labels, predictions, target_names=label_encoder.classes_)

# print(f"Accuracy: {accuracy:.4f}")
# print(report)

Evaluating: 100%|██████████| 221/221 [00:29<00:00,  7.49batch/s]

Accuracy: 0.6095
              precision    recall  f1-score   support

    negative       0.82      0.29      0.42      1001
     neutral       0.51      0.89      0.65      1430
    positive       0.85      0.54      0.66      1103

    accuracy                           0.61      3534
   macro avg       0.73      0.57      0.58      3534
weighted avg       0.70      0.61      0.59      3534






### see we get same report as in test data on fine tuned model before training

In [57]:
from transformers import AutoModel, AutoTokenizer

model.push_to_hub('Wolverine001/bert_finetuned_senti')
tokenizer.push_to_hub('Wolverine001/bert_finetuned_senti')

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/Wolverine001/bert_finetuned_senti/commit/426ab4e593c12fbc1a976f8124401d176af8ab92', commit_message='Upload tokenizer', commit_description='', oid='426ab4e593c12fbc1a976f8124401d176af8ab92', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Wolverine001/bert_finetuned_senti', endpoint='https://huggingface.co', repo_type='model', repo_id='Wolverine001/bert_finetuned_senti'), pr_revision=None, pr_num=None)

In [79]:
report_md = """---
language: en
license: mit
datasets: [twitter-sentiment]
metrics: [accuracy, f1, precision, recall]
---

# Fine-Tuned BERT Sentiment Model

This model was fine-tuned for sentiment classification.

- Pre-trained model used:  google-bert/bert-base-uncased.
- Dataset used:            twitter-sentiment.
- max_length = 128
- batch_size = 8
- learning_rate = 1e-4
- epochs = 3

## **Evaluation Results**

### 📌 **Before Fine-Tuning**
**Accuracy:** 0.4046

| Class      | Precision | Recall | F1-Score | Support |
|------------|------------|------------|------------|------------|
| Negative   | 0.00       | 0.00       | 0.00       | 1001 |
| Neutral    | 0.40       | 1.00       | 0.58       | 1430 |
| Positive   | 0.00       | 0.00       | 0.00       | 1103 |
| **Macro Avg**  | 0.13   | 0.33       | 0.19       | 3534 |
| **Weighted Avg**  | 0.16   | 0.40   | 0.23   | 3534 |

---

### ✅ **After Fine-Tuning**
**Accuracy:** 0.6095

| Class      | Precision | Recall | F1-Score | Support |
|------------|------------|------------|------------|------------|
| Negative   | 0.82       | 0.29       | 0.42       | 1001 |
| Neutral    | 0.51       | 0.89       | 0.65       | 1430 |
| Positive   | 0.85       | 0.54       | 0.66       | 1103 |
| **Macro Avg**  | 0.73   | 0.57       | 0.58       | 3534 |
| **Weighted Avg**  | 0.70   | 0.61   | 0.59   | 3534 |

---

You can download the model from [Hugging Face](https://huggingface.co/Wolverine001/bert_finetuned_senti).
"""


In [80]:
from huggingface_hub import HfApi

api = HfApi()

# Push the Markdown content to README.md
api.upload_file(
    path_or_fileobj=report_md.encode(),  # Convert string to bytes
    path_in_repo="README.md",  # Hugging Face uses README.md for model cards
    repo_id="Wolverine001/bert_finetuned_senti",
    repo_type="model",
)

print("Model card updated! Check it at: https://huggingface.co/Wolverine001/bert_finetuned_senti")


Model card updated! Check it at: https://huggingface.co/Wolverine001/bert_finetuned_senti
