In [None]:
– Transformer architectures: BERT, DistilBERT, RoBERTa
– Transfer learning and fine-tuning
– Evaluation metrics: F1 score, ROC AUC
– Train a text classifier using HuggingFace Transformers
– Fine-tune BERT on a custom dataset (e.g., reviews or tickets)
– Evaluate model performance using standard metrics

In [10]:
!pip install transformers torch



Generate embedding using transformers architecture.

In [11]:
sentences= [
    "I love natural language processing.",
    "The cat sat on the mat.",
    "Artificial intelligence is transforming the world.",
    "I really enjoy learning about AI and machine learning."
]

In [12]:
from transformers import AutoTokenizer, AutoModel
import torch

# 1. Load the pre-trained Tokenizer
tokenizer_bert = AutoTokenizer.from_pretrained('bert-base-uncased')

# 2. Load the pre-trained BERT Model

model_bert = AutoModel.from_pretrained('bert-base-uncased')

print("--- BERT Embeddings ---")
for i, sentence in enumerate(sentences):

    inputs = tokenizer_bert(sentence, return_tensors='pt', padding=True, truncation=True)




    with torch.no_grad():
        outputs = model_bert(**inputs)


    sentence_embedding_bert = outputs.last_hidden_state[:, 0, :].squeeze(0)

    print(f"Sentence {i+1}: '{sentence}'")
    print(f"Embedding shape: {sentence_embedding_bert.shape}") # Should be (768,) for bert-base
    print(f"First 5 dimensions: {sentence_embedding_bert[:5].tolist()}")
    print("-" * 30)


--- BERT Embeddings ---
Sentence 1: 'I love natural language processing.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.0419006273150444, 0.04342783987522125, -0.25335681438446045, -0.3501684367656708, -0.37434783577919006]
------------------------------
Sentence 2: 'The cat sat on the mat.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.3642236888408661, -0.053053777664899826, -0.3673226237297058, -0.029673390090465546, -0.460783988237381]
------------------------------
Sentence 3: 'Artificial intelligence is transforming the world.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.039672814309597015, 0.1846999078989029, 0.017960527911782265, -0.06997756659984589, -0.3907631039619446]
------------------------------
Sentence 4: 'I really enjoy learning about AI and machine learning.'
Embedding shape: torch.Size([768])
First 5 dimensions: [0.1718682050704956, -0.11388571560382843, -0.2885165214538574, -0.1145595833659172, -0.4419533610343933]
---------

In [15]:
from transformers import AutoTokenizer, AutoModel
import torch

# 1. Load the pre-trained Tokenizer
tokenizer_bert = AutoTokenizer.from_pretrained('distilbert-base-uncased')

# 2. Load the pre-trained BERT Model

model_bert = AutoModel.from_pretrained('distilbert-base-uncased')

print("--- distillbert Embeddings ---")
for i, sentence in enumerate(sentences):

    inputs = tokenizer_bert(sentence, return_tensors='pt', padding=True, truncation=True)




    with torch.no_grad():
        outputs = model_bert(**inputs)


    sentence_embedding_bert = outputs.last_hidden_state[:, 0, :].squeeze(0)

    print(f"Sentence {i+1}: '{sentence}'")
    print(f"Embedding shape: {sentence_embedding_bert.shape}") # Should be (768,) for bert-base
    print(f"First 5 dimensions: {sentence_embedding_bert[:5].tolist()}")
    print("-" * 30)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

--- distillbert Embeddings ---
Sentence 1: 'I love natural language processing.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.00508827855810523, 0.12470056861639023, -0.2978385090827942, -0.2091563642024994, -0.012655766680836678]
------------------------------
Sentence 2: 'The cat sat on the mat.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.2713077962398529, -0.07814168184995651, -0.021572425961494446, 0.01689116843044758, -0.05309395492076874]
------------------------------
Sentence 3: 'Artificial intelligence is transforming the world.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.2557821273803711, -0.20507413148880005, -0.23612262308597565, -0.10314884036779404, -0.13815921545028687]
------------------------------
Sentence 4: 'I really enjoy learning about AI and machine learning.'
Embedding shape: torch.Size([768])
First 5 dimensions: [0.039819225668907166, 0.06452791392803192, -0.2433318793773651, -0.08441849052906036, -0.020516738295555

In [16]:
from transformers import AutoTokenizer, AutoModel
import torch

# 1. Load the pre-trained Tokenizer
tokenizer_bert = AutoTokenizer.from_pretrained('roberta-base')

# 2. Load the pre-trained BERT Model

model_bert = AutoModel.from_pretrained('roberta-base')

print("--- distillbert Embeddings ---")
for i, sentence in enumerate(sentences):

    inputs = tokenizer_bert(sentence, return_tensors='pt', padding=True, truncation=True)




    with torch.no_grad():
        outputs = model_bert(**inputs)


    sentence_embedding_bert = outputs.last_hidden_state[:, 0, :].squeeze(0)

    print(f"Sentence {i+1}: '{sentence}'")
    print(f"Embedding shape: {sentence_embedding_bert.shape}") # Should be (768,) for bert-base
    print(f"First 5 dimensions: {sentence_embedding_bert[:5].tolist()}")
    print("-" * 30)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


--- distillbert Embeddings ---
Sentence 1: 'I love natural language processing.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.1002349704504013, 0.14735163748264313, -0.024812430143356323, -0.09707959741353989, 0.11904925107955933]
------------------------------
Sentence 2: 'The cat sat on the mat.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.14549586176872253, 0.10518638789653778, 0.0018203345825895667, -0.1087399497628212, 0.15686963498592377]
------------------------------
Sentence 3: 'Artificial intelligence is transforming the world.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.07986456155776978, 0.11322732269763947, -0.013730237260460854, -0.1160115897655487, 0.12806421518325806]
------------------------------
Sentence 4: 'I really enjoy learning about AI and machine learning.'
Embedding shape: torch.Size([768])
First 5 dimensions: [-0.09177283197641373, 0.11600624769926071, -0.0263421181589365, -0.07305334508419037, 0.10877197235822678]

| Model      | Size            | Speed          | Training Data & Strategy           | Use Case                      |
| ---------- | --------------- | -------------- | ---------------------------------- | ----------------------------- |
| BERT       | Large           | Moderate       | MLM + NSP, moderate training data  | Strong baseline for NLP tasks |
| DistilBERT | Smaller (\~40%) | Faster (\~60%) | Distilled from BERT                | Resource-limited environments |
| RoBERTa    | Large           | Moderate       | More data, no NSP, longer training | Improved accuracy over BERT   |


In [17]:
pip install transformers torch scikit-learn pandas



In [1]:
import pandas as pd

In [None]:
# We will make a small dataset of movie reviews and their sentimetns
# then we will convert the sentimetns into (Positive, negative)(1,0)
# print dataset with label

In [2]:
data = {
    'text': [
        "This movie was fantastic! I loved every minute.",
        "Absolutely terrible film. A waste of time.",
        "The plot was compelling and the acting superb.",
        "Boring and predictable. I fell asleep.",
        "A truly remarkable cinematic experience.",
        "Not worth the ticket price. Very disappointing.",
        "Outstanding direction and powerful performances.",
        "Couldn't stand it. So dull.",
        "An absolute masterpiece of storytelling.",
        "Awful, just awful. Don't bother watching."
    ],
    'label': [
        'positive',
        'negative',
        'positive',
        'negative',
        'positive',
        'negative',
        'positive',
        'negative',
        'positive',
        'negative'
    ]
}
df = pd.DataFrame(data)

In [3]:
df

Unnamed: 0,text,label
0,This movie was fantastic! I loved every minute.,positive
1,Absolutely terrible film. A waste of time.,negative
2,The plot was compelling and the acting superb.,positive
3,Boring and predictable. I fell asleep.,negative
4,A truly remarkable cinematic experience.,positive
5,Not worth the ticket price. Very disappointing.,negative
6,Outstanding direction and powerful performances.,positive
7,Couldn't stand it. So dull.,negative
8,An absolute masterpiece of storytelling.,positive
9,"Awful, just awful. Don't bother watching.",negative


In [4]:
# Map lables to the numerical ids
label_map = {'positive': 1, 'negative': 0}
df['label_id'] = df['label'].map(label_map)

print(df)

                                               text     label  label_id
0   This movie was fantastic! I loved every minute.  positive         1
1        Absolutely terrible film. A waste of time.  negative         0
2    The plot was compelling and the acting superb.  positive         1
3            Boring and predictable. I fell asleep.  negative         0
4          A truly remarkable cinematic experience.  positive         1
5   Not worth the ticket price. Very disappointing.  negative         0
6  Outstanding direction and powerful performances.  positive         1
7                       Couldn't stand it. So dull.  negative         0
8          An absolute masterpiece of storytelling.  positive         1
9         Awful, just awful. Don't bother watching.  negative         0


In [5]:
# traditional method TF-IDF
# without trasnfer learning kya output hai

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [7]:
# split the data into train and test
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label_id'], test_size=0.3, random_state=42, stratify=df['label_id']
)

# create a pipeline for tfidf
# logistic regrsessgion model

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=100)), # Limiting features for this small data
    ('classifier', LogisticRegression(random_state=42, solver='liblinear'))
])

# train the pipiline
pipeline.fit(X_train, y_train)

# predict on test data
y_pred = pipeline.predict(X_test)

# calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy (TF-IDF + Logistic Regression): {accuracy:.4f}")

Accuracy (TF-IDF + Logistic Regression): 0.3333


In [8]:
# transfer learning - BERT embeddings +Logistic regression

from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [11]:
# Load pre-trained BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')
model.eval() # Set model to evaluation mode (no training happening)

# Function to get BERT embeddings for a list of texts
def get_bert_embeddings(texts):
    embeddings = []
    for text in texts:
        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)
        with torch.no_grad():
            outputs = model(**inputs)
        # Use the [CLS] token embedding as the sentence representation
        sentence_embedding = outputs.last_hidden_state[:, 0, :].squeeze(0)
        embeddings.append(sentence_embedding.numpy()) # Convert to NumPy array
    return np.array(embeddings)

# Generate embeddings for training and testing sets
X_train_embeddings = get_bert_embeddings(X_train.tolist())
X_test_embeddings = get_bert_embeddings(X_test.tolist())

print(f"Shape of X_train_embeddings: {X_train_embeddings.shape}") # (num_samples, 768)

# Train a Logistic Regression classifier on BERT embeddings
classifier_bert_features = LogisticRegression(random_state=42, solver='liblinear', max_iter=200)
classifier_bert_features.fit(X_train_embeddings, y_train)

# Make predictions
y_pred_bert_features = classifier_bert_features.predict(X_test_embeddings)

# Evaluate
accuracy_bert_features = accuracy_score(y_test, y_pred_bert_features)
print(f"Accuracy (BERT Features + Logistic Regression): {accuracy_bert_features:.4f}")


Shape of X_train_embeddings: (7, 768)
Accuracy (BERT Features + Logistic Regression): 1.0000


In [12]:
!pip install transformers[torch] accelerate datasets scikit-learn pandas matplotlib seaborn



In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix,roc_curve, auc,RocCurveDisplay
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset # hugging facee dataset format

# Set a seed for random repoducibilty
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

print("Libraries loaded successfully!")
print(f"PyTorch CUDA available: {torch.cuda.is_available()}")


Libraries loaded successfully!
PyTorch CUDA available: False


# custom dataset creation
1. train test mai split
2. hum usko fine tuning eembeddings generate krke
3. Hum uspr f1 score or baki cheeze check krenge

In [2]:
# Sentiment analysis

data ={
    'text': [
        "This product is absolutely amazing! I highly recommend it.",
        "Terrible experience, completely disappointed. Waste of money.",
        "The customer service was excellent and very helpful.",
        "Boring and uninspired film, I fell asleep multiple times.",
        "A truly remarkable performance by the lead actress.",
        "Received a broken item, utterly useless. Very frustrating.",
        "The food was delicious and the ambiance perfect.",
        "Worst app ever. Constantly crashes and full of bugs.",
        "So happy with my purchase, exceeded all expectations!",
        "Poor quality materials and shoddy craftsmanship. Avoid!",
        "Fast delivery and great value. Will buy again.",
        "Overpriced and underdelivered. Feel ripped off.",
        "An absolute masterpiece of storytelling and visuals.",
        "Never again. Dreadful service and rude staff.",
        "Highly intuitive interface and powerful features. Love it!",
        "Completely broken software. Can't get anything done.",
        "The plot was compelling and the acting superb.",
        "Misleading advertisement. Not what I expected at all."
    ],
    'label': [
        'positive', 'negative', 'positive', 'negative', 'positive',
        'negative', 'positive', 'negative', 'positive', 'negative',
        'positive', 'negative', 'positive', 'negative', 'positive',
        'negative', 'positive', 'negative'
    ]
}

df = pd.DataFrame(data)

# map string labes to numerical ids (0 for negative and 1 for positive)
label_map = {'positive': 1, 'negative': 0}
df['label_id'] = df['label'].map(label_map)

print(df)

                                                 text     label  label_id
0   This product is absolutely amazing! I highly r...  positive         1
1   Terrible experience, completely disappointed. ...  negative         0
2   The customer service was excellent and very he...  positive         1
3   Boring and uninspired film, I fell asleep mult...  negative         0
4   A truly remarkable performance by the lead act...  positive         1
5   Received a broken item, utterly useless. Very ...  negative         0
6    The food was delicious and the ambiance perfect.  positive         1
7   Worst app ever. Constantly crashes and full of...  negative         0
8   So happy with my purchase, exceeded all expect...  positive         1
9   Poor quality materials and shoddy craftsmanshi...  negative         0
10     Fast delivery and great value. Will buy again.  positive         1
11    Overpriced and underdelivered. Feel ripped off.  negative         0
12  An absolute masterpiece of storyte

In [3]:
# split the dataframe into training and testing sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label_id'])

print(f"\nTraining set size: {len(train_df)} samples")
print(f"Test set size: {len(test_df)} samples")
print(f"Training label distribution:\n{train_df['label'].value_counts()}")
print(f"Test label distribution:\n{test_df['label'].value_counts()}")

# Conver pandas dataframe to huggin face dataet objects
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


Training set size: 14 samples
Test set size: 4 samples
Training label distribution:
label
positive    7
negative    7
Name: count, dtype: int64
Test label distribution:
label
negative    2
positive    2
Name: count, dtype: int64


In [4]:
# Load pre trained bert model
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# function to tokenise the text
def tokenize_function(examples):
  return tokenizer(examples["text"], padding="max_length", truncation=True)

# apply tokenization to both training and test dataset
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

# remove the original text and label columns as they are no longer need
tokenized_train_dataset = tokenized_train_dataset.remove_columns(["text", "label", "__index_level_0__"])
tokenized_test_dataset = tokenized_test_dataset.remove_columns(["text", "label", "__index_level_0__"])

# Remove label id
tokenized_train_dataset = tokenized_train_dataset.rename_column("label_id", "labels")
tokenized_test_dataset = tokenized_test_dataset.rename_column("label_id", "labels")

# set the toekniser to pytorch sensors
tokenized_train_dataset.reset_format()

print("\n--- Tokenization Complete ---")
print(f"Example of tokenized training input_ids:\n{tokenized_train_dataset[0]['input_ids']}")
print(f"Example of tokenized training attention_mask:\n{tokenized_train_dataset[0]['attention_mask']}")
print(f"Example of tokenized training label:\n{tokenized_train_dataset[0]['labels']}")

# Set it back to torch for training
tokenized_train_dataset.set_format("torch")
tokenized_test_dataset.set_format("torch")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]


--- Tokenization Complete ---
Example of tokenized training input_ids:
[101, 1996, 2833, 2001, 12090, 1998, 1996, 2572, 15599, 3401, 3819, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [5]:
# load the pre trained transformer bert model

# 'num_labels=2' because we have two classes (e.g., positive and negative sentiment)
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',           # Pretrained BERT model checkpoint
    num_labels=2                   # Number of target classes for classification
)


# defining training arguments
training_args=TrainingArguments(
    output_dir='./results',   # path where the model checkpoints and logs will be saved
    num_train_epochs=5,       # number of times the model will pass through the enitre dataset
    per_device_train_batch_size=4, # batch size used during training per GPU/CPU
    per_device_eval_batch_size=4,  # batch size used during evaluation per GPU/CPU
    warmup_steps=100,             # number of steps to linearly increase the learning rate
    weight_decay=0.01,  # strngth of weight decay to prevent overfitting
    logging_dir='./logs',                      # Directory to store training logs (e.g., for TensorBoard)
    logging_steps=1,                           # Log training metrics every N steps
    report_to="none",                         # Disable reporting to third-party services like WandB
    learning_rate=2e-5,                        # Initial learning rate for the Adam optimizer
)

# confirm where the model would be trained GPU or CPU
print("\n--- Model Loaded & Training Arguments Defined ---")
print(f"Model will be trained on: {'CUDA (GPU)' if next(model.parameters()).is_cuda else 'CPU'}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



--- Model Loaded & Training Arguments Defined ---
Model will be trained on: CPU


In [None]:
# f1 score, precision, accuarcy

In [6]:
# Function to compute metrics for evaluation
def compute_metrics(p):
    # 'p' is a tuple containing (predictions, labels) from the model
    predictions, labels = p
    # Get the predicted class by taking the argmax (index of max probability)
    predictions = np.argmax(predictions, axis=1)

    # Calculate standard classification metrics
    # We use 'average='binary'' for binary classification (two classes)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)

    # Return as a dictionary, as expected by the Trainer
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Initialize the Trainer
# The Trainer orchestrates the entire training and evaluation loop.
trainer = Trainer(
    model=model,                         # The pre-trained model to fine-tune
    args=training_args,                  # Our defined training parameters
    train_dataset=tokenized_train_dataset, # The training dataset
    eval_dataset=tokenized_test_dataset,   # The evaluation dataset
    tokenizer=tokenizer,                 # The tokenizer (used for padding/truncation within Trainer)
    compute_metrics=compute_metrics      # Our custom function for computing metrics
)

print("\n--- Trainer Initialized ---")


--- Trainer Initialized ---


  trainer = Trainer(


In [11]:
pip install numpy==1.26.4

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4


In [7]:
print("\n--- Starting Model Fine-tuning ---")
# Start the training process
trainer.train()

print("\n--- Fine-tuning Complete ---")

# Evaluate the model on the test set one final time (loads the best model found during training)
final_results = trainer.evaluate(eval_dataset=tokenized_test_dataset)
print("\n--- Final Evaluation Results on Test Set (Best Model) ---")
print(final_results)


--- Starting Model Fine-tuning ---


Step,Training Loss
1,0.6682
2,0.6725
3,0.7094
4,0.7144
5,0.7624
6,0.761
7,0.6897
8,0.7431
9,0.6417
10,0.6434



--- Fine-tuning Complete ---



--- Final Evaluation Results on Test Set (Best Model) ---
{'eval_loss': 0.6695682406425476, 'eval_accuracy': 0.75, 'eval_f1': 0.8, 'eval_precision': 0.6666666666666666, 'eval_recall': 1.0, 'eval_runtime': 6.7705, 'eval_samples_per_second': 0.591, 'eval_steps_per_second': 0.148, 'epoch': 5.0}
