<a href="https://colab.research.google.com/github/tolossamuel/Icog-Intern/blob/main/Prompt_Engineering/FTVsPE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import evaluate
import pandas as pd
import warnings
warnings.filterwarnings('ignore')


In [3]:
# The 'imdb' dataset from Hugging Face is already split into train/test
dataset = load_dataset("imdb")


In [4]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})

In [5]:
# For a real task, you would typically use the full dataset.
# The shuffle seed and subset sizes are named so they can be tuned in one place.
SUBSET_SEED = 42
N_TRAIN_SAMPLES = 500
N_TEST_SAMPLES = 250

# Shuffle with a fixed seed first, then keep the leading rows of the shuffled split.
small_train_dataset = dataset["train"].shuffle(seed=SUBSET_SEED).select(range(N_TRAIN_SAMPLES))
small_test_dataset = dataset["test"].shuffle(seed=SUBSET_SEED).select(range(N_TEST_SAMPLES))


In [6]:
# Bundle the two reduced splits back into a DatasetDict under their split names
subset_splits = {"train": small_train_dataset, "test": small_test_dataset}
small_dataset = DatasetDict(subset_splits)

In [None]:
print("Dataset loaded and subset created:")
print(small_dataset)

Dataset loaded and subset created:
DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 500
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 250
    })
})


In [None]:
# Load the tokenizer for the chosen model
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [None]:
# Preprocessing function to tokenize the text
def preprocess_function(examples):
    """Tokenize the "text" field of the given examples.

    Args:
        examples: Mapping with a "text" entry holding the review text(s);
            supplied by ``Dataset.map`` when this function is used as the
            mapping callback.

    Returns:
        The tokenizer output for those texts, produced with truncation and
        padding enabled.
    """
    review_texts = examples["text"]
    encoded_batch = tokenizer(review_texts, truncation=True, padding=True)
    return encoded_batch

In [None]:
# Use batched=True to speed up the processing
tokenized_dataset = small_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

In [None]:
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

In [None]:
# Set the format to PyTorch tensors
tokenized_dataset.set_format("torch")

In [None]:
print("\nDataset after tokenization and formatting:")
print(tokenized_dataset)
print("Example tokenized data entry:")
print(tokenized_dataset["train"][0])


Dataset after tokenization and formatting:
DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 500
    })
    test: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 250
    })
})
Example tokenized data entry:
{'text': 'There is no relation at all between Fortier and Profiler but the fact that both are police series about violent crimes. Profiler looks crispy, Fortier looks classic. Profiler plots are quite simple. Fortier\'s plot are far more complicated... Fortier looks more like Prime Suspect, if we have to spot similarities... The main character is weak and weirdo, but have "clairvoyance". People like to compare, to judge, to evaluate. How about just enjoying? Funny thing too, people writing Fortier looks American but, on the other hand, arguing they prefer American series (!!!). Maybe it\'s the language, or the spirit, but I think this series is more English than A

In [None]:
# --- 3. Model Selection & Fine-tuning ---
print("\nLoading the pre-trained model...")


Loading the pre-trained model...


In [None]:
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Define the metrics to evaluate during training
metric = evaluate.load("accuracy")

In [None]:
# Function to compute metrics
def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions with the loaded metric.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max predictions
        against the reference labels.
    """
    raw_scores, gold_labels = eval_pred
    # The predicted class is the index of the highest score along the last axis.
    predicted_classes = np.asarray(raw_scores).argmax(axis=-1)
    return metric.compute(references=gold_labels, predictions=predicted_classes)

In [None]:
# Define training arguments
# These control the training process and hyperparameters
training_args = TrainingArguments(
    # --- where artifacts go ---
    output_dir="./results",            # output directory for checkpoints
    logging_dir='./logs',              # directory for log files
    report_to="none",                  # no external experiment trackers (e.g. W&B)
    push_to_hub=False,                 # do not upload the model to the Hub
    # --- optimisation hyperparameters ---
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # --- evaluation / logging cadence ---
    eval_strategy="epoch",             # evaluate once per epoch
    logging_steps=10,                  # log every 10 steps
)

In [None]:
# The Trainer class handles the training loop
trainer = Trainer(
    # what to train and how
    model=model,
    args=training_args,
    tokenizer=tokenizer,                        # same tokenizer that produced the inputs
    compute_metrics=compute_metrics,            # accuracy callback used at evaluation time
    # data: the reduced "train" split for fitting, the reduced "test" split for evaluation
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.641,0.610159,0.768
2,0.4158,0.398839,0.852


Epoch,Training Loss,Validation Loss,Accuracy
1,0.641,0.610159,0.768
2,0.4158,0.398839,0.852
3,0.2788,0.367272,0.852


TrainOutput(global_step=96, training_loss=0.47251592328151065, metrics={'train_runtime': 5979.5597, 'train_samples_per_second': 0.251, 'train_steps_per_second': 0.016, 'total_flos': 198701097984000.0, 'train_loss': 0.47251592328151065, 'epoch': 3.0})

In [None]:
results = trainer.evaluate()
print(f"Evaluation results: {results}")

Evaluation results: {'eval_loss': 0.36727190017700195, 'eval_accuracy': 0.852, 'eval_runtime': 227.8185, 'eval_samples_per_second': 1.097, 'eval_steps_per_second': 0.07, 'epoch': 3.0}


`Prompt Engineering`

In [7]:
# library
import json
import os
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
import time
from google.colab import userdata

In [8]:
# gemini api key
api_key = userdata.get('GOOGLE_API_KEY')

In [9]:
# llm model
genai.configure(api_key=api_key)
# print("Gemini API Key configured successfully.")

In [10]:
# Select the Gemini model
# Other options: 'gemini-1.5-flash-latest', 'gemini-1.0-pro', etc.
ANALYSIS_MODEL_NAME = 'gemini-1.5-flash-latest'
RESPONSE_MODEL_NAME = 'gemini-1.5-flash-latest'

In [None]:
# Initialize the models
try:
    analysis_model = genai.GenerativeModel(ANALYSIS_MODEL_NAME)
    response_model = genai.GenerativeModel(RESPONSE_MODEL_NAME)
    print(f"Initialized Gemini models: {ANALYSIS_MODEL_NAME}, {RESPONSE_MODEL_NAME}")
except Exception as e:
    print(f"Error initializing Gemini models: {e}")
    # Re-raise rather than calling exit(): inside a notebook exit() asks the
    # kernel session to quit, whereas a raised exception only stops this cell
    # (and any "Run all") and keeps the traceback visible.
    raise

Initialized Gemini models: gemini-1.5-flash-latest, gemini-1.5-flash-latest


In [None]:
# --- Updated LLM Call Function ---
def call_gemini_llm(prompt, model, model_name_for_log):
    """
    Calls the specified Gemini LLM model with the given prompt.

    Never raises: every failure is reported with print() and signalled to the
    caller by returning an empty string.

    Args:
        prompt: Text passed to ``model.generate_content``.
        model: Object exposing ``generate_content(prompt)`` whose result has
            ``candidates`` and ``text`` attributes.
        model_name_for_log: Label prefixed to failure messages so the failing
            model can be identified in the cell output.

    Returns:
        The response text, or "" when the request raised, the response had no
        candidates, or reading the response text raised.
    """
    try:
        # Use generate_content for non-streaming
        response = model.generate_content(prompt)

        # An empty candidate list means there is no text to read.
        if not response.candidates:
            print(f"[{model_name_for_log}] No candidates returned; response may have been blocked.")
            return ""

        try:
            return response.text
        except ValueError as e:
            # Reading .text raised ValueError: treat it as "no usable text".
            print(f"[{model_name_for_log}] Response text unavailable: {e}")
            return ""

    except Exception as e:
        # Best-effort call: report the problem instead of swallowing it silently.
        print(f"[{model_name_for_log}] Gemini call failed: {e}")
        return ""

In [None]:
# These examples guide the LLM on the task and format
few_shot_examples = """
your task is identify the text as positive or negative and return only one of the two words.
example :
Review: This movie was fantastic! I loved every moment and the acting was superb.
Sentiment: Positive

Review: I couldn't stand this film. The plot was confusing and the characters were annoying.
Sentiment: Negative

Review: An absolute masterpiece of cinema. Highly recommended!
Sentiment: Positive

Review: The movie was boring and nothing really interesting happened.
Sentiment: Negative
"""

In [None]:
prompt_template = few_shot_examples + """
Identify the text as positive or negative and return only one of the two words,
Review: {review_text}
Sentiment:
"""

In [None]:
def generate_response(review_text):
    """
    Asks the Gemini LLM for the sentiment of a review using Few-Shot prompting.

    Args:
        review_text: The review to place into the prompt template.

    Returns:
        The model's reply with surrounding whitespace stripped, or None when
        the input is empty or the model produced no usable text.
    """
    if not review_text:
        return None

    prompt = prompt_template.format(review_text=review_text)

    raw_reply = call_gemini_llm(prompt, response_model, f"{RESPONSE_MODEL_NAME} (Response Few-Shot)")

    # call_gemini_llm signals failure with "" rather than None, so check for
    # any falsy value; a whitespace-only reply carries no label either.
    if not raw_reply or not raw_reply.strip():
        return None

    return raw_reply.strip()


In [None]:
def process_review_text(review_text):
    """
    Runs a single review through generate_response and reports the outcome.

    Returns:
        On success, the value returned by generate_response. Otherwise a dict
        with an "error" message and the original input under "feedback"; when
        generation fails it also carries the stripped input under "analysis".
    """
    # Missing or whitespace-only input is rejected before any LLM call.
    has_content = bool(review_text) and bool(review_text.strip())
    if not has_content:
        return {"error": "Empty input feedback", "feedback": review_text}

    llm_reply = generate_response(review_text)
    time.sleep(1)  # fixed pause after every LLM call

    if llm_reply:
        return llm_reply

    failure_report = {
        "error": "Response generation failed",
        "feedback": review_text,
        "analysis": review_text.strip(),
    }
    return failure_report

In [1]:
small_dataset["train"]

NameError: name 'small_dataset' is not defined

In [None]:
# NOTE(review): this scores the *train* subset, while the fine-tuned model is
# evaluated on tokenized_dataset["test"] — confirm which split the comparison should use.
test_cases = small_dataset["train"]["text"]
test_cases_result = small_dataset["train"]["label"]
maps = {"Positive": 1, "Negative": 0}
# Recorded when the LLM call failed or its reply is not one of the known labels,
# so such cases are never silently counted as a positive prediction.
UNKNOWN_LABEL = -1
results = []
print("\n\n--- Starting Test Cases ---")
for i, review_text in enumerate(test_cases):
    result = process_review_text(review_text)
    # process_review_text returns a dict on failure and a plain string on success.
    call_failed = isinstance(result, dict)
    if call_failed:
        # A dict is unhashable and must not be used as a lookup key in `maps`.
        results.append(UNKNOWN_LABEL)
    else:
        # Tolerate case differences and a trailing period in the model's reply.
        label = result.strip().rstrip(".").capitalize()
        results.append(maps.get(label, UNKNOWN_LABEL))
    print(f"Test Case {i + 1}: {result}")
    # Add a delay between processing test cases to respect potential API rate limits
    if not call_failed:  # Only delay if successful calls were made
        time.sleep(2)  # Adjust delay as needed based on API limits (free tier often has strict limits)



--- Starting Test Cases ---


In [None]:
test_cases_result

In [None]:
results