Step 1: Download the IMDB Dataset

In [None]:
import pandas as pd

# Location of the IMDB reviews CSV inside the Kaggle input directory.
dataset_path = '/kaggle/input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv'
df = pd.read_csv(dataset_path)

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
print(df.head())

Step 2: Data Preprocessing

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
dataset_path = '/kaggle/input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv'
df = pd.read_csv(dataset_path)

# Encode the sentiment column as integers: positive -> 1, negative -> 0.
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map() turns every value missing from the mapping into NaN. Fail fast
# here instead of passing NaN labels on to tokenization and training.
unmapped_rows = int(df['sentiment'].isna().sum())
if unmapped_rows:
    raise ValueError(
        f"{unmapped_rows} rows have a sentiment other than 'positive' or 'negative'"
    )

# Retain only the review and label columns
df = df[['review', 'sentiment']]

# Split 80% / 10% / 10% into train / validation / test. Stratifying on the
# label keeps the class balance identical across the three splits.
train_data, temp_data = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['sentiment']
)
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, random_state=42, stratify=temp_data['sentiment']
)

# Display basic information about the dataset. info() prints on its own and
# returns None, so it is not wrapped in print().
df.info()
print(df.head())

Step 3: Model Selection and Tokenization

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
# Checkpoint to fine-tune; the tokenizer is loaded from the same name.
model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_and_format(examples):
    """Tokenize a batch of reviews and attach their sentiment as ``labels``.

    ``examples`` is indexed by the ``review`` and ``sentiment`` keys. Reviews
    are truncated and padded to a fixed length of 256 tokens.
    """
    encoded = tokenizer(
        examples['review'],
        truncation=True,
        padding='max_length',
        max_length=256,
    )
    encoded['labels'] = examples['sentiment']  # Assign labels explicitly
    return encoded


# Convert each pandas split to a Dataset and tokenize it in batches.
train_dataset, val_dataset, test_dataset = (
    Dataset.from_pandas(split).map(tokenize_and_format, batched=True)
    for split in (train_data, val_data, test_data)
)

Step 4: Fine-Tune the Model

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

import inspect

# Load the pre-trained checkpoint configured for two output labels.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`.
# Pick whichever keyword the installed TrainingArguments accepts so this cell
# runs on both older and newer versions.
_eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments.__init__).parameters
    else 'evaluation_strategy'
)

# Define training arguments: one epoch, evaluating and checkpointing once per
# epoch, with external experiment reporting turned off.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    report_to='none',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_strategy='epoch',
    learning_rate=5e-5,
    logging_dir='./logs',
    logging_steps=10,
    **{_eval_strategy_key: 'epoch'},
)

# Metric function handed to the Trainer
def compute_metrics(eval_pred):
    """Return accuracy, precision, recall and F1 for a ``(logits, labels)`` pair.

    The predicted class is the arg-max over the last axis of the logits.
    Precision, recall and F1 are computed with ``average='binary'``.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    precision, recall, f1, _support = precision_recall_fscore_support(
        references, predicted, average='binary'
    )
    return {
        'accuracy': accuracy_score(references, predicted),
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Gather everything the Trainer needs in one place, then construct it:
# training runs on the train split, evaluation on the validation split.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_kwargs)

In [None]:
# Fine-tune the model
trainer.train()

# Report metrics on the held-out test split. The validation split is already
# scored by the Trainer at the end of the epoch; the test split is not.
test_metrics = trainer.evaluate(eval_dataset=test_dataset)
print(test_metrics)

Step 5: Save and Upload the Model to Hugging Face

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory.
model_save_path = "/kaggle/working/imdb_distilbert"
for artifact in (model, tokenizer):
    artifact.save_pretrained(model_save_path)

In [None]:
!pip install huggingface_hub

In [None]:
import huggingface_hub

# Prompt for a Hugging Face access token inside the notebook.
huggingface_hub.notebook_login()

In [None]:
from huggingface_hub import whoami

# Confirm the login by showing only the account name. The full whoami()
# payload can include the e-mail address and token details, which would
# otherwise be stored in the saved notebook output.
print(whoami()["name"])

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Reload the tokenizer and the classifier from the local save directory.
model_save_path = "/kaggle/working/imdb_distilbert"  # Your saved model path
tokenizer = AutoTokenizer.from_pretrained(model_save_path)
model = AutoModelForSequenceClassification.from_pretrained(model_save_path)

In [None]:
from huggingface_hub import HfApi

# Define the repo name (it will be created under your Hugging Face account)
repo_name = 'distilbert_IMDB'  # Change this to your preferred name

# Push model and tokenizer to the Hugging Face Hub
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

# Build the link from the logged-in account instead of a hard-coded user name,
# so the printed URL is correct for whoever runs this notebook.
hub_user = HfApi().whoami()["name"]
print(f"Model uploaded to: https://huggingface.co/{hub_user}/{repo_name}")

Part 2: API Development and Testing

Step 6: Set Up the Backend API

In [None]:
!pip install fastapi uvicorn transformers torch sentencepiece accelerate

In [None]:
import os

from fastapi import FastAPI, HTTPException
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

# Set up Hugging Face authentication.
# The access token is read from the HF_TOKEN environment variable and must
# never be written into the notebook. Any token that was committed to source
# control has to be revoked. When the variable is unset this is None, and the
# library falls back to the credentials cached by a previous login.
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

# Load Llama-3-8B model and tokenizer
MODEL_NAME = "meta-llama/Meta-Llama-3-8B"

try:
    # `token=` replaces the deprecated `use_auth_token=` keyword.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HUGGINGFACE_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto", token=HUGGINGFACE_TOKEN
    )
    # NOTE(review): "sentiment-analysis" is the text-classification pipeline,
    # which expects a sequence-classification model. `model` here is a causal
    # LM without a classification head, so the returned labels are not
    # meaningful (and the call may fail). Either serve a classifier checkpoint
    # or prompt the LM through a "text-generation" pipeline -- to be decided.
    sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
except Exception as e:
    # Chain the original exception so the underlying traceback is preserved.
    raise RuntimeError(f"Failed to load model: {str(e)}") from e

# Initialize FastAPI
app = FastAPI()

@app.get("/")
def home():
    """Health-check endpoint confirming that the service is up."""
    return {"message": "Llama-3 Sentiment Analysis API is running!"}

@app.post("/analyze/")
def analyze_sentiment(text: str):
    """Run the sentiment pipeline on ``text`` and return its raw output.

    ``text`` is declared as a plain ``str`` parameter, so FastAPI reads it
    from the query string. Blank input is rejected with HTTP 400; any error
    raised by the pipeline is reported as HTTP 500.
    """
    # Validate before the try block so the 400 is not re-wrapped as a 500.
    if not text.strip():
        raise HTTPException(status_code=400, detail="text must not be empty")
    try:
        result = sentiment_pipeline(text)
        return {"text": text, "sentiment": result}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Run with: uvicorn fastapi_llama3:app --host 0.0.0.0 --port 8000


Kaggle doesn't support running FastAPI directly, and I don't have any runtime available on Colab, so I cannot run the API. Here is an alternative:

In [None]:
import os

from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

# Set up Hugging Face authentication.
# The access token is read from the HF_TOKEN environment variable and must
# never be written into the notebook. Any token that was committed to source
# control has to be revoked. When the variable is unset this is None, and the
# library falls back to the credentials cached by a previous login.
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

# Load Llama-3-8B model and tokenizer
MODEL_NAME = "meta-llama/Meta-Llama-3-8B"

# Load model and tokenizer (`token=` replaces the deprecated `use_auth_token=`).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HUGGINGFACE_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, device_map="auto", token=HUGGINGFACE_TOKEN
)

# Load sentiment analysis pipeline
# NOTE(review): "sentiment-analysis" is the text-classification pipeline, which
# expects a sequence-classification model. `model` here is a causal LM without
# a classification head, so the returned labels are not meaningful (and the
# call may fail). Either load a classifier checkpoint or prompt the LM through
# a "text-generation" pipeline -- to be decided.
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Example input text
text = "This is a great day!"

# Get the sentiment of the text
result = sentiment_pipeline(text)

# Print the result
print("Sentiment Analysis Result:", result)


Step 7: Load Models

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the fine-tuned model and tokenizer from Hugging Face
model_name = "Krati132/distilbert_IMDB"

try:
    # Load model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("Model loaded successfully")
except Exception as e:
    # Stop the notebook here. If the failure were only printed, later cells
    # would silently keep using whatever `model` and `tokenizer` an earlier
    # cell left in the kernel.
    raise RuntimeError(f"Failed to load model: {str(e)}") from e


In [None]:
!pip install groq requests

In [None]:
import os

import requests

# Define the API endpoint and headers.
# The API key is read from the GROQ_API_KEY environment variable and must
# never be written into the notebook. Any key that was committed to source
# control has to be revoked. A missing variable raises KeyError immediately.
url = "https://api.groq.com/openai/v1/chat/completions"
headers = {
    "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
    "Content-Type": "application/json"
}

# Define the payload (input data for the model)
payload = {
    "model": "llama-guard-3-8b",  # Specify the model name
    "messages": [
        {"role": "user", "content": "What is the capital of France?"}
    ],
    "max_tokens": 50
}

# Send the request. Without a timeout, requests waits indefinitely on a
# stalled connection.
response = requests.post(url, headers=headers, json=payload, timeout=30)

# Check the response
if response.status_code == 200:
    result = response.json()
    # Fall back to a single empty choice so that a missing or empty "choices"
    # list yields an empty string instead of an IndexError.
    choices = result.get("choices") or [{}]
    generated_text = (choices[0].get("message", {}).get("content") or "").strip()
    print("Generated Text:", generated_text)
else:
    print("Error:", response.status_code, response.text)

This is what I've stored in the main.py file:

from fastapi import FastAPI, HTTPException, Request
from transformers import pipeline

# Initialize the FastAPI app
app = FastAPI()

# Load the fine-tuned model from Hugging Face
model = pipeline('sentiment-analysis', model="Krati132/distilbert_IMDB")

@app.post("/analyze")
async def analyze(request: Request):
    """Classify the sentiment of the ``text`` field of a JSON request body.

    Expects a body of the form ``{"text": "<review>"}`` and returns
    ``{"sentiment": <pipeline output>}``. A body that is not valid JSON, is
    not a JSON object, or has no non-empty string under ``text`` is rejected
    with HTTP 400 instead of being passed to the model.
    """
    # Parse the request body; malformed JSON surfaces as a ValueError.
    try:
        request_data = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc

    text = request_data.get("text", "") if isinstance(request_data, dict) else ""
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(
            status_code=400,
            detail='Request body must be a JSON object with a non-empty "text" string',
        )

    # Use the model to predict sentiment. truncation=True cuts inputs that
    # exceed the model's maximum sequence length instead of raising on them.
    result = model(text, truncation=True)

    return {"sentiment": result}

In [2]:
import requests
import json

# Replace with your actual ngrok URL or localhost if testing locally
ngrok_url = "https://f9af-84-249-11-218.ngrok-free.app/analyze"  # Change this if using ngrok

# Define the payload with the text you want to analyze
payload = {
    "text": "I love programming in Python!"  # Sample text
}

# Set the headers
headers = {
    "Content-Type": "application/json"
}

# Send the POST request. The timeout bounds how long the cell waits on a slow
# or unreachable server; connection problems are reported instead of raising.
try:
    response = requests.post(ngrok_url, headers=headers, data=json.dumps(payload), timeout=60)
except requests.exceptions.RequestException as exc:
    print(f"Failed to connect: {exc}")
else:
    # Check if the request was successful
    if response.status_code == 200:
        print("Response from API:", response.json())
    else:
        print(f"Failed to connect. Status code: {response.status_code}")
        # The body usually carries the server's error detail.
        print(response.text)


Response from API: {'result': 'Success'}


I cannot use Google Colab and I need a GPU, so I used ngrok to expose a URL and saved main.py, which runs the FastAPI methods. It's still taking too long to respond. The same happens with Postman: I set the method to 'POST', gave it my ngrok URL, and wrote the following script:
{
    "model": "llama-3.3-70b-versatile",
    "messages": [
        {
            "role": "user",
            "content": "What is the capital of France"
        }
    ]
}

Here is the output-
Cloud Agent Error: Request is taking longer than 30 seconds to fulfill. Use another agent to send a request without time limitations.


Link to Git: https://github.com/kratipandya/Assignment3