# Using DeepSeek API

In [None]:
from datasets import load_dataset

# Fetch the Rotten Tomatoes sentiment dataset from the Hugging Face hub
data = load_dataset(path="rotten_tomatoes")

# Leave the dataset as the cell's last expression so the notebook displays it
data

In [18]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if present) and read the DeepSeek key
load_dotenv()
api_key = os.getenv("DEEPSEEK_API_KEY")

# Fail fast with a clear message. If api_key were None, the OpenAI client would
# fall back to the OPENAI_API_KEY environment variable, and an unrelated
# credential could end up being sent to the DeepSeek endpoint below.
if not api_key:
    raise RuntimeError(
        "DEEPSEEK_API_KEY is not set. Export it or add it to your .env file."
    )

# Setup the client with DeepSeek API base
client = OpenAI(
    api_key=api_key,
    # important: without base_url the client targets OpenAI's own endpoint
    base_url="https://api.deepseek.com/v1"
)

In [19]:
# Chat-completion helper written against the v1 client interface
def chatgpt_generation(prompt, document, model="deepseek-chat"):
    """Ask the chat model to answer ``prompt`` about ``document``.

    Args:
        prompt: Template text; every ``[DOCUMENT]`` marker in it is replaced
            by ``document`` before the request is sent.
        document: Text substituted into the template.
        model: Model identifier forwarded to the chat-completions call.

    Returns:
        The message content of the first choice in the response.
    """
    user_text = prompt.replace("[DOCUMENT]", document)
    system_turn = dict(role="system", content="You are a helpful assistant.")
    user_turn = dict(role="user", content=user_text)

    # The module-level `client` performs the request; temperature is fixed at 0
    completion = client.chat.completions.create(
        temperature=0,
        messages=[system_turn, user_turn],
        model=model,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [20]:
# Classification prompt template. chatgpt_generation() replaces the [DOCUMENT]
# marker with the review text; the same template is reused for the full run.
prompt = """Predict whether the following document is a positive or negative
movie review:

[DOCUMENT]

If it is positive return 1 and if it is negative return 0. Do not give any
other answers.
"""

# A single, clearly positive review used as a smoke test
document = "unpretentious , charming , quirky , original"

# Send the filled-in prompt to the model (one API request)
result = chatgpt_generation(prompt, document)

print("Classification result:", result)  # Should return 1 (positive)

Classification result: 1


In [None]:
# pass the movie review to the function
import json
from pathlib import Path

import numpy as np
from tqdm import tqdm

# Every prediction is a separate API request and the full split takes hours,
# so replies are checkpointed to disk. Re-running this cell resumes where the
# previous run stopped instead of starting over. Delete the cache file to
# force a fresh run (for example after changing `prompt` or the model).
PREDICTIONS_CACHE = Path("deepseek_train_predictions.json")
CHECKPOINT_EVERY = 100  # number of new replies between checkpoint writes


def _save_predictions(replies):
    """Write the replies collected so far to the cache file."""
    # Write to a temporary file first so an interrupt cannot truncate the cache.
    tmp_path = PREDICTIONS_CACHE.with_name(PREDICTIONS_CACHE.name + ".tmp")
    tmp_path.write_text(json.dumps(replies), encoding="utf-8")
    tmp_path.replace(PREDICTIONS_CACHE)


train_texts = list(data['train']['text'])

if PREDICTIONS_CACHE.exists():
    cached = json.loads(PREDICTIONS_CACHE.read_text(encoding="utf-8"))
    predictions = cached[:len(train_texts)]
else:
    predictions = []

try:
    remaining = train_texts[len(predictions):]
    # `initial`/`total` make the bar show overall progress, including cached items
    for doc in tqdm(remaining, initial=len(predictions), total=len(train_texts)):
        predictions.append(chatgpt_generation(prompt, doc))
        if len(predictions) % CHECKPOINT_EVERY == 0:
            _save_predictions(predictions)
finally:
    # Runs on success, on API errors and on a manual interrupt alike, so no
    # completed request is lost.
    _save_predictions(predictions)

100%|██████████| 8530/8530 [9:14:16<00:00,  3.90s/it]     


In [25]:
from sklearn.metrics import classification_report

def evaluate_performance(y_true, y_pred):
    """Print a per-class precision/recall/F1 report for the sentiment labels.

    Args:
        y_true: Ground-truth labels, 0 for a negative and 1 for a positive review.
        y_pred: Predicted labels in the same encoding and order as ``y_true``.

    Returns:
        None. The formatted report is printed.
    """
    performance = classification_report(
        y_true, y_pred,
        # Pin the class order so "Negative Review" is always label 0 and
        # "Positive Review" is always label 1, even if a class is absent from
        # the inputs. Without `labels`, a missing or extra class makes the
        # number of classes disagree with `target_names`.
        labels=[0, 1],
        target_names=["Negative Review", "Positive Review"]
    )
    print(performance)


In [26]:
import re

# A standalone 0 or 1: not glued to other word characters, not the fractional
# or signed part of a larger number. Tolerates replies such as "1.", "**0**"
# or "Answer: 1" that a bare int() would reject.
_LABEL_TOKEN = re.compile(r"(?<![\w.\-])[01](?!\w|\.\d)")


def _parse_label(raw_prediction, index):
    """Turn one model reply into 0 or 1.

    Raises:
        ValueError: if the reply contains no label or contains both labels.
            The message names the offending index and reply so it can be
            inspected or re-requested.
    """
    found = set(_LABEL_TOKEN.findall(raw_prediction or ""))
    if len(found) != 1:
        raise ValueError(
            f"Prediction {index} is not an unambiguous 0/1 label: {raw_prediction!r}"
        )
    return int(found.pop())


y_pred = [_parse_label(pred, i) for i, pred in enumerate(predictions)]
y_true = data['train']['label']  # predictions were generated on the train split

evaluate_performance(y_true, y_pred)


                 precision    recall  f1-score   support

Negative Review       0.89      0.95      0.92      4265
Positive Review       0.95      0.89      0.92      4265

       accuracy                           0.92      8530
      macro avg       0.92      0.92      0.92      8530
   weighted avg       0.92      0.92      0.92      8530

