In [14]:
pip install requests pandas python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.1


In [None]:
from google.colab import userdata

# Fetch the secret only to confirm it is readable. Never leave the value as the
# cell's last expression: the echoed key would be stored in the notebook output.
_key_present = bool(userdata.get('DEEPSEEK_API_KEY'))
print(f"DEEPSEEK_API_KEY available: {_key_present}")

'[REDACTED: API key removed from saved output; rotate the exposed key]'

In [None]:
from google.colab import userdata
import os

# Get the API key from Colab Secrets
api_key_value = userdata.get('DEEPSEEK_API_KEY')

# Fail fast with a clear message: os.environ only accepts str values, so a
# missing key would otherwise surface as an opaque TypeError, and an empty one
# would silently produce unauthenticated requests later on.
if not api_key_value:
    raise RuntimeError("DEEPSEEK_API_KEY is empty or missing in Colab Secrets.")

# Export the key through os.environ so that later cells can read it with
# os.getenv; this cell must run before any cell that does so.
os.environ['DEEPSEEK_API_KEY'] = api_key_value

print("Environment variable DEEPSEEK_API_KEY set.")

Environment variable DEEPSEEK_API_KEY set.


In [2]:
import os
import time
import csv
import re
import requests
import pandas as pd
from google.colab import userdata


# Load API key from Colab Secrets
DEEPSEEK_API_KEY = userdata.get("DEEPSEEK_API_KEY")

# Report only whether a key was loaded. No part of the key is printed, because
# cell output is saved with the notebook and even a prefix/suffix is a leak.
if DEEPSEEK_API_KEY:
    print("DeepSeek API key loaded successfully.")
else:
    print("DeepSeek API key not loaded. Please check your Colab secrets.")


def parse_options(options_str):
    """Parse a stringified list of answer options into a list of choices.

    Handles both the Python list repr (``"['a', 'b']"``) and the
    whitespace-separated form (``"['a' 'b']"``). Items may be quoted with
    single or double quotes, so an option such as ``"Crohn's disease"`` is
    kept intact instead of being split at its apostrophe.

    Args:
        options_str: Raw cell value. It is coerced with ``str`` so that
            non-string values (e.g. NaN) yield an empty list instead of raising.

    Returns:
        list[str]: The option texts in their original order; empty when no
        quoted item is found.
    """
    text = str(options_str)
    # One alternative per quote style. A backslash always consumes the next
    # character, so an escaped quote does not terminate the item early.
    quoted_item = r"'((?:[^'\\]|\\.)*)'" + "|" + r'"((?:[^"\\]|\\.)*)"'
    choices = []
    for single_quoted, double_quoted in re.findall(quoted_item, text, flags=re.DOTALL):
        raw = single_quoted or double_quoted
        # Undo the backslash escaping of quotes and backslashes inside the item.
        choices.append(re.sub(r"\\(['\"\\])", r"\1", raw))
    return choices

def ask_deepseek_r1(question, options, timeout=60):
    """Ask the DeepSeek chat-completions API to answer one multiple-choice question.

    NOTE(review): the function name says "R1", but the payload requests the
    ``deepseek-chat`` model -- confirm which model is intended.

    Args:
        question: Question text inserted into the prompt.
        options: Sequence of option texts; they are lettered A, B, C, ... in order.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        str: The answer letter extracted from the reply, or ``"ERROR"`` when no
        options were supplied, the request fails, or the reply is empty.
    """
    if not options:
        # Without choices there is nothing to pick from; skip the API call.
        print("No options supplied; skipping API call.")
        return "ERROR"

    choices_str = "\n".join([f"{chr(65+i)}. {opt}" for i, opt in enumerate(options)])
    prompt = f"""
    You are a medical expert answering multiple-choice questions.
    Strictly respond with ONLY the letter (A, B, C, etc.) of the correct option.
    Do not include explanations or additional text.

    Question: {question}
    Options:
    {choices_str}
    Answer:"""

    try:
        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.0,
            "max_tokens": 5
        }
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout
        )
        response.raise_for_status()
        # The content field may be null; treat that like an empty reply.
        content = (response.json()["choices"][0]["message"]["content"] or "").strip()
        if not content:
            return "ERROR"
        # Prefer a standalone capital letter so decorated replies such as
        # "**B**", "(B)" or "Answer: B" still yield "B" rather than "*", "(" or "A".
        letter = re.search(r"\b[A-Z]\b", content)
        return letter.group(0) if letter else content[0].upper()
    except Exception as e:
        # Best-effort by design: one failed call must not abort the whole batch.
        print(f"API Error: {e}")
        return "ERROR"

def process_csv(input_file, output_file, encoding='latin1', delay=1):
    """Run every question of a CSV through DeepSeek and save a comparison table.

    Args:
        input_file: Path of a CSV with 'question', 'options' and 'answer' columns.
        output_file: Path the results CSV is written to.
        encoding: Text encoding used to read ``input_file``.
        delay: Seconds to sleep after each question (rate limit management);
            a falsy value disables the pause.

    Raises:
        KeyError: If a required column is missing from the input CSV.
    """
    df = pd.read_csv(input_file, encoding=encoding)

    # Check the schema up front so a wrong file fails before any API call is made.
    missing = [col for col in ('question', 'options', 'answer') if col not in df.columns]
    if missing:
        raise KeyError(f"Input CSV is missing required column(s): {missing}")

    total = len(df)
    results = []

    # enumerate gives a 1-based progress counter that does not depend on the
    # DataFrame's index labels.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        question = row['question']
        # Convert options to string to handle potential non-string types (like float/NaN)
        options = parse_options(str(row['options']))
        correct_answer = row['answer']

        deepseek_answer = ask_deepseek_r1(question, options)
        # Compare ignoring surrounding whitespace and letter case, so an answer
        # cell such as "a " still matches the model's "A".
        is_match = str(deepseek_answer).strip().upper() == str(correct_answer).strip().upper()
        match = "✅" if is_match else "❌"

        results.append({
            'Question': question,
            'Correct_Answer': correct_answer,
            'DeepSeek_Answer': deepseek_answer,
            'Match': match
        })

        print(f"Processed row {position}/{total}: DeepSeek={deepseek_answer}, Correct={correct_answer}")
        if delay:
            time.sleep(delay)  # Rate limit management

    result_df = pd.DataFrame(results)
    result_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Example usage
if __name__ == "__main__":
    # TODO: Update the file path below to the correct path of your uploaded CSV file.
    # Keyword arguments make it explicit which path is read and which is written.
    process_csv(
        input_file="/content/Reduced MMLU-PRO.csv",  # <<-- Change this path
        output_file="deepseek_results.csv",
    )

DeepSeek API key loaded successfully (starts with: [redacted])
Processed row 1/162: DeepSeek=A, Correct=A
Processed row 2/162: DeepSeek=F, Correct=F
Processed row 3/162: DeepSeek=B, Correct=B
Processed row 4/162: DeepSeek=D, Correct=D
Processed row 5/162: DeepSeek=C, Correct=C
Processed row 6/162: DeepSeek=B, Correct=B
Processed row 7/162: DeepSeek=H, Correct=B
Processed row 8/162: DeepSeek=B, Correct=B
Processed row 9/162: DeepSeek=D, Correct=E
Processed row 10/162: DeepSeek=A, Correct=A
Processed row 11/162: DeepSeek=B, Correct=B
Processed row 12/162: DeepSeek=A, Correct=A
Processed row 13/162: DeepSeek=I, Correct=I
Processed row 14/162: DeepSeek=B, Correct=B
Processed row 15/162: DeepSeek=B, Correct=B
Processed row 16/162: DeepSeek=F, Correct=F
Processed row 17/162: DeepSeek=G, Correct=G
Processed row 18/162: DeepSeek=C, Correct=C
Processed row 19/162: DeepSeek=H, Correct=H
Processed row 20/162: DeepSeek=B, Correct=B
Processed row 21/162: DeepSeek=F, Correct=F
Processed row 22/16

In [15]:
import os
import time
import csv
import re
import requests
import pandas as pd
from dotenv import load_dotenv

# Load API key from .env file
load_dotenv()
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")

# os.getenv returns None when the variable is unset; warn now rather than
# letting every request go out with an "Authorization: Bearer None" header.
if not DEEPSEEK_API_KEY:
    print("DeepSeek API key not loaded. Please check your .env file or environment variables.")

def parse_options(options_str):
    """Parse a stringified list of answer options into a list of choices.

    Handles both the Python list repr (``"['a', 'b']"``) and the
    whitespace-separated form (``"['a' 'b']"``). Items may be quoted with
    single or double quotes, so an option such as ``"Crohn's disease"`` is
    kept intact instead of being split at its apostrophe.

    Args:
        options_str: Raw cell value. It is coerced with ``str`` so that
            non-string values (e.g. a NaN float from an empty CSV cell) yield
            an empty list instead of raising ``TypeError``.

    Returns:
        list[str]: The option texts in their original order; empty when no
        quoted item is found.
    """
    text = str(options_str)
    # One alternative per quote style. A backslash always consumes the next
    # character, so an escaped quote does not terminate the item early.
    quoted_item = r"'((?:[^'\\]|\\.)*)'" + "|" + r'"((?:[^"\\]|\\.)*)"'
    choices = []
    for single_quoted, double_quoted in re.findall(quoted_item, text, flags=re.DOTALL):
        raw = single_quoted or double_quoted
        # Undo the backslash escaping of quotes and backslashes inside the item.
        choices.append(re.sub(r"\\(['\"\\])", r"\1", raw))
    return choices

def ask_deepseek_r1(question, options, timeout=60):
    """Ask the DeepSeek chat-completions API to answer one multiple-choice question.

    NOTE(review): the function name says "R1", but the payload requests the
    ``deepseek-chat`` model -- confirm which model is intended.

    Args:
        question: Question text inserted into the prompt.
        options: Sequence of option texts; they are lettered A, B, C, ... in order.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        str: The answer letter extracted from the reply, or ``"ERROR"`` when no
        options were supplied, the request fails, or the reply is empty.
    """
    if not options:
        # Without choices there is nothing to pick from; skip the API call.
        print("No options supplied; skipping API call.")
        return "ERROR"

    choices_str = "\n".join([f"{chr(65+i)}. {opt}" for i, opt in enumerate(options)])
    prompt = f"""
    You are a medical expert answering multiple-choice questions.
    Strictly respond with ONLY the letter (A, B, C, etc.) of the correct option.
    Do not include explanations or additional text.

    Question: {question}
    Options:
    {choices_str}
    Answer:"""

    try:
        headers = {
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.0,
            "max_tokens": 5
        }
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout
        )
        response.raise_for_status()
        # The content field may be null; treat that like an empty reply.
        content = (response.json()["choices"][0]["message"]["content"] or "").strip()
        if not content:
            return "ERROR"
        # Prefer a standalone capital letter so decorated replies such as
        # "**B**", "(B)" or "Answer: B" still yield "B" rather than "*", "(" or "A".
        letter = re.search(r"\b[A-Z]\b", content)
        return letter.group(0) if letter else content[0].upper()
    except Exception as e:
        # Best-effort by design: one failed call must not abort the whole batch.
        print(f"API Error: {e}")
        return "ERROR"

def process_csv(input_file, output_file, encoding=None, delay=1):
    """Run every question of a CSV through DeepSeek and save a comparison table.

    Args:
        input_file: Path of a CSV with 'question', 'options' and 'answer' columns.
        output_file: Path the results CSV is written to.
        encoding: Text encoding used to read ``input_file``; ``None`` keeps
            pandas' default.
        delay: Seconds to sleep after each question (rate limit management);
            a falsy value disables the pause.

    Raises:
        KeyError: If a required column is missing from the input CSV.
    """
    df = pd.read_csv(input_file, encoding=encoding)

    # Check the schema up front so a wrong file fails before any API call is made.
    missing = [col for col in ('question', 'options', 'answer') if col not in df.columns]
    if missing:
        raise KeyError(f"Input CSV is missing required column(s): {missing}")

    total = len(df)
    results = []

    # enumerate gives a 1-based progress counter that does not depend on the
    # DataFrame's index labels.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        question = row['question']
        # An empty options cell is read as a NaN float; coerce to str so the
        # regex-based parser receives text instead of raising TypeError.
        options = parse_options(str(row['options']))
        correct_answer = row['answer']

        deepseek_answer = ask_deepseek_r1(question, options)
        # Compare ignoring surrounding whitespace and letter case, so an answer
        # cell such as "a " still matches the model's "A".
        is_match = str(deepseek_answer).strip().upper() == str(correct_answer).strip().upper()
        match = "✅" if is_match else "❌"

        results.append({
            'Question': question,
            'Correct_Answer': correct_answer,
            'DeepSeek_Answer': deepseek_answer,
            'Match': match
        })

        print(f"Processed row {position}/{total}: DeepSeek={deepseek_answer}, Correct={correct_answer}")
        if delay:
            time.sleep(delay)  # Rate limit management

    result_df = pd.DataFrame(results)
    result_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

# Example usage
if __name__ == "__main__":
    # Keyword arguments make it explicit which path is read and which is written.
    process_csv(
        input_file="Reduced-MMLU-PRO-Sheet1.csv",
        output_file="deepseek_results.csv",
    )


FileNotFoundError: [Errno 2] No such file or directory: 'Reduced-MMLU-PRO-Sheet1.csv'