Imports

In [19]:
import csv
import os
from together import Together
from dotenv import load_dotenv

Initial Prompt

In [20]:
# Base instructions for the zero-shot human-vs-AI classifier.
# The text tells the model to answer with semicolon-separated `ID;Label` rows
# (labels: Human / AI) and shows a small input/output example. The dataset
# itself is not part of this literal; it is appended to `prompt` afterwards.
# NOTE: the literal is sent to the model verbatim, so whitespace inside it
# (including trailing spaces on some lines) is part of the request.
prompt="""
You are a specialist in human-AI detection and have extensive experience in distinguishing between texts written by humans and those generated by AI.         
You will receive a CSV-formatted dataset with two columns: ID and Text. Each row is an entry containing an ID and a text passage. Your task is to classify each entry into one of two classes:
- Human: if you believe the text is written by a human.
- AI: if you believe the text is generated by an AI.

Your response must strictly follow this CSV format:
ID;Label
Use the same ID from the input. Only output the CSV rows without explanations, comments, or extra formatting. The separator is a semicolon `;`.

### Example Input:
ID;Text  
D3-1;[Text]  
D3-2;[Text]
### Example Output:
D3-1;Human  
D3-2;AI
---
"""

Dataset to Predict

In [21]:
# Marker that separates the fixed instructions from the appended dataset.
dataset_marker = "\n### Here is the input dataset:\n"

# Read the whole validation input file; it is pasted into the prompt as-is.
with open("../../datasets/validations/inputs.csv", mode='r', encoding='utf-8') as file:
    fileContent = file.read()

# Keep this cell idempotent: if it has already been run, `prompt` already ends
# with a dataset section. Cut everything from the first marker onwards before
# attaching the freshly read file, so re-running never duplicates the dataset.
# On a first run the result is identical to `prompt + dataset_marker + fileContent`.
prompt = prompt.split(dataset_marker, 1)[0] + dataset_marker + fileContent

Zero Shot

In [22]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Resolve the key explicitly so a missing credential fails here with a clear
# message. TOGETHER_API_KEY is kept as a fallback because the Together client
# itself reads that variable when no key is passed.
api_key = os.getenv("API_KEY") or os.getenv("TOGETHER_API_KEY")
if not api_key:
    raise RuntimeError(
        "No Together API key found: set API_KEY (or TOGETHER_API_KEY) in the environment or in a .env file."
    )

client = Together(api_key=api_key)

# Single zero-shot request: the full prompt (instructions + dataset) is sent as
# one user message.
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V3",
    messages=[{"role": "user", "content": prompt}],
    temperature=0,  # classification benchmark: keep runs as repeatable as the API allows
)

# Text of the first (only requested) completion.
results = response.choices[0].message.content
if not results:
    # Surface an empty reply here instead of failing later with an obscure error.
    raise RuntimeError("The model returned an empty response; no predictions to write.")

Create Predictions File

In [23]:
# Turn the raw model reply into clean `ID;Label` rows.
# Models frequently wrap CSV in Markdown fences, echo the header, or add blank
# lines; none of those are predictions, so they are dropped here.
prediction_rows = []
for raw_line in results.splitlines():
    row = raw_line.replace(" ", "").strip()  # remove spaces (as before) and stray tabs/CRs
    if not row:
        continue  # blank line
    if row.startswith("```"):
        continue  # Markdown code fence
    if row.lower() == "id;label":
        continue  # header echoed by the model; it is written once below
    prediction_rows.append(row)

# Write with an explicit encoding so the file can be read back as UTF-8 on any platform.
with open("predictions.csv", "w", encoding="utf-8") as f:
    f.write("ID;Label\n")
    f.writelines(f"{row}\n" for row in prediction_rows)

Benchmark

In [24]:
def read_csv(file_path):
    """Load a semicolon-separated ``ID;Label`` file into a dictionary.

    The first row is treated as a header and skipped. Rows with fewer than two
    fields (blank lines, stray Markdown fences, ...) are ignored instead of
    raising ``IndexError``, and surrounding whitespace is stripped from both
    fields so that ``"AI "`` and ``"AI"`` compare equal.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        dict mapping each ID (first column) to its label (second column).
        An empty file yields an empty dict; if an ID occurs more than once,
        the last occurrence wins.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(file_path, mode='r', encoding='utf-8', newline='') as infile:
        reader = csv.reader(infile, delimiter=';')
        next(reader, None)  # Skip the header; tolerate a completely empty file
        return {
            row[0].strip(): row[1].strip()
            for row in reader
            if len(row) >= 2  # ignore blank or malformed rows
        }

def calculate_accuracy(file1, file2):
    """Return the percentage of IDs in ``file1`` whose label matches ``file2``.

    Both files are loaded with ``read_csv``. Every ID of ``file1`` counts towards
    the total; an ID that is absent from ``file2`` simply counts as a miss.

    Args:
        file1: Path of the reference CSV (defines the set of IDs that is scored).
        file2: Path of the CSV whose labels are compared against the reference.

    Returns:
        Accuracy as a percentage, or 0 when ``file1`` contains no rows.
    """
    reference = read_csv(file1)
    candidate = read_csv(file2)

    # Nothing to score: avoid dividing by zero.
    if not reference:
        return 0

    matches = 0
    for entry_id, expected_label in reference.items():
        if candidate.get(entry_id) == expected_label:
            matches += 1

    return (matches / len(reference)) * 100

# Reference labels (file1) and the predictions file written by this notebook (file2)
file1, file2 = "../../datasets/validations/outputs.csv", "predictions.csv"

# Score the predictions against the reference and report the percentage
accuracy = calculate_accuracy(file1=file1, file2=file2)
print(f"\nAccuracy: {accuracy:.2f}%")


Accuracy: 83.75%
