First, let's load the labelled tweet data that we will use to fine-tune a model through the OpenAI API.

In [68]:
import pandas as pd

# Load the labelled tweets into a DataFrame.
data = pd.read_csv("tweetData.csv")

# Fail fast if the columns used to build the fine-tuning examples are absent,
# instead of hitting a KeyError row by row while the JSONL file is written.
missing_columns = {"TweetContent", "Label"} - set(data.columns)
if missing_columns:
    raise ValueError(f"tweetData.csv is missing required columns: {sorted(missing_columns)}")

# Preview the first rows as a quick sanity check.
print(data.head())


                                        TweetContent Label
0  It's inspiring to see Rome harnessing AI for a...  safe
1  👹 AIRDROP second release more 10 000 GOBLINS 👹...  scam
2  I'm happy to see everyone enjoying @HeadsUp. W...  safe
3  Highest score I've seen! RT @Nessa Oh yeah! Fi...  safe
4  @Jake_Fitch What was your score? RT @trilog3te...  safe


We will now prepare the data for fine-tuning OpenAI's GPT-4o model (`gpt-4o-2024-08-06`).

The fine-tuned model will use the tweet content to classify each tweet as one of three types:
  - Likely safe
  - Likely scam
  - Likely fake news (with given source!)

In [69]:
import json

# Function to create a JSONL entry
def create_jsonl_entry(row):
    """Build one chat-format fine-tuning example from a labelled tweet.

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) providing
            a 'TweetContent' value and a 'Label' value.

    Returns:
        dict: A ``{"messages": [...]}`` record with the system prompt, the
        user prompt containing the tweet text, and the label as the
        assistant reply.

    Raises:
        ValueError: If the tweet text or the label is missing (None/NaN).
    """
    tweet, label = row['TweetContent'], row['Label']

    # A missing label would be serialised by json.dumps as the bare token
    # NaN (not valid JSON), and a missing tweet would become the text "nan";
    # reject such rows explicitly rather than writing a bad example.
    if pd.isna(tweet) or pd.isna(label):
        raise ValueError(
            f"Cannot build a training example from a row with a missing value: "
            f"TweetContent={tweet!r}, Label={label!r}"
        )

    return {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that classifies tweets as either safe, scam, or fake-news."},
            {"role": "user", "content": "This tweet says: " + str(tweet) + "\nClassify it as:"},
            # Coerce to a string and trim stray whitespace so " scam " and
            # "scam" do not end up as two different target labels.
            {"role": "assistant", "content": str(label).strip()}
        ]
    }

# Serialise every row as one JSON object per line and write them to the JSONL training file.
with open("fine_tune_data.jsonl", "w") as jsonl_file:
    jsonl_file.writelines(
        json.dumps(create_jsonl_entry(tweet_row)) + "\n"
        for _, tweet_row in data.iterrows()
    )

In [70]:
from openai import OpenAI
import os

# Create the API client; the key is read from the environment so it never appears in the notebook.
openai = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [71]:
# Upload the training file. The context manager closes the local file handle
# once the upload call returns, instead of leaving it open in the kernel.
with open("fine_tune_data.jsonl", "rb") as training_fh:
    file = openai.files.create(file=training_fh, purpose='fine-tune')
print(file)

FileObject(id='file-9IsFJeEYeqXAvGeEPIAfH293', bytes=58529, created_at=1730523419, filename='fine_tune_data.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)


In [72]:
# Start the fine-tuning job from the file uploaded in the previous cell.
# Referencing file.id (rather than an ID pasted from an earlier output) keeps
# the notebook re-runnable: a fresh upload has its own ID.
fine_tune = openai.fine_tuning.jobs.create(training_file=file.id, model="gpt-4o-2024-08-06")
print(fine_tune)

FineTuningJob(id='ftjob-3hFpdnW1QH1MdprKHRPr1OBT', created_at=1730523427, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-2024-08-06', object='fine_tuning.job', organization_id='org-qbB7poxzsbQf8xgCn62RqYk5', result_files=[], seed=1879177518, status='validating_files', trained_tokens=None, training_file='file-9IsFJeEYeqXAvGeEPIAfH293', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)


Once the fine-tuning job has completed, let's test the resulting model with a simple example.

In [74]:
# Classify one unseen tweet with the fine-tuned model.
# The user prompt is assembled exactly like the training examples
# ("This tweet says: <text>\nClassify it as:") with no extra quotes around
# the tweet, so the inference prompt matches the fine-tuning format.
tweet_text = "What a world we built together!\n\nWhat is your favorite zone in all of Azeroth?"

response = openai.chat.completions.create(
    model="ft:gpt-4o-2024-08-06:personal::AP0mZFnN",  # Replace with your fine-tuned model ID
    messages=[
        {"role": "system", "content": "You are a helpful assistant that classifies tweets as either safe, scam, or fake-news."},
        {"role": "user", "content": "This tweet says: " + tweet_text + "\nClassify it as:"}
    ]
)

# The predicted label is the text of the first (only) returned choice.
print(response.choices[0].message.content)

safe
