In [1]:
import pandas as pd

# Read the labelled TikTok transcripts from the CSV in the working directory
data = pd.read_csv("NewTikTokData.csv")

# Sanity check: show the first five rows
print(data.head(n=5))


         FileName Label                                      Transcription
0  file1-Fake.mp4  fake  news Disneyworld officially removed the drinki...
1  file2-Fake.mp4  fake  information it's game on for everyone at first...
2  file3-Fake.mp4  fake  how bad is 5G for your health while you're her...
3  file4-Fake.mp4  fake  so 5G technology if you notice before they put...
4  file5-Fake.mp4  fake  so if you think this is going to be no big dea...


In [3]:
import json

def create_jsonl_entry(row):
    """Build one chat-format fine-tuning example from a dataset row.

    Args:
        row: Mapping-like record (e.g. a pandas Series) that can be indexed
            with 'Transcription' and 'Label'.

    Returns:
        A dict with a single "messages" list holding, in order, the system
        instruction, the user prompt embedding the transcription, and the
        assistant reply set to the row's label.
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant that classifies transcripts of TikTok videos as 'fake' (if it contains fake information), 'real' (if it contains some information but is likely true), or 'safe' (if it contains no claims).",
    }

    # The transcription is coerced to str so non-string cells still concatenate
    user_prompt = "This TikTok says: " + str(row['Transcription']) + "\nClassify it as:"
    user_message = {"role": "user", "content": user_prompt}

    # The label is passed through unchanged as the expected completion
    assistant_message = {"role": "assistant", "content": row['Label']}

    return {"messages": [system_message, user_message, assistant_message]}

# Serialise every dataset row as one JSON object per line (JSONL) into
# fine_tune_data_TIKTOK.jsonl; "w" mode overwrites any existing file.
# The loop names (`row`, `jsonl_entry`) are ordinary notebook-level variables,
# so they stay bound to the last row's values after this cell finishes.
with open("fine_tune_data_TIKTOK.jsonl", "w") as f:
    for _, row in data.iterrows():
        # Build the chat-format training example for this row
        jsonl_entry = create_jsonl_entry(row)
        # One record per line: the appended newline terminates the entry
        f.write(json.dumps(jsonl_entry) + "\n")

In [6]:
from openai import OpenAI
import os

# API client; the key is read from the OPENAI_API_KEY environment variable
# (KeyError if it is unset), so no secret is stored in the notebook itself.
openai = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [9]:
# Upload the training JSONL for fine-tuning. The context manager closes the
# local file handle as soon as the upload call returns, so it is not left open.
with open("fine_tune_data_TIKTOK.jsonl", "rb") as training_fh:
    file = openai.files.create(file=training_fh, purpose='fine-tune')
print(file)

FileObject(id='file-ElZ0TjuT0iCwMZUkCmOpHC3z', bytes=17099, created_at=1730562383, filename='fine_tune_data_TIKTOK.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)


In [10]:
# Start the fine-tuning job on the file uploaded in the previous cell.
# Referencing file.id (instead of a pasted ID string) keeps this cell in sync
# with whatever the upload cell produced on the current run.
fine_tune = openai.fine_tuning.jobs.create(training_file=file.id, model="gpt-4o-mini-2024-07-18")
print(fine_tune)

FineTuningJob(id='ftjob-vxeAE1VkAieKxwTn2QVhKbMX', created_at=1730562391, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-qbB7poxzsbQf8xgCn62RqYk5', result_files=[], seed=1297693166, status='validating_files', trained_tokens=None, training_file='file-ElZ0TjuT0iCwMZUkCmOpHC3z', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)


In [11]:
import os

# Point the Google client libraries at a service-account key file.
# setdefault: a GOOGLE_APPLICATION_CREDENTIALS value already present in the
# environment wins; the machine-specific path below is only a fallback.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/teo/Downloads/psyched-freedom-182221-658209fdfd55.json",
)

import time
from google.cloud import videointelligence_v1 as videointelligence



def transcribe_video(file_path):
    """Transcribe the speech in a local video via the Video Intelligence API.

    Args:
        file_path: Path to the video file; its bytes are read fully and sent
            inline with the request.

    Returns:
        The transcript of every alternative of every speech transcription in
        the first annotation result, joined with single spaces and stripped.
        Returns "" (after printing a failure message) if the finished
        operation yields a falsy result.
    """
    # A new API client is created on each call
    service = videointelligence.VideoIntelligenceServiceClient()

    # Load the whole video into memory for inline upload
    with open(file_path, "rb") as video_file:
        video_bytes = video_file.read()

    # Ask for speech transcription only; adjust language_code if needed
    speech_config = videointelligence.SpeechTranscriptionConfig(language_code="en-US")
    annotate_request = {
        "features": [videointelligence.Feature.SPEECH_TRANSCRIPTION],
        "input_content": video_bytes,
        "video_context": videointelligence.VideoContext(speech_transcription_config=speech_config),
    }

    operation = service.annotate_video(request=annotate_request)
    print(f"Started transcription for {file_path}... Waiting for completion.")

    # Check every 10 seconds until the operation reports that it is done
    while True:
        if operation.done():
            break
        print("Waiting for transcription to finish...")
        time.sleep(10)

    # Guard clause: a falsy result is reported and mapped to an empty string
    result = operation.result()
    if not result:
        print(f"Transcription failed for {file_path}")
        return ""

    # Flatten annotation -> alternative -> transcript, in original order
    transcripts = [
        alternative.transcript
        for annotation in result.annotation_results[0].speech_transcriptions
        for alternative in annotation.alternatives
    ]
    return " ".join(transcripts).strip()


In [19]:
# Transcribe a test video and classify the transcript with the fine-tuned model.
testTranscript = transcribe_video("file12-Real.mp4")

# The prompt mirrors the system + user messages of the training examples and
# stops there: no assistant message is sent, so the label is produced by the
# model and not taken from a leftover training `row`.
response = openai.chat.completions.create(
    model="ft:gpt-4o-mini-2024-07-18:personal::APAiF7vv",  # Replace with your fine-tuned model ID
    messages=[
        {"role": "system", "content": "You are a helpful assistant that classifies transcripts of TikTok videos as 'fake' (if it contains fake information), 'real' (if it contains some information but is likely true), or 'safe' (if it contains no claims)."},
        {"role": "user", "content": "This TikTok says: " + str(testTranscript) + "\nClassify it as:"},
    ]
)

print(response.choices[0].message.content)
# Stripped label; the follow-up cell compares it against "fake" / "real"
classification = response.choices[0].message.content.strip()

Started transcription for file12-Real.mp4... Waiting for completion.
Waiting for transcription to finish...
Waiting for transcription to finish...
Waiting for transcription to finish...
real


In [22]:
# Only transcripts classified as containing claims ("fake" or "real") get a
# follow-up request asking for a claim summary and sources.
if classification in ("fake", "real"):
    # Only system + user messages are sent; no assistant message is appended,
    # so the reply does not depend on a leftover training `row`.
    # NOTE(review): this is a plain chat completion with no retrieval tool
    # attached, so any links in the reply are unverified; check them manually.
    response2 = openai.chat.completions.create(
        model="ft:gpt-4o-mini-2024-07-18:personal::APAiF7vv",  # Replace with your fine-tuned model ID
        messages=[
            {"role": "system", "content": "You think a TikTok video (for which I'll give you a transcript) likely contains some questionable claims (which may be true or false!). Give me a summary on what the claims exactly are and give me sources to support whether you think they're true or false! Sources need to be real, clickable links that you have researched. They need to be relevant and trustworthy (authoritive)."},
            {"role": "user", "content": "This TikTok says: " + str(testTranscript) + "\nI think it's " + classification + ". Give me a summary on questionable claims and sources where I can verify them. Also give me your overall opinion: is it fake or real?"},
        ]
    )

    print(response2.choices[0].message.content)

**Summary of Questionable Claims:**

1. **Jumping before Impact**: The claim states that jumping just before the elevator hits the ground will not save your life and could lead to severe injuries instead. This emphasizes the difficulty of timing the jump correctly when the elevator is already falling quickly.

2. **Best Positioning for Survival**: It suggests that lying flat on the floor and protecting your head with hands or a bag will increase your chances of surviving the fall, as it helps distribute impact forces.

---

**Sources for Verification:**

1. **Jumping in a Falling Elevator**:
   - *NPR* discusses the fallacy of the jump:
     [Why You Can't Jump to Safety in a Falling Elevator](https://www.npr.org/sections/money/2020/12/30/951088836/why-you-cant-jump-to-safety-in-a-falling-elevator)

2. **Survival Position in a Falling Elevator**:
   - *HowStuffWorks* elaborates on the best positions in case of an elevator fall:
     [What Should You Do If an Elevator is Falling?](https