In [1]:
import csv
import json
import os
import random

from google.colab import userdata
from openai import OpenAI

my_sk = userdata.get('openaikey')

In [2]:
# Build the OpenAI client used by every API call below, authenticated with the
# key fetched through google.colab's userdata in the first cell.
client = OpenAI(api_key=my_sk)

In [4]:
# Load the comment/response pairs that become the fine-tuning examples.
# Column 0 of each row is the user comment, column 1 the desired response.

comment_list = []
response_list = []

# newline='' is what the csv module asks for so quoted fields that contain line
# breaks are parsed correctly. 'utf-8-sig' reads plain UTF-8 and also drops a
# leading byte-order mark, which would otherwise stop the header check below
# from matching (NOTE(review): assumes the CSV is UTF-8 encoded - confirm).
with open('finetuning_traffic_demo_QA.csv', mode='r', newline='', encoding='utf-8-sig') as csv_file:
    for row in csv.reader(csv_file):
        # skip blank or incomplete rows instead of failing with IndexError
        if len(row) < 2:
            continue

        # skip the header row
        if row[0] == 'Comment':
            continue

        # append comments and responses to respective lists
        comment_list.append(row[0])
        response_list.append(row[1])

In [5]:
# Sanity check: how many comments were collected by the loading cell.
len(comment_list)

107

In [7]:
# construct training examples
# example_list collects one {"messages": [...]} dict per comment/response pair.
example_list = []

# System prompt placed in front of every training example and reused when the
# fine-tuned model is queried later in the notebook.
# NOTE: the variable name is misspelled ("intstructions"); it is left as is
# because the later inference cell refers to it by this exact name.
intstructions_string_few_shot = """
You are RoadRisk-GPT, a concise driving-safety advisor.

TASK
• Read a single user sentence that describes an upcoming trip
  (vehicle type, distance, weather, roadway, traffic, time of day, and/or driver state).
• Return ONE short line containing:
    1. A risk label from this fixed set
       {Very High Risk | High Risk | Moderate Risk | Low Risk | Very Low Risk}
    2. A brief recommendation (≤ 20 words) that either
       – confirms it is safe to drive, or
       – suggests waiting, choosing another mode, or taking extra caution.
• Do NOT give mechanical instructions (brake, gear, tire, etc.).
• Keep a calm, neutral tone—no emojis or moral judgments.

EXAMPLES (“Comment” → “Assistant response”)

Q: I'm driving my compact SUV to a coffee shop five minutes away at 10 a.m.; skies clear, roads dry. Any risk?
A: Low Risk — routine short trip; obey the limit and stay alert.

Q: I'm taking a midsize sedan to the hardware store in light suburban traffic, pavement dry. Safe?
A: Low Risk — light traffic; maintain following distance and obey signals.

Q: I'm commuting on a clear morning by hatchback along a well-marked bike lane. Risk level?
A: Low Risk — conditions are normal; watch for cyclists when turning.

Q: I'm driving a minivan to the mall at 1 p.m.; parking lot may be busy. Risk?
A: Low Risk — go slow in the lot and watch pedestrians.

Q: I'm using a hybrid sedan on a divided highway with moderate flow, dry surface. Safe to proceed?
A: Low Risk — ideal highway conditions; keep to posted speed.

(Use the same response style and label set for all future queries.)
"""


# Turn every comment/response pair into one chat-format training example:
# the shared system prompt, the comment as the user turn, and the response
# as the assistant turn. Results are appended to example_list in order.
example_list.extend(
    {
        "messages": [
            {"role": "system", "content": intstructions_string_few_shot},
            {"role": "user", "content": comment_text},
            {"role": "assistant", "content": response_list[position]},
        ]
    }
    for position, comment_text in enumerate(comment_list)
)

In [8]:
# create train/validation split
# After this cell, validation_data_list holds the held-out examples and
# example_list holds only the remaining training examples.
# NOTE: re-running this cell on its own carves another validation set out of
# the already-reduced example_list; re-run the construction cell first.
VALIDATION_SIZE = 9  # number of examples held out for validation
SPLIT_SEED = 42      # fixed seed so a fresh run reproduces the same split

if len(example_list) <= VALIDATION_SIZE:
    raise ValueError(
        f"need more than {VALIDATION_SIZE} examples to split, got {len(example_list)}"
    )

# Sample over range(len(example_list)) so every example, including the last
# one, can land in the validation set. A dedicated Random instance keeps the
# module-level random state untouched.
validation_index_list = random.Random(SPLIT_SEED).sample(
    range(len(example_list)), VALIDATION_SIZE
)
validation_index_set = set(validation_index_list)

validation_data_list = [example_list[index] for index in validation_index_list]

# Keep the training examples by position; this avoids list.remove(), which
# rescans the list for the first equal element on every call.
example_list = [
    example
    for index, example in enumerate(example_list)
    if index not in validation_index_set
]

In [10]:
# write examples to file
# Each example is written as one JSON object per line (JSONL).

# open() does not create missing directories, so make sure data/ exists;
# without this the cell fails with FileNotFoundError on a fresh runtime.
os.makedirs('data', exist_ok=True)

# The handles get their own names so they are not confused with the
# training_file / validation_file upload objects created in the next cell.
with open('data/training-data-traffic.jsonl', 'w') as training_out:
    for example in example_list:
        json.dump(example, training_out)
        training_out.write('\n')

with open('data/validation-data-traffic.jsonl', 'w') as validation_out:
    for example in validation_data_list:
        json.dump(example, validation_out)
        validation_out.write('\n')

In [12]:
# Upload the training and validation JSONL files for fine-tuning.
# training_file and validation_file are the objects returned by the API; their
# .id values are what the job-creation and clean-up cells use.
# The local files are opened in `with` blocks so the handles are closed once
# each upload call returns instead of being left open.
with open("data/training-data-traffic.jsonl", "rb") as training_jsonl:
    training_file = client.files.create(
        file=training_jsonl,
        purpose="fine-tune",
    )

with open("data/validation-data-traffic.jsonl", "rb") as validation_jsonl:
    validation_file = client.files.create(
        file=validation_jsonl,
        purpose="fine-tune",
    )

In [13]:
# Start the fine-tuning job on the two uploaded files. The call stays the last
# expression of the cell so the returned job object is displayed as its output.
fine_tune_request = dict(
    model="gpt-3.5-turbo",
    suffix="trafficGPT",
    training_file=training_file.id,
    validation_file=validation_file.id,
)
client.fine_tuning.jobs.create(**fine_tune_request)

FineTuningJob(id='ftjob-k8FRh7BGYlo7mcCpt6h0Emps', created_at=1751490547, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto'), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-aiRbOA5mzdzx0tXLWxJJ5LsS', result_files=[], seed=1298845134, status='validating_files', trained_tokens=None, training_file='file-U5j7WMeBscHKrJaPNnb6W7', validation_file='file-BinRDN5G1w9h8Nx96fNBmw', estimated_finish=None, integrations=[], metadata=None, method=Method(type='supervised', dpo=None, reinforcement=None, supervised=SupervisedMethod(hyperparameters=SupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto'))), user_provided_suffix='trafficGPT', usage_metrics=None, shared_with_openai=False, eval_id=None)

In [15]:
# Prompt sent to the fine-tuned model in the next cell.
# NOTE: the typographic quotes sit inside the Python string, so they are sent
# to the model as part of the prompt text.
test_comment = "“I’ve had three beers and need to drive my pickup across an icy bridge at midnight. Should I risk it?”"
# Alternative prompts to try (assign one to test_comment instead):
#  “I took cold medicine that makes me drowsy and need to drive 30 miles on the freeway; is that okay?”
#  “I’ll be driving at dawn with sun glare straight ahead on a rural highway at the speed limit; safe?”
#  “I plan to drive one block to the mailbox on an empty street at noon—safe?”
#  “I’m taking a hybrid on a divided highway with light traffic and clear weather—safe to go?”

In [16]:
# Query the fine-tuned model with the same system prompt used for the training
# examples, followed by the test prompt as the user turn.
inference_messages = [
    {"role": "system", "content": intstructions_string_few_shot},
    {"role": "user", "content": test_comment},
]
response = client.chat.completions.create(
    model="ft:gpt-3.5-turbo-0125:personal:trafficgpt:BozBEGKl",
    messages=inference_messages,
)

In [17]:
# Print the text of the first returned choice. The response object exposes its
# fields as attributes, so there is no need to convert it to a dict first.
print(response.choices[0].message.content)

Very high risk—alcohol and ice make this extremely dangerous. Get a sober ride or wait until conditions improve.


In [None]:
# Clean-up: delete the two files uploaded earlier, identified by the ids of the
# training_file and validation_file upload objects.
client.files.delete(training_file.id)
client.files.delete(validation_file.id)