In [36]:
# !pip install openai

In [37]:
from IPython.display import Markdown, display
from openai import OpenAI
import os

In [38]:
# Mount Google Drive at /content/drive so the dataset stored under it can be
# read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
# Build the API client; the key is read from the OPENAI_API_KEY environment
# variable rather than being written into the notebook.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Smoke test: send one system + user exchange to gpt-4o-mini and render the
# reply as Markdown.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        dict(role="system", content="You are a great philosopher."),
        dict(role="user", content="What is the meaning of life?"),
    ],
)
reply_text = response.choices[0].message.content
display(Markdown(reply_text))

The meaning of life is a profound question that has been contemplated by philosophers, theologians, and thinkers throughout history. Different perspectives offer various interpretations:

1. **Existentialism**: This viewpoint, associated with philosophers like Jean-Paul Sartre and Albert Camus, suggests that life has no inherent meaning, and it is up to individuals to create their own purpose through choices and actions.

2. **Religious Perspectives**: Many religious traditions propose that the meaning of life is defined by a connection to the divine, moral conduct, and the pursuit of spiritual fulfillment. For instance, in Christianity, the purpose of life might be seen as serving God and practicing love and compassion toward others.

3. **Humanism**: From a humanistic standpoint, the meaning of life can be found in the pursuit of knowledge, the cultivation of relationships, and the betterment of society. Humanists often emphasize the importance of human experiences, emotions, and ethics.

4. **Buddhism**: In Buddhist philosophy, life is often viewed through the lens of suffering and the quest for enlightenment. The meaning of life may involve recognizing and transcending suffering through practices like mindfulness and compassion.

5. **Scientific Perspective**: Some may argue that from a biological standpoint, the meaning of life is to survive and reproduce, ensuring the continuation of our species.

Ultimately, the meaning of life can be a deeply personal journey. It may vary significantly from one individual to another, shaped by personal experiences, beliefs, and aspirations. Finding meaning often involves exploration, introspection, and engagement with the world around us.

In [41]:
import pandas as pd
import json
from sklearn.model_selection import train_test_split

# Load the raw CSV; the file is semicolon-delimited.
file_path = '/content/drive/MyDrive/Reddit_Title.csv'  # Change this to your own path
data = pd.read_csv(file_path, sep=';')

# Numeric class ids -> the text labels the model is trained to emit.
label_mapping = {0: "non-stress", 1: "stress"}

# Keep only the two needed columns from the first 200 rows. `.assign` returns a
# new frame instead of writing into a slice of `data`, so there is no
# chained-assignment ambiguity and re-running the cell gives the same result.
data_cleaned = (
    data[['title', 'label']]
    .head(200)
    .assign(label=lambda frame: frame['label'].map(label_mapping))
)

# `Series.map` yields NaN for any value missing from `label_mapping`; left
# unchecked, such rows would be written to the training file with the label "nan".
unmapped_rows = data_cleaned['label'].isna()
if unmapped_rows.any():
    raise ValueError(
        f"{int(unmapped_rows.sum())} row(s) have a label outside {sorted(label_mapping)}; "
        "clean the source data before building the fine-tuning files."
    )

# Split the data into training and validation sets (80% train, 20% validation).
# The fixed random_state keeps the split reproducible across runs.
train_data, validation_data = train_test_split(data_cleaned, test_size=0.2, random_state=42)

def save_to_jsonl(data, output_file_path):
    """Write one chat-format fine-tuning example per row of `data` as JSON Lines.

    Each output line is a JSON object with a "messages" list holding the fixed
    system instruction, the row's 'title' as the user turn, and the row's
    'label' wrapped in double quotes as the assistant turn.

    Args:
        data: DataFrame with 'title' and 'label' columns.
        output_file_path: Destination path; the file is opened in 'w' mode, so
            existing content is replaced.
    """
    system_turn = {
        "role": "system",
        "content": "Given a social media post, classify whether it indicates 'stress' or 'non-stress'.",
    }

    def to_example(record):
        # The assistant answer is the label surrounded by literal double quotes.
        return {
            "messages": [
                system_turn,
                {"role": "user", "content": record['title']},
                {"role": "assistant", "content": '"{}"'.format(record['label'])},
            ]
        }

    # All examples are built before the output file is opened.
    examples = [to_example(record) for _, record in data.iterrows()]

    # Save to JSONL format: one serialized example per line.
    with open(output_file_path, 'w') as sink:
        sink.writelines(json.dumps(example) + '\n' for example in examples)

# Output locations for the two splits (relative to the working directory).
train_output_file_path = 'stress_detection_train.jsonl'
validation_output_file_path = 'stress_detection_validation.jsonl'

# Write both JSONL files first, then report where they went.
for split_frame, split_path in (
    (train_data, train_output_file_path),
    (validation_data, validation_output_file_path),
):
    save_to_jsonl(split_frame, split_path)

print(
    f"Training dataset save to {train_output_file_path}",
    f"Validation dataset save to {validation_output_file_path}",
    sep="\n",
)

Training dataset save to stress_detection_train.jsonl
Validation dataset save to stress_detection_validation.jsonl


In [42]:
# Upload both JSONL files for fine-tuning. Each file is opened in a `with`
# block so its handle is closed as soon as the upload call returns, instead of
# staying open for the lifetime of the kernel.
with open(train_output_file_path, "rb") as train_fh:
    train_file = client.files.create(
        file=train_fh,
        purpose="fine-tune"
    )

with open(validation_output_file_path, "rb") as valid_fh:
    valid_file = client.files.create(
        file=valid_fh,
        purpose="fine-tune"
    )

print(f"Training file Info: {train_file}")
print(f"Validation file Info: {valid_file}")

Training file Info: FileObject(id='file-3XuhnuqiXdUoTZsyblphNNaY', bytes=50323, created_at=1731238031, filename='stress_detection_train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)
Validation file Info: FileObject(id='file-wFus99DwtDaaZFxjYqjd7QqZ', bytes=12724, created_at=1731238031, filename='stress_detection_validation.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)


In [43]:
# Training settings passed through to the fine-tuning job.
fine_tune_hyperparameters = {
    "n_epochs": 3,
    "batch_size": 3,
    "learning_rate_multiplier": 0.3,
}

# Start the fine-tuning job on the two uploaded files.
# Note: despite its name, `model` holds the job object returned by
# `jobs.create`; its id and status are read from it just below.
model = client.fine_tuning.jobs.create(
    model="gpt-4o-mini-2024-07-18",
    training_file=train_file.id,
    validation_file=valid_file.id,
    hyperparameters=fine_tune_hyperparameters,
)
job_id, status = model.id, model.status

print(f'Fine-tuning model with jobID: {job_id}.')
print(f"Training Response: {model}")
print(f"Training Status: {status}")

Fine-tuning model with jobID: ftjob-LJ7VG3rROwBD4ZwcPEnleiBO.
Training Response: FineTuningJob(id='ftjob-LJ7VG3rROwBD4ZwcPEnleiBO', created_at=1731238047, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=0.3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-PB66izrMdR2HiHnmerl1U9WY', result_files=[], seed=1464939079, status='validating_files', trained_tokens=None, training_file='file-3XuhnuqiXdUoTZsyblphNNaY', validation_file='file-wFus99DwtDaaZFxjYqjd7QqZ', estimated_finish=None, integrations=[], user_provided_suffix=None)
Training Status: validating_files


In [44]:
# Fetch the current state of the fine-tuning job, looked up by the `job_id`
# captured when the job was created. As the cell's final expression, the
# returned job object is displayed as the cell output.
client.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-LJ7VG3rROwBD4ZwcPEnleiBO', created_at=1731238047, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=3, learning_rate_multiplier=0.3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-PB66izrMdR2HiHnmerl1U9WY', result_files=[], seed=1464939079, status='validating_files', trained_tokens=None, training_file='file-3XuhnuqiXdUoTZsyblphNNaY', validation_file='file-wFus99DwtDaaZFxjYqjd7QqZ', estimated_finish=None, integrations=[], user_provided_suffix=None)

In [50]:
import time

# Look the job up by the id captured at creation time. Taking `data[0]` from
# `jobs.list()` would return whichever job is first in the account's listing,
# which is not guaranteed to be the job started by this notebook.
POLL_INTERVAL_SECONDS = 30
TERMINAL_STATUSES = {"succeeded", "failed", "cancelled"}

# `fine_tuned_model` stays None until training has finished, so wait for the
# job to reach a terminal status before reading it. Progress is printed only
# when the status changes, to keep the cell output short.
job = client.fine_tuning.jobs.retrieve(job_id)
last_reported_status = None
while job.status not in TERMINAL_STATUSES:
    if job.status != last_reported_status:
        print(f"Fine-tuning job {job_id} status: {job.status}")
        last_reported_status = job.status
    time.sleep(POLL_INTERVAL_SECONDS)
    job = client.fine_tuning.jobs.retrieve(job_id)

if job.status != "succeeded":
    raise RuntimeError(
        f"Fine-tuning job {job_id} ended with status {job.status!r}: {job.error}"
    )

# Retrieve the fine tuned model
fine_tuned_model = job.fine_tuned_model
print(fine_tuned_model)

ft:gpt-4o-mini-2024-07-18:personal::AS0ZE3Pv


In [51]:
# Sanity check: classify one hand-written post with the fine-tuned model,
# using the classification system prompt.
sample_messages = [
    {
        "role": "system",
        "content": "Given a social media post, classify whether it indicates 'stress' or 'non-stress'.",
    },
    {
        "role": "user",
        "content": "Just went to my first homecoming, and they played a song I've always wanted to dance to at an official dance. Sorry for the terrible quality, but my happiness in this moment couldn't be exaggerated!",
    },
]
completion = client.chat.completions.create(
    model=fine_tuned_model,
    messages=sample_messages,
)
print(completion.choices[0].message.content)

"non-stress"


In [52]:
def predict(test, model):
    """Classify every row of `test` with `model` and return the predicted labels.

    Each row's 'title' is sent to the chat completions endpoint together with
    the classification system prompt, and the reply is matched
    case-insensitively against the known categories.

    Args:
        test: DataFrame with a 'title' column holding the post text.
        model: Model name passed to the chat completions endpoint.

    Returns:
        A list with one entry per row, in row order: "non-stress", "stress",
        or "none" when the reply names neither category or has no text.
    """
    # Order matters: "stress" is a substring of "non-stress", so the longer
    # label has to be tested first.
    categories = ["non-stress", "stress"]

    # Identical for every request, so build it once outside the loop.
    system_message = {
        "role": "system",
        "content": "Given a social media post, classify whether it indicates 'stress' or 'non-stress'.",
    }

    y_pred = []
    for _, row in test.iterrows():
        response = client.chat.completions.create(
            model=model,
            messages=[
                system_message,
                {"role": "user", "content": row["title"]},
            ],
        )

        # The SDK types `message.content` as optional; treat a missing reply as
        # empty text so it falls through to "none" instead of raising on `.lower()`.
        answer = (response.choices[0].message.content or "").lower()

        # Determine the predicted category: first category found in the reply.
        y_pred.append(
            next((category for category in categories if category in answer), "none")
        )
    return y_pred

In [53]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np


def evaluate(y_true, y_pred):
    """Print accuracy, per-label accuracy, a classification report and a confusion matrix.

    Labels are encoded as integers before scoring: "non-stress" -> 0,
    "stress" -> 1, and any other value (for example the "none" placeholder
    for an unparseable prediction) -> -1.

    Args:
        y_true: Iterable of ground-truth label strings.
        y_pred: Iterable of predicted label strings, aligned with `y_true`.

    Returns:
        None. All results are printed.
    """
    labels = ["non-stress", "stress"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    unknown = -1  # sentinel for values that are not in `labels`

    def encode(values):
        # Plain comprehension instead of np.vectorize, which raises on
        # zero-length input unless `otypes` is given.
        return np.array([mapping.get(value, unknown) for value in values], dtype=int)

    y_true_mapped = encode(y_true)
    y_pred_mapped = encode(y_pred)

    # Calculate accuracy

    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f"Accuracy: {accuracy:.3f}")

    # Generate accuracy report: accuracy restricted to the rows of each true
    # label. np.unique gives the labels in sorted order, so the report order is
    # deterministic.

    for label in np.unique(y_true_mapped):
        rows_with_label = y_true_mapped == label
        label_accuracy = accuracy_score(
            y_true_mapped[rows_with_label], y_pred_mapped[rows_with_label]
        )
        # Indexing `labels` with the -1 sentinel would silently select the last
        # label ("stress"), so unknown true labels are named explicitly.
        label_name = labels[label] if label != unknown else "unknown"
        print(f"Accuracy for label {label_name}: {label_accuracy:.3f}")

    # Generate classification report

    label_ids = list(range(len(labels)))
    class_report = classification_report(
        y_true=y_true_mapped,
        y_pred=y_pred_mapped,
        target_names=labels,
        labels=label_ids,
    )
    print("\nClassification Report:")
    print(class_report)

    # Generate confusion matrix

    conf_matrix = confusion_matrix(
        y_true=y_true_mapped, y_pred=y_pred_mapped, labels=label_ids
    )
    print("\nConfusion Matrix:")
    print(conf_matrix)


In [54]:
# Baseline: score the base model (the same model name the fine-tuning job was
# started from) on the validation split, before looking at the fine-tuned one.
y_pred = predict(validation_data, "gpt-4o-mini-2024-07-18")
y_true = validation_data["label"]
evaluate(y_true, y_pred)

Accuracy: 0.950
Accuracy for label non-stress: 1.000
Accuracy for label stress: 0.905

Classification Report:
              precision    recall  f1-score   support

  non-stress       0.90      1.00      0.95        19
      stress       1.00      0.90      0.95        21

    accuracy                           0.95        40
   macro avg       0.95      0.95      0.95        40
weighted avg       0.95      0.95      0.95        40


Confusion Matrix:
[[19  0]
 [ 2 19]]


In [55]:
# Score the fine-tuned model on the same validation split; `y_true` is reused
# from the baseline evaluation cell, so that cell must have been run first.
# NOTE(review): this split was also supplied as the fine-tuning job's
# validation_file, so it is not an untouched hold-out set.
y_pred = predict(validation_data,fine_tuned_model)
evaluate(y_true, y_pred)

Accuracy: 1.000
Accuracy for label non-stress: 1.000
Accuracy for label stress: 1.000

Classification Report:
              precision    recall  f1-score   support

  non-stress       1.00      1.00      1.00        19
      stress       1.00      1.00      1.00        21

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40


Confusion Matrix:
[[19  0]
 [ 0 21]]
