In [1]:
import os
import pandas as pd
import json

In [3]:
# Input files for the three dataset splits (paths are relative to the
# notebook's working directory).
train, test, valid = "train.jsonl", "test.jsonl", "valid.jsonl"

In [16]:
def set_prompt(title, post):
    """Build the system and user prompts for one TLDR summarisation example.

    Args:
        title: Title of the post; interpolated into the user prompt.
        post: Body of the post; interpolated into the user prompt.

    Returns:
        A ``(system_prompt, user_prompt)`` tuple of strings. The system prompt
        is constant; the user prompt is the fixed instruction text followed by
        an ``### Input`` section containing ``title`` and ``post``.
    """
    system_prompt = """You are an AI assistant. Your primary function is to generate a concise "Too Long; Didn't Read" (TLDR) summary for the provided text."""

    # One entry per output line; joined with "\n" below. The last entry carries
    # the per-example input and therefore spans three lines on its own.
    prompt_lines = (
        "You will be given a title and a post. Your goal is to create a very brief summary that captures the main idea, core situation, or the central question presented in the content.",
        "### Instructions for the summary:",
        " - Brevity is Key: The summary must be significantly shorter than the original post, typically 2-3 sentences.",
        " - Capture the Essence: Accurately convey the most critical information or the gist of the content.",
        " - Clarity and Simplicity: Write in clear, straightforward language that is easy to understand quickly.",
        " - Maintain Perspective: If the original post is written from a specific perspective (e.g., first-person personal narrative), the TLDR should generally reflect that perspective to concisely state the main point or dilemma.",
        " - Focus on Central Information: Extract the most important elements. Omit background details, minor points, and redundant information unless they are crucial to understanding the core message.",
        "Your output should be just the TLDR summary string.",
        f"### Input: \nTitle: {title}\nPost: {post}",
    )

    return system_prompt, "\n".join(prompt_lines)

In [12]:
# Load each split from its JSON Lines file (one JSON record per line),
# in train / test / valid order.
train_df, test_df, valid_df = (
    pd.read_json(split_path, lines=True) for split_path in (train, test, valid)
)

# Preview the first rows of the training split.
train_df.head()

Unnamed: 0,id,subreddit,title,post,summary
0,t3_1hxu8s,relationships,I (f/22) have to figure out if I want to still...,Not sure if this belongs here but it's worth a...,I still have contact with an old ex's friends ...
1,t3_33lz18,loseit,SV & NSV! Keeping on keeping on.,"30F, 5'6"". SW: 236 GW: 150 CW: 219\n\nI weigh ...","Progress is still happening, even when you thi..."
2,t3_38a85h,relationships,"Me [19F] with my friend [19M] 10 months, Insec...",What are your stories about insecurities you'v...,My skin is scarred badly; what could I do/say ...
3,t3_2as12n,personalfinance,Prioritize student debt or saving for down pay...,I have $25k in student debt. One private loan ...,$14k in student debt (all <5%) and need to sav...
4,t3_2bzi1g,relationships,My[25m] girlfriend [24f] is only nice and plea...,Throwaway\n\nI noticed the more I'm cold and d...,"GF is a meanie-bo-beanie when I'm nice, and an..."


In [20]:
# Convert every training row into a prompt/response record with the keys
# "system", "prompt" and "output" (the reference summary).
#
# The three columns are resolved once and walked in lockstep with zip();
# this avoids re-resolving each column and doing a positional .iloc lookup
# three times per row, which is slow on large frames.
new_train = []
for title, post, summary in zip(
    train_df["title"], train_df["post"], train_df["summary"]
):
    system, user = set_prompt(title, post)
    new_train.append({"system": system, "prompt": user, "output": summary})

In [21]:
# Convert every test row into a prompt/response record with the keys
# "system", "prompt" and "output" (the reference summary).
#
# The three columns are resolved once and walked in lockstep with zip();
# this avoids re-resolving each column and doing a positional .iloc lookup
# three times per row, which is slow on large frames.
new_test = []
for title, post, summary in zip(
    test_df["title"], test_df["post"], test_df["summary"]
):
    system, user = set_prompt(title, post)
    new_test.append({"system": system, "prompt": user, "output": summary})

In [22]:
# Convert every validation row into a prompt/response record with the keys
# "system", "prompt" and "output" (the reference summary).
#
# The three columns are resolved once and walked in lockstep with zip();
# this avoids re-resolving each column and doing a positional .iloc lookup
# three times per row, which is slow on large frames.
new_valid = []
for title, post, summary in zip(
    valid_df["title"], valid_df["post"], valid_df["summary"]
):
    system, user = set_prompt(title, post)
    new_valid.append({"system": system, "prompt": user, "output": summary})

In [23]:
# Turn each list of record dicts into a DataFrame; the dict keys
# ("system", "prompt", "output") become the columns.
new_train_df, new_test_df, new_valid_df = (
    pd.DataFrame(records) for records in (new_train, new_test, new_valid)
)

In [24]:
# Sanity check: show the first five formatted training records.
new_train_df.head(n=5)

Unnamed: 0,system,prompt,output
0,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,I still have contact with an old ex's friends ...
1,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"Progress is still happening, even when you thi..."
2,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,My skin is scarred badly; what could I do/say ...
3,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,$14k in student debt (all <5%) and need to sav...
4,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"GF is a meanie-bo-beanie when I'm nice, and an..."


In [25]:
# Sanity check: show the first five formatted test records.
new_test_df.head(n=5)

Unnamed: 0,system,prompt,output
0,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"I really like this guy, but after having sex w..."
1,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,Sons good friend died and his funeral is today...
2,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,Girl I'm seeing didn't respond to my texts whi...
3,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"Tried to stop an old lady falling, kicked her ..."
4,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,Wife Cheats on me but I stuck around for kids....


In [26]:
# Sanity check: show the first five formatted validation records.
new_valid_df.head(n=5)

Unnamed: 0,system,prompt,output
0,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,long relationship; fell in love with an other ...
1,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"Mom had the TV on loud and woke me up, didn't ..."
2,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,My girlfriend kissed two guys at a Halloween p...
3,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"I think pregnancy messed with my body, now I c..."
4,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,got startled for no reason by a normal dude ta...


In [27]:
# Write each formatted split to its own CSV file; index=False keeps the
# row index out of the output.
for split_frame, csv_path in (
    (new_train_df, "new_train.csv"),
    (new_test_df, "new_test.csv"),
    (new_valid_df, "new_valid.csv"),
):
    split_frame.to_csv(csv_path, index=False)

In [29]:
# Show the first five training records again after the export.
new_train_df.head(n=5)

Unnamed: 0,system,prompt,output
0,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,I still have contact with an old ex's friends ...
1,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"Progress is still happening, even when you thi..."
2,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,My skin is scarred badly; what could I do/say ...
3,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,$14k in student debt (all <5%) and need to sav...
4,You are an AI assistant. Your primary function...,You will be given a title and a post. Your goa...,"GF is a meanie-bo-beanie when I'm nice, and an..."
