# Setup

In [1]:
import os
import pandas as pd

from pprint import pprint

from sklearn.metrics import accuracy_score

from skllm import MultiLabelZeroShotGPTClassifier
from skllm.config import SKLLMConfig

In [2]:
# See INSTALL.md for how to set the OPENAI_API_KEY environment variable.
# DO NOT hard-code your API key here: if the repository is public, someone
# will steal the key and run up charges on your account.
if not os.environ.get('OPENAI_API_KEY'):
    # Fail fast with an actionable message instead of handing None to skllm.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; see INSTALL.md for how to configure it."
    )
SKLLMConfig.set_openai_key(os.environ['OPENAI_API_KEY'])

# Collect a Dataset

In [3]:
# Read the Aphorisms life-tips CSV into a DataFrame
aphorisms_csv_path = 'data/myAphorisms.csv'
data = pd.read_csv(aphorisms_csv_path)

# Understand Your Data

In [4]:
# Display the loaded frame to inspect its columns and values
data

Unnamed: 0,id,author,isOver18,postUrl,subreddit,postTitle,hasPostBody,postBody,score,numComments,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,HumanLabel
0,f6jt5e,w2555,False,https://reddit.com/r/LifeProTips/comments/f6jt5e/,LifeProTips,"""Hard work beats talent when talent doesn't wo...",True,I had a phone interview scheduled this morning...,147296,4730,,,,,,['Work']
1,lq1jn7,this1tyme,False,https://reddit.com/r/LifeProTips/comments/lq1jn7/,LifeProTips,"""Choose a job you love, and you will never hav...",False,,134320,4121,,,,,,['Other']
2,j2mm1b,raviji22,False,https://reddit.com/r/LifeProTips/comments/j2mm1b/,LifeProTips,"""Success is not final, failure is not fatal: I...",False,,129513,1971,,,,,,['Other']
3,fqkkke,[deleted],False,https://reddit.com/r/LifeProTips/comments/fqkkke/,LifeProTips,"""Opportunities don't happen. You create them.""...",True,[deleted],124219,2762,,,,,,['Family']
4,gmmiah,AlphaSyncz,False,https://reddit.com/r/YouShouldKnow/comments/gm...,YouShouldKnow,"""Don't watch the clock; do what it does. Keep ...",True,It's just basic stuff but I know friends of mi...,120038,1532,,,,,,['Family']
5,a95fr8,Humble_Internet_Dude,False,https://reddit.com/r/LifeProTips/comments/a95fr8/,LifeProTips,"""The best way to predict your future is to cre...",True,Woooo my first ever gold! Thanks stranger ❤ Me...,119977,964,,,,,,['Family']
6,kzkotx,mashingLumpkins,False,https://reddit.com/r/LifeProTips/comments/kzkotx/,LifeProTips,"""Success is not the key to happiness. Happines...",False,,118582,1792,,,,,,['Family']
7,fpfwra,Moonripple616,False,https://reddit.com/r/LifeProTips/comments/fpfwra/,LifeProTips,"""The only place where success comes before wor...",False,,117600,2933,,,,,,['Family']
8,o26lzm,WrongKielbasa,False,https://reddit.com/r/LifeProTips/comments/o26lzm/,LifeProTips,"""Your work is going to fill a large part of yo...",False,,115511,2633,,,,,,['Work']
9,f6jt5e,w2555,False,https://reddit.com/r/LifeProTips/comments/f6jt5e/,LifeProTips,"""Family is not an important thing, it's everyt...",True,I had a phone interview scheduled this morning...,147296,4730,,,,,,['Work']


In [5]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns
data.describe()

Unnamed: 0,score,numComments,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
count,27.0,27.0,0.0,0.0,0.0,0.0,0.0
mean,125228.444444,2604.222222,,,,,
std,9846.857848,1169.518025,,,,,
min,115511.0,964.0,,,,,
25%,118582.0,1792.0,,,,,
50%,120038.0,2633.0,,,,,
75%,129513.0,2933.0,,,,,
max,147296.0,4730.0,,,,,


In [6]:
# The post titles are the texts that will be classified
text_column = 'postTitle'
X = data[text_column]

In [7]:
# Preview the titles selected as classifier input
X

0     "Hard work beats talent when talent doesn't wo...
1     "Choose a job you love, and you will never hav...
2     "Success is not final, failure is not fatal: I...
3     "Opportunities don't happen. You create them."...
4     "Don't watch the clock; do what it does. Keep ...
5     "The best way to predict your future is to cre...
6     "Success is not the key to happiness. Happines...
7     "The only place where success comes before wor...
8     "Your work is going to fill a large part of yo...
9     "Family is not an important thing, it's everyt...
10    "The love of a family is life's greatest bless...
11    "Family is where life begins and love never en...
12    "A happy family is but an earlier heaven." - G...
13    "Family means no one gets left behind or forgo...
14    "In family life, love is the oil that eases fr...
15    "The most important thing in the world is fami...
16    "A family is a place where minds come in conta...
17    "Rejoice with your family in the beautiful

# "Develop" a Model aka just use OpenAI's API

In [8]:
# Labels the classifier is allowed to choose from
candidate_labels = ["Work", "Family", "Other"]

# Build the zero-shot classifier (at most two labels per text) and register
# the label set; no training texts are passed, hence X is None.
clf = MultiLabelZeroShotGPTClassifier(max_labels=2)
clf.fit(None, [candidate_labels])

In [9]:
# Predict the labels for every title in X.
# NOTE: this is slow; the recorded run below took 02:26 for 27 titles (~5.4 s each),
# presumably one OpenAI request per title (TODO confirm).
labels = clf.predict(X)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [02:26<00:00,  5.41s/it]


In [10]:
# Store the predictions alongside the original rows, then write the frame to disk
classified_csv_path = 'data/classified_tips.csv'
data['ChatGPTLabel'] = labels
data.to_csv(classified_csv_path, index=False)

# Choose a Measure of Success, Choose an Evaluation Protocol, and Evaluate

In [11]:
# Normalise both label columns to plain strings such as "Work".
# This cell is safe to re-run: values that are already clean are left untouched
# (previously a second run turned HumanLabel into NaN and cut ChatGPTLabel
# down to its first character).

def _first_label(predicted):
    """Return the first label of a prediction, or None if there is none.

    Args:
        predicted: a sequence of labels, a bare label string left by an
            earlier run of this cell, or None.

    Returns:
        The first label (or the string itself); None for empty or missing input.
    """
    if predicted is None:
        return None
    if isinstance(predicted, str):
        # Already unwrapped; indexing would keep only the first character.
        return predicted or None
    return predicted[0] if len(predicted) > 0 else None


# HumanLabel holds text like "['Work']"; pull out the quoted label.
# expand=False yields a Series, and fillna keeps any value the pattern does not
# match (e.g. an already-extracted label) instead of replacing it with NaN.
human_label = data['HumanLabel'].str.extract(r"\['(.*?)'\]", expand=False)
data['HumanLabel'] = human_label.fillna(data['HumanLabel'])

# Keep only the first predicted label of each row.
data['ChatGPTLabel'] = data['ChatGPTLabel'].apply(_first_label)

In [12]:
# Side-by-side view of each title with the model's label and the human label
comparison_columns = ["postTitle", "ChatGPTLabel", "HumanLabel"]
data[comparison_columns]

Unnamed: 0,postTitle,ChatGPTLabel,HumanLabel
0,"""Hard work beats talent when talent doesn't wo...",Work,Work
1,"""Choose a job you love, and you will never hav...",Work,Other
2,"""Success is not final, failure is not fatal: I...",Other,Other
3,"""Opportunities don't happen. You create them.""...",Work,Family
4,"""Don't watch the clock; do what it does. Keep ...",Work,Family
5,"""The best way to predict your future is to cre...",Other,Family
6,"""Success is not the key to happiness. Happines...",Work,Family
7,"""The only place where success comes before wor...",Work,Family
8,"""Your work is going to fill a large part of yo...",Work,Work
9,"""Family is not an important thing, it's everyt...",Family,Work


In [13]:
# Fraction of rows where the model's label equals the human label.
# accuracy_score takes 1-D label arrays as (y_true, y_pred), so select each
# column as a Series (single brackets) rather than a one-column DataFrame.
accuracy = accuracy_score(data["HumanLabel"], data["ChatGPTLabel"])
print(accuracy)

0.37037037037037035


# Skipped Steps
* Beat a baseline
* Overfit, regularize and tune
* Communicate with stakeholders
* Ship an inference model
* Monitor and maintain