# Setup

In [95]:
import os
import pandas as pd

from pprint import pprint

from docx import Document

from sklearn.metrics import accuracy_score

from skllm import MultiLabelZeroShotGPTClassifier
from skllm.config import SKLLMConfig

In [96]:
#See notes in INSTALL.md for how to set this, DO NOT HARD CODE YOUR API KEY HERE, 
# if your repository is public, then someone will steal your API key and make you pay for their shit 
SKLLMConfig.set_openai_key(os.environ.get('OPENAI_API_KEY'))

# Collect a Dataset

In [97]:
# Set the folder path where your Word documents are located
folder_path = 'data'

X = []

# Loop over each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.docx'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Open the Word document
        doc = Document(file_path)
        
        # Loop over each paragraph in the document and print its text
        filetext=""
        for para in doc.paragraphs:
            filetext+=para.text
        X.append(filetext)

data = pd.DataFrame(X, columns=['WritingSample'])



# Understand Your Data

In [98]:
data.head()

Unnamed: 0,WritingSample
0,"Every year, about 1.35 million people are kill..."
1,"In a world driven by technology, some recent a..."
2,The modern world is changing at a concerningly...
3,Technology in today's world continues to take ...
4,Technological improvements are occurring world...


In [99]:
data.describe()

Unnamed: 0,WritingSample
count,5
unique,5
top,"Every year, about 1.35 million people are kill..."
freq,1


In [100]:
X = data["WritingSample"]

In [101]:
X

0    Every year, about 1.35 million people are kill...
1    In a world driven by technology, some recent a...
2    The modern world is changing at a concerningly...
3    Technology in today's world continues to take ...
4    Technological improvements are occurring world...
Name: WritingSample, dtype: object

# "Develop" a Model aka just use OpenAI's API

In [102]:
# Define candidate labels
candidate_labels = [
    "A",
    "B",
    "C",
    "D",
    "F"
]

# Create and fit the classifier
clf = MultiLabelZeroShotGPTClassifier(max_labels=2) 
clf.fit(None, [candidate_labels])

# Define classification prompt (without specifying the writing_sample)
classification_prompt = (
    "Please be very strict when assigning scores. Anything that earns an A should be almost perfect. Bs should be extremly good. Cs should be average. Ds should be okay. Fs shuld be poor.\n"
    "This section will be scored on development. This section focuses primarily on flow and how well a student develops the main ideas in the writing. \n"
    "Please assess the student's work based on the following criteria:\n\n"
    "Development of Main Ideas:\n"
    "1. Perfect development of main ideas (A: Perfect)\n"
    "2. Great development of main ideas (B: Great)\n"
    "3. Good development of main ideas (C: Good)\n"
    "4. Okay development of main ideas (D: Fair)\n"
    "5. Undeveloped (F: Poor)\n\n"
    "Quality of Work:\n"
    "6. Above average work (A: Perfect)\n"
    "7. Great work (B: Very good)\n"
    "8. Good work (C: Average)\n"
    "9. Okay work (D: Fair)\n"
    "10. Below average work (F: Poor)\n\n"
    "Elaboration:\n"
    "11. Could not have done a better job of elaborating (A: Perfect)\n"
    "12. Great job of elaborating (B: Great)\n"
    "13. Good job of elaborating (C: Good)\n"
    "14. Okay job of elaborating (D: Fair)\n"
    "15. Poor elaboration (F: Poor)\n\n"
    "Flow:\n"
    "16. Writing flows perfectly (A: Perfect)\n"
    "17. Writing has a great flow (B: Great)\n"
    "18. Writing has a good flow (C: Good)\n"
    "19. The flow of this writing is confusing (D: Fair)\n"
    "20. The flow of this writing does not make sense (F: Poor)\n\n"
    "Use of Resources or Evidence:\n"
    "21. Great use of resources or evidence (A: Perfect)\n"
    "22. Used or referenced resources, but it could have been done better (C: Good)\n"
    "23. Inadequate use of resources or evidence (F: Poor)\n"
    "Please assign an overall score of A to F based on the criteria above. An A is the best score and an F is the worst score."
)

# Iterate through the Writing Samples and classify them
for writing_sample in X:
    # Generate a classification prompt (including criteria and the writing sample)
    complete_prompt = classification_prompt + f"\n\n{writing_sample}"
    
    # Use the classifier to classify the writing sample
    predicted_labels = clf.predict([complete_prompt])[0]
    
    # Print the classification result
    print("Predicted Labels:", predicted_labels)
    print()

100%|████████████████████████████████████████| 1/1 [00:00<00:00,  1.05it/s]


Predicted Labels: ['B', 'C']



100%|████████████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s]


Predicted Labels: ['A', 'B']



100%|████████████████████████████████████████| 1/1 [00:00<00:00,  1.23it/s]


Predicted Labels: ['A', 'C']



100%|████████████████████████████████████████| 1/1 [00:00<00:00,  1.38it/s]


Predicted Labels: ['B', 'C']



100%|████████████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s]

Predicted Labels: ['A', 'C']






In [93]:
# Predict the labels
labels = clf.predict(X)

100%|████████████████████████████████████████| 5/5 [00:03<00:00,  1.39it/s]


In [103]:
# Add labels to the dataset and save
data['ChatGPTLabel'] = predicted_labels
data.to_csv('data/classified_tips.csv', index=False)

ValueError: Length of values (2) does not match length of index (5)

# Choose a measure of success, Choose an evaluation protocol / evaluate

In [None]:
data[["WritingSample","ChatGPTLabel"]]

In [None]:
accuracy = accuracy_score(data[["HumanLabel"]], data[["ChatGPTLabel"]])
print(accuracy)

# Skipped Steps
* Beat a baseline
* Overfit, regularize and tune
* Communicate with stakeholders
* Ship an inference model
* Monitor and maintain