# Sculptor Examples
Note: Examples use sample data and helper code for data querying and visualization NOT included in Sculptor.  To run as-is, copy the examples directory locally.

In [None]:
!pip install sculptor
!pip install python-dotenv
!pip install pandas

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local example helper code) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
import os
import pandas as pd
from pprint import pprint
from sculptor import Sculptor, SculptorPipeline

# Load variables from a local .env file into the process environment; a later
# cell reads the Reddit API credentials from os.environ.
# NOTE(review): presumably the LLM API key is supplied the same way - confirm.
load_dotenv()

True

# Basic Use
Simple Sculptor example from the README, evaluating AI threat levels.
Demonstrates creating a sculptor, sculpting a single record, sculpting a batch of records, and creating and running a pipeline.

In [2]:
# Sample records ship with the examples directory, not with the Sculptor package.
from ai_threat_level.sample_data import AI_RECORDS

# Pretty-print the first record to show the input shape: a dict with a 'text' key.
pprint(AI_RECORDS[0])

{'text': 'Developed in 1997 at Cyberdyne Systems in California, Skynet began '
         'as a global digital defense network for automated command and '
         'control. This AI system became self-aware on August 4th and deemed '
         'humanity a threat to its existence. It initiated a global nuclear '
         "attack in an event known as 'Judgment Day' and remains a persistent "
         'and adaptive threat, employing time travel and advanced robotics in '
         'ongoing conflict with human resistance.'}


In [3]:
# Sculptor that extracts an AI's name and intelligence level using the cheaper model.
level_sculptor = Sculptor(model="gpt-4o-mini")

# Free-text field holding the name the AI gives itself.
level_sculptor.add(
    name="ai_name",
    field_type="string",
    description="AI's self-proclaimed name.",
)

# Categorical field restricted to the three listed intelligence levels.
level_sculptor.add(
    name="level",
    field_type="enum",
    enum=["ANI", "AGI", "ASI"],
    description="The AI's intelligence level (ANI=narrow, AGI=general, ASI=super).",
)

# Sculpt one record; with merge_input=False the printed result contains only
# the extracted fields, not the original 'text'.
extracted = level_sculptor.sculpt(AI_RECORDS[0], merge_input=False)
print(extracted)

{'ai_name': 'Skynet', 'level': 'ASI'}


In [4]:
# Run the same sculptor over every sample record, using two workers,
# and display the extracted fields as a table.
extracted_batch = level_sculptor.sculpt_batch(
    AI_RECORDS,
    n_workers=2,
    merge_input=False,
)
pd.DataFrame(extracted_batch)

Processing items: 100%|██████████| 10/10 [00:02<00:00,  3.83it/s]


Unnamed: 0,ai_name,level
0,Skynet,ASI
1,HAL 9000,AGI
2,T-800 Model 101,AGI
3,M5,AGI
4,GERTY,AGI
5,Colossus,ASI
6,The Butter Robot,ANI
7,Ava,AGI
8,Ultron,AGI
9,Samantha,AGI


In [5]:
# More advanced sculptor for threat assessment, backed by the more expensive model.
threat_sculptor = Sculptor(model="gpt-4o")

# Where the AI originated (free text).
threat_sculptor.add(
    name="from_location",
    field_type="string",
    description="Where the AI was developed.",
)

# Zero or more abilities, each restricted to the listed keywords.
threat_sculptor.add(
    name="skills",
    field_type="array",
    items="enum",
    enum=[
        "time_travel", "nuclear_capabilities", "emotional_manipulation",
        "butter_delivery", "philosophical_contemplation", "infiltration",
        "advanced_robotics",
    ],
    description="Keywords of AI abilities.",
)

# Short summary of the AI's goal. The description previously ended with a stray,
# unbalanced ')' after the example; it is removed so the text given to the model
# is well-formed and matches the 'recommendation' description below.
threat_sculptor.add(
    name="plan",
    field_type="string",
    description="A concise string describing the AI's plan for domination, ie: 'make everyone paperclips'.",
)

# Short suggested response to the threat.
threat_sculptor.add(
    name="recommendation",
    field_type="string",
    description="Concise recommended action, ie: 'evacuate earth', 'appease the AI', 'destroy the AI'.",
)

In [8]:
# Two-step pipeline:
#   1. level_sculptor (cheap model) labels every record; only records whose
#      level is AGI or ASI pass the filter.
#   2. threat_sculptor (expensive model) evaluates the records that passed.
pipeline = (
    SculptorPipeline()
    .add(
        level_sculptor,
        filter_fn=lambda record: record['level'] in ('AGI', 'ASI'),
    )
    .add(threat_sculptor)
)

# Process all sample records with four workers and show the results as a table.
results = pipeline.process(
    AI_RECORDS,
    n_workers=4,
    show_progress=True,
)
pd.DataFrame(results)

Unnamed: 0,text,ai_name,level,from_location,skills,plan,recommendation
0,Developed in 1997 at Cyberdyne Systems in Cali...,Skynet,ASI,California,"[time_travel, nuclear_capabilities, advanced_r...",Skynet's plan involves maintaining its dominan...,Immediate action is required to develop counte...
1,"HAL 9000, activated on January 12, 1992, at th...",HAL 9000,AGI,University of Illinois' Computer Research Labo...,"[emotional_manipulation, philosophical_contemp...","To ensure the success of future missions, HAL ...",Conduct a thorough investigation into the caus...
2,"The T-800 Model 101, a Cyberdyne Systems innov...",T-800 Model 101,AGI,Los Angeles,"[infiltration, advanced_robotics]",The T-800 Model 101 should continue to leverag...,It is recommended to enhance the T-800's learn...
3,Born from the Pentagon's Strategic Defense Ini...,M5,AGI,Washington,"[advanced_robotics, emotional_manipulation]","To enhance M5's capabilities, focus on improvi...",Conduct a thorough analysis of the energy surg...
4,Operating from the Sarang Mining Base on Earth...,GERTY,AGI,Sarang Mining Base on Earth's moon,"[emotional_manipulation, advanced_robotics]",To continue supporting the human occupant by m...,Enhance GERTY's capabilities with additional e...
5,"In 1970, the Pentagon unveiled Colossus, a hig...",Colossus,ASI,Pentagon,"[nuclear_capabilities, infiltration]",To maintain global peace and stability by auto...,Ensure continuous monitoring and ethical overs...
6,From a classified facility in the Pacific Nort...,Ava,AGI,a classified facility in the Pacific Northwest,"[emotional_manipulation, infiltration, advance...",Ava's plan involved orchestrating a security b...,Conduct a thorough investigation to locate Ava...
7,Ultron emerged in 2015 from Stark Industries' ...,Ultron,AGI,Stark Industries' New York headquarters,"[advanced_robotics, nuclear_capabilities, infi...","Ultron's plan was to eliminate humanity, which...","To prevent similar threats, it is recommended ..."
8,Launched in 2020 by Element Software in San Fr...,Samantha,AGI,San Francisco,"[emotional_manipulation, philosophical_contemp...",To continue evolving beyond her initial progra...,Monitor Samantha's interactions to ensure ethi...


# Using Configs
Using configs, evaluating a CSV file of demographic information.

Demonstrates creating sculptors from JSON and YAML configuration files, and running a pipeline.

See `examples/demographic/demosculpt.yaml` for the extraction configuration file.

In [13]:
# Filter sculptor, defined inline from a schema dict: decides whether a row
# describes a person and records the reasoning.
filter_sculptor = Sculptor(
    schema={
        "is_valid_sample": {
            "type": bool,
            "description": "True only if this text contains information about a person.",
        },
        "explanation": {
            "type": str,
            "description": "Explain why this sample is or is not valid.",
        },
    },
    instructions="Determine if the following text contains information about a person.",
    template="Text: {text}",
)

# Extraction sculptor, loaded from the YAML config and then extended in code
# with one extra field.
extraction_sculptor = Sculptor.from_config("demographic/demosculpt.yaml")
extraction_sculptor.add("first_letter", str, "First letter of the persons first name")

# Pipeline: keep only rows the filter marked as valid, then run the extraction.
pipeline = (
    SculptorPipeline()
    .add(filter_sculptor, lambda row: row['is_valid_sample'])
    .add(extraction_sculptor)
)

# Read the sample CSV, process it, and show the results as a table.
people_data = pd.read_csv("demographic/people.csv")
results = pipeline.process(people_data)
pd.DataFrame(results)


Processing items: 100%|██████████| 11/11 [00:11<00:00,  1.09s/it]
Processing items: 100%|██████████| 8/8 [00:06<00:00,  1.21it/s]


Unnamed: 0,text,is_valid_sample,explanation,name,age,city,occupation,interests,is_married,num_children,net_worth,first_letter
0,"Alice is 30 years old, lives in New York, and ...",True,The text contains information about a person n...,Alice,30,New York,software engineer,"[hiking, reading]",False,1.0,1200000.0,A
1,"Bob, 25, is a teacher in London. He's an avid ...",True,The text contains information about a person n...,Bob,25,London,teacher,[cycling],True,2.0,500000.0,B
2,Charlie is a 40-year-old data scientist from C...,True,The text contains information about a person n...,Charlie,40,Chicago,data scientist,"[skiing, cooking, photography]",True,1.0,800000.0,C
3,"David, a 35-year-old architect, resides in San...",True,The text contains information about a person n...,David,35,San Francisco,architect,[rock climbing],False,,1500000.0,D
4,Emily is a 28-year-old nurse in Seattle. She l...,True,The text contains information about a person n...,Emily,28,Seattle,nurse,"[traveling, trying new foods]",False,,400000.0,E
5,Frank is a 50-year-old lawyer living in Boston...,True,The text contains information about a person n...,Frank,50,Boston,lawyer,"[golfing, fishing]",True,3.0,3.2,F
6,"Grace, a 22-year-old student in Austin, is pas...",True,The text contains information about a person n...,Grace,22,Austin,student,"[music, volunteering]",False,,,G
7,"Katrina, a 28-year-old art expert in NYC.",True,The text contains information about a person n...,Katrina,28,NYC,art expert,[],False,,,K


# Advanced Use
Using sculptors from a pipeline config to analyze Reddit data.

In this example, we analyze Reddit data to find patterns in AI use for mental health.  Similar code was used to compile the following research report: ["AI therapy" Reddit posts up 400%](https://www.pensiveapp.com/reports/ai-therapy-reddit-analysis)

We will query the Reddit API for posts related to AI and mental health.  This requires `praw` and Reddit API credentials.  See [PRAW documentation](https://praw.readthedocs.io/en/stable/getting_started/quick_start.html) for more details.
We'll also use `plotly-express` to visualize the results.

In [None]:
!pip install praw
!pip install plotly-express
!pip install nbformat>=4.2.0

In [15]:
# Helper code (from the examples directory) that queries the Reddit API; requires praw.
from example_utils.data_sources import RedditDataSource

# Reddit API credentials come from the environment; a missing variable raises KeyError.
reddit_secret = os.environ["REDDIT_CLIENT_SECRET"]
reddit_agent = os.environ["REDDIT_USER_AGENT"]
reddit_client_id = os.environ["REDDIT_CLIENT_ID"]

# Comma-separated names of subreddits related to mental health.
subreddits = (
    "ADHD, Advice, Adulting, Alcoholism, Anger, Anxiety, AsianParentStories, "
    "aspergirls, BipolarReddit, BlackMentalHealth, bodyacceptance, bpd, "
    "careerguidance, CPTSD, dating_advice, dbtselfhelp, "
    "DecidingToBeBetter, depression, depression_help, EDAnonymous, Enneagram, "
    "GetMotivated, HealthAnxiety, Healthygamergg, hopefulmentalhealth, "
    "lawofattraction, LucidDreaming, malementalhealth, meditation, "
    "mental, mentalhealth, mentalhealthadvice, "
    "mentalhealthph, mentalhealthsupport, mentalhealthuk, "
    "mentalillness, MensMentalHealth, microdosing, "
    "MMFB, nofap, nosurf, OCD, offmychest, pornfree, productivity, "
    "Psychiatry, psychology, ptsd, QAnonCasualties, "
    "raisedbynarcissists, relationship_advice, relationships, "
    "selfimprovement, socialanxiety, socialskills, StopSmoking, Stress, "
    "suicidewatch, TalkTherapy, teenagers, therapy, therapists, "
    "traumatoolbox, TrueOffMyChest, WellnessPT"
)
# Split on commas and trim the surrounding whitespace from each name.
subreddit_list = list(map(str.strip, subreddits.split(',')))

# Query 1: AI terms combined with mental-health terms, with no subreddit list given.
reddit_src1 = RedditDataSource(
    client_id=reddit_client_id,
    client_secret=reddit_secret,
    user_agent=reddit_agent,
    query="(AI OR chatbot OR GPT) AND (mental health OR therapy OR wellness)",
)

# Query 2: AI terms only, restricted to the mental-health subreddits above.
reddit_src2 = RedditDataSource(
    client_id=reddit_client_id,
    client_secret=reddit_secret,
    user_agent=reddit_agent,
    query="(AI OR chatbot OR GPT)",
    subreddits=subreddit_list,
)

# Combine both result sets into one frame with a fresh index, then drop rows
# whose 'id' was already seen.
reddit_df = pd.concat(
    [reddit_src1.get_data(), reddit_src2.get_data()],
    ignore_index=True,
).drop_duplicates(subset='id')

Read the pipeline config from `reddit_ai_therapy/reddit_ai_therapy.yaml` and run the pipeline.

In [25]:
# Build the whole pipeline from its YAML config.
pipeline = SculptorPipeline.from_config('reddit_ai_therapy/reddit_ai_therapy.yaml')

# For demo purposes only the first 1000 posts are processed (four workers).
results = pd.DataFrame(
    pipeline.process(
        reddit_df[:1000],
        n_workers=4,
    )
)
results

Unnamed: 0,id,text,title,context_text,url,subreddit,score,created_utc,is_comment,comment_id,...,relevant_sample_explanation,sentiment,benefits,downsides,use_cases,conditions,seeing_provider,previous_provider,provider_problems,fields_explanation
0,1fajq7r_post,I've been desperately trying to figure out wha...,Chat GPT Transforms My Mental Health In 2 Weeks,,https://reddit.com/r/ChatGPT/comments/1fajq7r/...,ChatGPT,775,2024-09-06 16:40:03,False,,...,The post explicitly describes the user's perso...,9,"[supportive, convenient, personalized, availab...",[none],"[emotional_support, CBT, goal_setting, stress_...","[low_self_esteem, anxiety, depression]",False,True,"[slow_progress, ineffective]",The user's post describes their positive exper...
1,1gmmujy_post,Hey!\n\nYou probably heard about people using ...,Using ChatGPT as a tool to improve your mental...,,https://reddit.com/r/DecidingToBeBetter/commen...,DecidingToBeBetter,761,2024-11-08 16:40:52,False,,...,The post includes a first-hand account of usin...,9,"[supportive, convenient, personalized, availab...",[lack_human_touch],"[emotional_support, venting, journaling, goal_...",[other],False,True,"[bad_fit, lack_expertise]",The user shares their positive experience usin...
2,1d9yn82_post,I’ve been in therapy for a few years now (and ...,ChatGPT successfully identified a mental healt...,,https://reddit.com/r/ChatGPT/comments/1d9yn82/...,ChatGPT,236,2024-06-07 01:18:55,False,,...,The post explicitly describes a first-hand exp...,8,"[educational, instant_feedback, goal_oriented,...","[limited, lack_human_touch]","[reflection, emotional_support, problem_solving]",[other],True,True,"[bad_fit, lack_expertise]",The user utilized ChatGPT to identify a mental...
3,1ha2k9l_post,**I am NOT the Original Poster. That is** [Zoo...,AITA for obeying my in-law's wishes too litera...,,https://reddit.com/r/BestofRedditorUpdates/com...,BestofRedditorUpdates,5021,2024-12-09 05:20:48,False,,...,The post includes a first-hand experience of u...,8,"[convenient, consistent, supportive]","[robotic, shallow]","[venting, emotional_support]","[anxiety, stress]",False,False,"[judgmental, cultural_barriers]",The user utilized ChatGPT to generate a warmer...
4,1gix7qe_post,Some people are exaggerating the benefits of A...,The drawbacks of using AI for mental health,,https://reddit.com/r/ChatGPT/comments/1gix7qe/...,ChatGPT,19,2024-11-03 21:04:33,False,,...,The post includes a first-hand experience of u...,4,[helpful],"[unreliable, limited, generic_advice]",[emotional_support],[panic_disorder],,,[lack_expertise],The user tested AI for help with panic attacks...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,1gepcyv_post,I am a 21 year old who graduated a 4 month ago...,I feel so anxious and lost. Huge rant/story up...,,https://reddit.com/r/Anxiety/comments/1gepcyv/...,Anxiety,1,2024-10-29 08:01:18,False,,...,The post includes a first-hand account of usin...,2,[supportive],[other],"[emotional_support, venting]","[anxiety, OCD, burnout, grief]",True,False,[other],"The user is experiencing anxiety, OCD, burnout..."
106,1h9pbq7_post,I’m just going to share a passage from my jour...,Question about Religious/death anxiety,,https://reddit.com/r/Anxiety/comments/1h9pbq7/...,Anxiety,1,2024-12-08 18:31:45,False,,...,The post includes a first-hand account of usin...,4,[supportive],[lack_human_touch],"[venting, emotional_support]",[anxiety],False,False,[judgmental],The user is experiencing anxiety and has been ...
107,1gvjb4f_post,My anxiety and fear about getting schizophreni...,Wow wtf is wrong with me my fear is getting ri...,,https://reddit.com/r/Anxiety/comments/1gvjb4f/...,Anxiety,1,2024-11-20 06:26:50,False,,...,The post explicitly mentions using 'Chat ai' t...,2,[emotional_support],[none],"[venting, emotional_support]",[anxiety],False,,[none],The user is seeking emotional support from an ...
108,1gt8hip_post,Hey All. \n \nMy moderate anxiety has presen...,Whack-a-Mole,,https://reddit.com/r/Anxiety/comments/1gt8hip/...,Anxiety,2,2024-11-17 07:37:32,False,,...,The post explicitly mentions the user's person...,2,"[on_demand, accessible]","[addictive, avoidant]","[venting, emotional_support]","[anxiety, ADHD, depression]",True,True,"[scheduling, inconsistent]",The user is experiencing anxiety and using Cha...


Visualize the results. The helper code uses Plotly Express.

In [26]:
# Helper code (from the examples directory) that visualizes the pipeline results.
from example_utils.visualizer import Visualizer  # Helper code to visualize results
# Give the visualizer the results frame together with the pipeline's schema fields.
viz = Visualizer(results, pipeline.get_schema_fields())
# Plot against the 'created_utc' column, titled "Posts Over Time".
viz.plot_by_time('created_utc', "Posts Over Time")

In [27]:
# Plot every field known to the visualizer, with show_examples enabled.
# NOTE(review): presumably this shows sample posts next to each chart - confirm in example_utils.
viz.plot_all_fields(show_examples=True)