In [None]:
!pip install -U google-generativeai
!pip install chromadb
!pip install pandas
!pip install PyPDF2
!pip install python-dotenv

Collecting chromadb
  Downloading chromadb-1.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.23.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.37.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?

In [None]:
import os, uuid
from typing import List
from dotenv import load_dotenv
from pprint import pprint
import pandas as pd
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings, PersistentClient
from google.genai.types import Content
from IPython.display import Markdown, display
import google.generativeai as genai

In [None]:
# Load the artist records from the CSV file in the working directory.
df = pd.read_csv("artist_training_data.csv")

In [None]:
# Preview the first 10 rows to check which columns were loaded.
df.head(10)

Unnamed: 0,name,genre,email,location,nonprofit_interest,instagram,artist_bio,nonprofit_reasoning
0,Ryan Lee,Folk,ryan_lee@yahoo.com,"Urbana, IL",Environmental Conservation,ryan.lee,"Ryan Lee is a folk artist from Urbana, IL who ...","Growing up in the Midwest, Ryan Lee developed ..."
1,Xavier Brooks,Folk,xavier.brooks@gmail.com,"Evanston, IL",Social Justice,xavierbrooks,"Xavier Brooks is a folk artist from Evanston, ...",Xavier Brooks's commitment to social justice s...
2,Indigo Cox,Folk,cox.indigo@yahoo.com,"Evanston, IL",Economic Justice,cox_music,"Indigo Cox is a folk artist from Evanston, IL ...",Indigo Cox is deeply committed to economic jus...
3,Rosa Hughes,Folk,rosa_hughes@hotmail.com,"Oak Park, IL",Arts Education,rosa_sounds,"Based in Oak Park, IL, Rosa Hughes combines tr...",Rosa Hughes is deeply committed to arts educat...
4,Parker King,Folk,parker.king@hotmail.com,"Schaumburg, IL",Healthcare Access,parkerking,"Parker King is a folk artist from Schaumburg, ...",Parker King is deeply committed to healthcare ...
5,Emma Myers,Rock,emma.myers.music@icloud.com,"Naperville, IL",Arts Education,emmamyers,Emerging from the vibrant music scene of Naper...,Emma Myers is deeply committed to arts educati...
6,Drew Bennett,Rock,bennett.drew@yahoo.com,"Chicago, IL",Community Development,drewbennett,Emerging from the vibrant music scene of Chica...,Drew Bennett is deeply committed to community ...
7,Victor Green,Rock,victor_green@gmail.com,"Oak Park, IL",Community Development,green_music,Emerging from the vibrant music scene of Oak P...,Victor Green is deeply committed to community ...
8,Sofia Ross,Rock,sofiaross@outlook.com,"Oak Park, IL",Immigration Rights,sofia_ross,Emerging from the vibrant music scene of Oak P...,Sofia Ross is deeply committed to immigration ...
9,Antonio Jenkins,Rock,jenkins.antonio@yahoo.com,"Aurora, IL",Social Justice,antonio.jenkins,Emerging from the vibrant music scene of Auror...,Having witnessed the impact of voting rights i...


In [None]:
# Build one plain-text document per artist row; these strings are what get
# embedded and stored later in the notebook.

# (label, column) pairs, in the order the lines appear inside each document.
DOCUMENT_FIELDS = [
    ("Name", "name"),
    ("Genre", "genre"),
    ("Location", "location"),
    ("Nonprofit Interest", "nonprofit_interest"),
    ("Bio", "artist_bio"),
    ("Reasoning", "nonprofit_reasoning"),
]


def _has_value(value):
    """Return True when a cell holds real content.

    Missing CSV cells are read by pandas as NaN, and NaN is truthy, so a plain
    ``if value:`` check would let lines such as ``Bio: nan`` into a document.
    Blank / whitespace-only strings are treated as missing too.
    """
    return bool(pd.notna(value)) and str(value).strip() != ""


documents = []

for _, row in df.iterrows():
    lines = [
        f"{label}: {row[column]}"
        for label, column in DOCUMENT_FIELDS
        if _has_value(row[column])
    ]
    documents.append("\n".join(lines).strip())

print(len(documents))           # number of artist docs
if documents:                   # an empty CSV would otherwise raise IndexError
    print(documents[0])

100
Name: Ryan Lee
Genre: Folk
Location: Urbana, IL
Nonprofit Interest: Environmental Conservation
Bio: Ryan Lee is a folk artist from Urbana, IL who weaves storytelling traditions with contemporary social commentary through acoustic melodies and heartfelt lyrics. Their music draws inspiration from midwest prairie landscapes and urban community experiences, creating authentic connections with audiences across diverse venues. With influences ranging from traditional American folk to modern indie-folk, Ryan Lee crafts songs that resonate with both intimate coffeehouse settings and larger festival stages.
Reasoning: Growing up in the Midwest, Ryan Lee developed a deep connection to natural spaces and became increasingly concerned about environmental degradation, particularly issues related to plastic pollution reduction. They view music as a powerful tool for environmental education and community mobilization, seeking to collaborate with organizations that share their commitment to enviro

In [None]:
# -------------- setup --------------
# The API key is read from the environment (load_dotenv() also picks up a local
# .env file) so that no credential is ever stored in the notebook itself.
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in your shell or add it to a "
        ".env file next to this notebook."
    )
genai.configure(api_key=api_key)

client = PersistentClient(path="./database")
# Drop and recreate the collection so that re-running this cell does not add a
# second copy of every document under fresh random ids.
try:
    client.delete_collection("artists")
except Exception:
    # Best effort: on a first run there is no collection to delete yet.
    pass
coll = client.create_collection(name="artists")

# -------------- embed --------------
# One embedding request per document, using the "retrieval_document" task type.
embeddings = []
for doc in documents:
    resp = genai.embed_content(
        model="models/text-embedding-004",   # stable embedding model
        content=doc,
        task_type="retrieval_document",
    )
    embeddings.append(resp["embedding"])

# -------------- store in chroma --------------
# Random UUIDs are sufficient as ids because the collection is rebuilt above.
ids = [str(uuid.uuid4()) for _ in documents]
coll.add(ids=ids, documents=documents, embeddings=embeddings)

print("Stored", coll.count(), "bios in ChromaDB")

Stored 100 bios in ChromaDB


In [None]:
# Inspect one stored record to sanity-check what was written to the collection.
res_peek = coll.peek(1)

print("IDs:")
pprint(res_peek["ids"])

print("\nDocuments:")
pprint(res_peek["documents"])

# Show only the leading values of each vector: printing the full embedding
# floods the cell output, and the trailing "..." signals the truncation.
PREVIEW_DIMS = 5
print("\nEmbedding:")
for i, emb in enumerate(res_peek["embeddings"]):
    print(f"Embedding {i}: {emb[:PREVIEW_DIMS]}...")

print("\nMetadatas:")
pprint(res_peek["metadatas"])

IDs:
['1ce46baa-1871-4fdf-87ea-0a310b6c8fc0']

Documents:
['Name: Ryan Lee\n'
 'Genre: Folk\n'
 'Location: Urbana, IL\n'
 'Nonprofit Interest: Environmental Conservation\n'
 'Bio: Ryan Lee is a folk artist from Urbana, IL who weaves storytelling '
 'traditions with contemporary social commentary through acoustic melodies and '
 'heartfelt lyrics. Their music draws inspiration from midwest prairie '
 'landscapes and urban community experiences, creating authentic connections '
 'with audiences across diverse venues. With influences ranging from '
 'traditional American folk to modern indie-folk, Ryan Lee crafts songs that '
 'resonate with both intimate coffeehouse settings and larger festival '
 'stages.\n'
 'Reasoning: Growing up in the Midwest, Ryan Lee developed a deep connection '
 'to natural spaces and became increasingly concerned about environmental '
 'degradation, particularly issues related to plastic pollution reduction. '
 'They view music as a powerful tool for environmen

In [None]:
def get_relevant_passages(query, db, n_results=10, model="models/text-embedding-004"):
    """Return the documents in ``db`` that best match ``query``.

    Args:
        query: Free-text question to search for.
        db: Collection-like object exposing
            ``query(query_embeddings=..., n_results=..., include=...)``.
        n_results: Maximum number of passages to request from ``db``.
        model: Embedding model used for the query. It should be the same model
            that produced the vectors stored in ``db``; otherwise the vectors
            are not comparable.

    Returns:
        The list of document strings for this single query, in the order the
        collection returned them.
    """
    # Embed the query with the "retrieval_query" task type (optimised for
    # vector search).
    query_embedding = genai.embed_content(
        model=model,
        content=query,
        task_type="retrieval_query",
    )["embedding"]

    results = db.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
        include=["documents"],
    )
    # Results are grouped per query embedding; exactly one was sent, so take
    # the first (and only) group.
    return results["documents"][0]

In [None]:
def convert_passages_to_list(passages):
    """Merge retrieved passages into a single context string.

    Despite the name, the result is one string, not a list: every passage is
    followed by a ``---`` line, and whitespace around the whole text is
    stripped (so a non-empty result ends with a bare ``---``).
    """
    separator = "\n---\n"   # separator for readability
    merged = "".join(passage + separator for passage in passages)
    return merged.strip()

In [None]:
def make_prompt(query, relevant_passages):
    """Assemble the LLM prompt for matching artists to a nonprofit cause.

    Args:
        query: The user's question; inserted after ``Question: ``.
        relevant_passages: Pre-formatted artist text placed under the
            "artist bios and interests" heading.

    Returns:
        The prompt text with leading and trailing whitespace removed.
    """
    # Each section is one paragraph of the prompt; paragraphs are separated by
    # a blank line.
    sections = (
        "You are helping match musicians with nonprofit organizations\n"
        "based on their bios and stated nonprofit interests.",
        f"Question: {query}",
        f"Here are some artist bios and interests:\n{relevant_passages}",
        "If the question is unrelated to the artists' interests, say so clearly.\n"
        "Otherwise, return the names of artists most relevant to the cause and the reasoning behind your decision.",
    )
    return "\n\n".join(sections).strip()

In [None]:
# End-to-end dry run of the retrieval step: build the prompt for one sample
# question and preview it, without calling a generative model in this cell.
question = "I am working with a nonprofit focused on cancer research. Which artists might align with this cause?"

# Step 1: retrieve - embed the question and fetch up to 10 stored artist documents
passages = get_relevant_passages(question, coll, n_results=10)

# Step 2: combine passages into a single string, each followed by a "---" line
context = convert_passages_to_list(passages)

# Step 3: build prompt from the question and the combined artist context
prompt = make_prompt(question, context)

# Step 4: preview the prompt, rendered as Markdown in the cell output
display(Markdown(prompt))

You are helping match musicians with nonprofit organizations
based on their bios and stated nonprofit interests.

Question: I am working with a nonprofit focused on cancer research. Which artists might align with this cause?

Here are some artist bios and interests:
Name: Nova Walker
Genre: Alternative
Location: Evanston, IL
Nonprofit Interest: Education Equity
Bio: Based in Evanston, IL, Nova Walker creates alternative music that combines artistic excellence with social consciousness, performing for audiences who appreciate both technical skill and meaningful content. Their compositions reflect the diversity and complexity of modern American life while honoring musical traditions and pushing creative boundaries. Nova Walker is dedicated to using their platform to promote understanding, justice, and positive change through powerful, authentic musical expression.
Reasoning: Recognizing the transformative power of education and the persistent inequalities in educational access, particularly around after-school programs, Nova Walker is committed to supporting organizations that work to level the playing field for all students. They believe that music and arts education are essential components of a well-rounded education and can provide young people with critical thinking skills, creative expression, and confidence. Nova Walker seeks to partner with nonprofits that share their vision of educational justice and opportunity for all.
---
Name: Parker King
Genre: Folk
Location: Schaumburg, IL
Nonprofit Interest: Healthcare Access
Bio: Parker King is a folk artist from Schaumburg, IL who weaves storytelling traditions with contemporary social commentary through acoustic melodies and heartfelt lyrics. Their music draws inspiration from midwest prairie landscapes and urban community experiences, creating authentic connections with audiences across diverse venues. With influences ranging from traditional American folk to modern indie-folk, Parker King crafts songs that resonate with both intimate coffeehouse settings and larger festival stages.
Reasoning: Parker King is deeply committed to healthcare access because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like preventive care. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on preventive care, Parker King hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Victor Barnes
Genre: Electronic
Location: Chicago, IL
Nonprofit Interest: Senior Care
Bio: Victor Barnes is a electronic artist from Chicago, IL who creates innovative music that blends traditional elements with contemporary influences and socially conscious themes. Their artistic vision encompasses both technical excellence and emotional authenticity, resulting in performances that connect deeply with diverse audiences across the Midwest. Through their music, Victor Barnes explores themes of community, identity, and social justice while maintaining artistic integrity and creative innovation.
Reasoning: Victor Barnes is deeply committed to senior care because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like Medicare advocacy. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on Medicare advocacy, Victor Barnes hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Luna Torres
Genre: Alternative
Location: Rockford, IL
Nonprofit Interest: Arts Education
Bio: Luna Torres is a alternative artist from Rockford, IL who creates innovative music that blends traditional elements with contemporary influences and socially conscious themes. Their artistic vision encompasses both technical excellence and emotional authenticity, resulting in performances that connect deeply with diverse audiences across the Midwest. Through their music, Luna Torres explores themes of community, identity, and social justice while maintaining artistic integrity and creative innovation.
Reasoning: Luna Torres is deeply committed to arts education because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like music therapy. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on music therapy, Luna Torres hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Quinn Rivera
Genre: Country
Location: Rockford, IL
Nonprofit Interest: Healthcare Access
Bio: Based in Rockford, IL, Quinn Rivera creates country music that combines artistic excellence with social consciousness, performing for audiences who appreciate both technical skill and meaningful content. Their compositions reflect the diversity and complexity of modern American life while honoring musical traditions and pushing creative boundaries. Quinn Rivera is dedicated to using their platform to promote understanding, justice, and positive change through powerful, authentic musical expression.
Reasoning: Quinn Rivera is deeply committed to healthcare access because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like preventive care. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on preventive care, Quinn Rivera hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Phoenix Gomez
Genre: Singer-Songwriter
Location: Oak Park, IL
Nonprofit Interest: Senior Care
Bio: Based in Oak Park, IL, Phoenix Gomez creates singer-songwriter music that combines artistic excellence with social consciousness, performing for audiences who appreciate both technical skill and meaningful content. Their compositions reflect the diversity and complexity of modern American life while honoring musical traditions and pushing creative boundaries. Phoenix Gomez is dedicated to using their platform to promote understanding, justice, and positive change through powerful, authentic musical expression.
Reasoning: Phoenix Gomez is deeply committed to senior care because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like elder abuse prevention. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on elder abuse prevention, Phoenix Gomez hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Nova Torres
Genre: Punk
Location: Naperville, IL
Nonprofit Interest: Healthcare Access
Bio: Performing throughout Naperville, IL and the broader region, Nova Torres brings a unique perspective to punk music through thoughtful composition and engaging live performances. Their work demonstrates technical skill, creative vision, and commitment to using music as a vehicle for positive social impact and community connection. Nova Torres draws inspiration from the rich cultural landscape of the Midwest while addressing contemporary issues and universal human experiences.
Reasoning: Nova Torres is deeply committed to healthcare access because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like preventive care. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on preventive care, Nova Torres hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Elena Cook
Genre: Acoustic
Location: Chicago, IL
Nonprofit Interest: Senior Care
Bio: Based in Chicago, IL, Elena Cook creates acoustic music that combines artistic excellence with social consciousness, performing for audiences who appreciate both technical skill and meaningful content. Their compositions reflect the diversity and complexity of modern American life while honoring musical traditions and pushing creative boundaries. Elena Cook is dedicated to using their platform to promote understanding, justice, and positive change through powerful, authentic musical expression.
Reasoning: Elena Cook is deeply committed to senior care because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like Medicare advocacy. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on Medicare advocacy, Elena Cook hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---
Name: Antonio Jenkins
Genre: Rock
Location: Aurora, IL
Nonprofit Interest: Social Justice
Bio: Emerging from the vibrant music scene of Aurora, IL, Antonio Jenkins creates rock music that balances technical proficiency with emotional depth and social awareness. Their compositions feature intricate guitar work, driving rhythms, and vocals that range from intimate whispers to soaring anthems. Antonio Jenkins draws inspiration from both the industrial heritage and natural beauty of the Midwest, crafting songs that speak to contemporary struggles and aspirations.
Reasoning: Having witnessed the impact of voting rights in their community, Antonio Jenkins is driven to support social justice organizations that work toward creating more equitable systems and opportunities. They believe music has the unique ability to build empathy, bridge divides, and mobilize people around shared values of fairness and human dignity. Antonio Jenkins seeks partnerships with nonprofits that align with their vision of using artistic expression to advance social justice and community healing.
---
Name: Iris Anderson
Genre: Electronic
Location: Rockford, IL
Nonprofit Interest: LGBTQ+ Rights
Bio: Based in Rockford, IL, Iris Anderson creates electronic music that combines artistic excellence with social consciousness, performing for audiences who appreciate both technical skill and meaningful content. Their compositions reflect the diversity and complexity of modern American life while honoring musical traditions and pushing creative boundaries. Iris Anderson is dedicated to using their platform to promote understanding, justice, and positive change through powerful, authentic musical expression.
Reasoning: Iris Anderson is deeply committed to lgbtq+ rights because they believe artists have a unique responsibility to use their platform for positive social impact, particularly in addressing critical issues like youth support. Their personal experiences and observations of community needs have strengthened their conviction that music can be a powerful force for awareness, healing, and mobilization around important causes. Through partnerships with nonprofits working on youth support, Iris Anderson hopes to contribute meaningfully to creating positive change while inspiring others to get involved in their communities.
---

If the question is unrelated to the artists' interests, say so clearly.
Otherwise, return the names of artists most relevant to the cause and the reasoning behind your decision.