In [None]:
# Pre-fetch the iris-community:2024.1 image from the InterSystems container
# registry; the same image reference is used when the container is created below.
!docker pull containers.intersystems.com/intersystems/iris-community:2024.1

In [None]:
!pip3 install testcontainers.iris

In [20]:
from testcontainers.iris import IRISContainer
import os

# Launch an IRIS container with a "demo" user, password and namespace,
# publishing container ports 1972 and 52773.
image = "containers.intersystems.com/intersystems/iris-community:2024.1"
container = IRISContainer(
    image,
    username="demo",
    password="demo",
    namespace="demo",
)
container.with_exposed_ports(1972, 52773)
container.start()

# Connection URL for the running container; handed to create_engine() later on.
CONNECTION_STRING = container.get_connection_url("localhost")

Pulling image containers.intersystems.com/intersystems/iris-community:2024.1
Container started: 4ff56480969a
Waiting to be ready...


In [None]:
!pip3 install pandas
!pip3 install sentence_transformers
!pip3 install sqlalchemy

In [21]:

import os, pandas as pd
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine, text

In [22]:
# SQLAlchemy engine bound to the IRIS container started above; every SQL cell
# below opens its connections from this engine.
engine = create_engine(CONNECTION_STRING)

In [None]:
!pip3 install firebase_admin

In [23]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

In [25]:
# Service-account credentials are read from a local key file.
cred = credentials.Certificate('service_key.json')

# Reuse the named app when this cell is re-run: initialize_app() raises
# ValueError if an app called 'new' is already registered.
try:
    app = firebase_admin.get_app('new')
except ValueError:
    app = firebase_admin.initialize_app(cred, name='new')

# The app is registered under a non-default name, so it must be passed
# explicitly; firestore.client() with no argument looks up the *default* app,
# which this notebook never initializes.
db = firestore.client(app=app)

In [26]:
# Accumulator filled by the next cell with one plain dict per document.
nonprofits = []

# Stream of every document in the "nonprofits" collection; it is consumed by
# the loop in the next cell.
nonprofit_ref = db.collection("nonprofits")
docs = nonprofit_ref.stream()

In [27]:
# Flatten every streamed Firestore document into a plain dict and collect it.
#
# Fields are read from doc.to_dict() with dict.get() instead of doc.get():
# DocumentSnapshot.get() raises KeyError when a field is absent, and
# 'mission_vector' is only written back to Firestore by a later cell, so a
# first run would otherwise abort here. Missing fields become None.
nonprofit_fields = ('name', 'website', 'city', 'state', 'mission', 'mission_vector')

for doc in docs:
    data = doc.to_dict() or {}

    # Keep 'id' first, followed by the fields in the order listed above.
    nonprofit = {'id': doc.id}
    nonprofit.update({field: data.get(field) for field in nonprofit_fields})

    nonprofits.append(nonprofit)

print(len(nonprofits))

343


In [28]:
# One row per collected nonprofit dict; columns follow the dict key order
# (id, name, website, city, state, mission, mission_vector).
df = pd.DataFrame(nonprofits)

# Preview the first rows.
df.head()

Unnamed: 0,id,name,website,city,state,mission,mission_vector
0,0GLB8dk058bdbiHFq4vc,Hope House Shelter,www.hopehouseshelter.org,Springfield,MA,We provide compassionate care and support to i...,"[-0.0505363903939724, -0.01882144808769226, 0...."
1,0ZtNJyNf2VzN05mVcbP6,GreenFuture Initiative,www.greenfutureinitiative.org,Boston,MA,Working to protect and restore our local envir...,"[0.028934689238667488, 0.05520101636648178, 0...."
2,0a6geGfA8vmCu9gSD056,City Rise Community Center,www.cityrisecc.org,Worcester,MA,Our mission is to empower underserved communit...,"[0.012638190761208534, -0.03829539567232132, -..."
3,17NK7nlp3yXXcwADnJ0o,Mental Health Association of Massachusetts,www.mentalhealthmass.org,Fall River,MA,The Mental Health Association of Massachusetts...,"[-0.0486491434276104, -0.01821873150765896, 0...."
4,1OQYNZnRkupyspuLjrWg,Boston Women's Empowerment Center,www.bostonwomensempowerment.org,Boston,MA,We work to empower women and girls through edu...,"[-0.00923081487417221, 0.025880808010697365, 0..."


In [29]:
# DDL for the table that mirrors the DataFrame columns; mission_vector is
# declared as a 384-element DOUBLE vector.
sql = """
                CREATE TABLE nonprofits (
        id VARCHAR(255),
        name VARCHAR(255),
        website VARCHAR(500),
        city VARCHAR(255),
        state VARCHAR(2000),
        mission VARCHAR(2000),
        mission_vector VECTOR(DOUBLE, 384)
        )
                """

# engine.begin() opens a connection and a transaction together and commits
# when the block exits without an error.
with engine.begin() as conn:
    result = conn.execute(text(sql))



In [None]:
# Insert every DataFrame row into the nonprofits table inside one transaction.
#
# The embedding is sent as its string form (e.g. "[0.1, 0.2, ...]") and
# converted server-side with TO_VECTOR(); binding a raw Python list to the
# VECTOR column is not something the driver can serialise on its own.
# The statement is loop-invariant, so it is built once.
sql = text("""
    INSERT INTO nonprofits
    (id, name, website, city, state, mission, mission_vector)
    VALUES (:id, :name, :website, :city, :state, :mission, TO_VECTOR(:mission_vector))
""")

scalar_columns = ['id', 'name', 'website', 'city', 'state', 'mission']

with engine.connect() as conn:
    with conn.begin():
        for index, row in df.iterrows():
            params = {column: row[column] for column in scalar_columns}
            # list(...) normalises list-like values (e.g. numpy arrays) so
            # str() yields the bracketed, comma-separated form.
            params['mission_vector'] = str(list(row['mission_vector']))
            conn.execute(sql, params)

In [None]:
# Sentence-embedding model used for both the mission texts and the search phrase.
# NOTE(review): its output size presumably matches the VECTOR(DOUBLE, 384)
# column declared for the nonprofits table — confirm.
model = SentenceTransformer('all-MiniLM-L6-v2') 

In [29]:
# Encode every mission statement in a single batched call, asking the model
# for normalised embeddings.
mission_texts = df['mission'].tolist()
embeddings = model.encode(mission_texts, normalize_embeddings=True)

# Store the embeddings on the DataFrame as plain Python lists, one per row.
df['mission_vector'] = embeddings.tolist()

In [None]:
# Preview the frame again now that mission_vector has been (re)computed.
df.head()


In [39]:
# Write each row's embedding back to its Firestore document, keyed by the
# document id held in the 'id' column.
nonprofits_collection = db.collection("nonprofits")

for doc_id, vector in zip(df['id'], df['mission_vector']):
    nonprofits_collection.document(doc_id).update({"mission_vector": vector})

In [None]:
# Free-text query to embed.
description_search = "Payment for online art supplies"

# Embed the phrase with the same model and normalisation used for the mission
# texts, then convert the result to a plain list.
query_embedding = model.encode(description_search, normalize_embeddings=True)
search_vector = query_embedding.tolist()
print(search_vector)

In [None]:
print(search_vector)

# NOTE(review): search_vector is printed above but is not used by the query;
# this statement simply reads back every row of the table.
sql = text("""
            SELECT * FROM nonprofits 
        """)

# Run the query inside a transaction and materialise all rows before the
# connection is released.
with engine.begin() as conn:
    results = conn.execute(sql).fetchall()

print(results)

In [None]:
# Dump the raw rows fetched by the previous cell.
print(results)

In [None]:
# Show full cell contents so long mission texts are not truncated.
pd.set_option('display.max_colwidth', None)

# Label the fetched rows with the DataFrame's column names, then drop the
# last column (the embedding vector) to keep the preview readable.
labelled_results = pd.DataFrame(results, columns=df.columns)
results_df = labelled_results.iloc[:, :-1]
results_df.head()