In [1]:
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
from langchain_openai import OpenAIEmbeddings
import chromadb
from decouple import config
import chromadb.utils.embedding_functions as embedding_functions
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
                api_key=config("OPENAI_API_KEY"),
                model_name="text-embedding-3-small"
            )
chroma_client = chromadb.HttpClient(host=config('CHROMA_HOST'), port=8000)

                        
collection = chroma_client.get_or_create_collection(name="investor_lookup", embedding_function=openai_ef)


In [17]:
import uuid
import requests
from decouple import config
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langsmith import traceable
from pinecone import Pinecone
from pyairtable import Api
from django.db import models, transaction
from langchain_chroma import Chroma

from submind.prompts.prompts import TOOL_RESULT_PROMPT


def get_fundraises():
    pass


def get_industry():
    pass


# can we turn a plaintext query into a structured airtable query? Maybe for now, just create a fulltext field and dump in pinecone?
schema = {

}


def get_airtable_schema(base_id, api_key):
    url = f'https://api.airtable.com/v0/meta/bases/{base_id}/tables'
    headers = {
        'Authorization': f'Bearer {api_key}',
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()


# Function to map Airtable field types to Django field types
def map_field_type(airtable_type):
    mapping = {
        'singleLineText': 'models.CharField(max_length=255)',
        'multilineText': 'models.TextField()',
        'richText': 'models.TextField()',
        'number': 'models.FloatField()',
        'checkbox': 'models.BooleanField()',
        'date': 'models.DateField()',
        'dateTime': 'models.DateTimeField()',
        'currency': 'models.DecimalField(max_digits=10, decimal_places=2)',
        'percent': 'models.FloatField()',
        'singleSelect': 'models.CharField(max_length=255)',
        'multipleSelects': 'models.JSONField()',
        'attachment': 'models.JSONField()',
        'formula': 'models.CharField(max_length=255)',
        'rollup': 'models.CharField(max_length=255)',
        'createdTime': 'models.DateTimeField(auto_now_add=True)',
        'lastModifiedTime': 'models.DateTimeField(auto_now=True)',
        'autoNumber': 'models.AutoField()',
    }
    return mapping.get(airtable_type, 'models.CharField(max_length=255)')


# Function to generate Django model
def generate_django_model(table):
    model_name = table['name'].replace(' ', '')
    fields = table['fields']

    model = f'class {model_name}(models.Model):\n'
    for field in fields:
        field_name = field['name'].replace(' ', '_').lower()
        field_type = map_field_type(field['type'])
        model += f'    {field_name} = {field_type}\n'

    return model


# Function to fetch data from Airtable
def fetch_airtable_data(table_name, base_id, api_key):
    url = f"{config('AIRTABLE_API_URL')}/{base_id}/{table_name}"
    headers = {
        'Authorization': f'Bearer {api_key}',
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()['records']


# Function to populate Django database
def populate_django_db(table_name, model, base_id, api_key):
    data = fetch_airtable_data(table_name, base_id, api_key)
    ModelClass = globals()[model]
    with transaction.atomic():
        for record in data:
            fields = record['fields']
            model_instance = ModelClass()
            for field_name, value in fields.items():
                django_field_name = field_name.replace(' ', '_').lower()
                setattr(model_instance, django_field_name, value)
            model_instance.save()

def get_all_records():
    api = Api(config('PRELO_AIRTABLE_API_KEY'))
    table = api.table(config('PRELO_BASE_ID'), config('PRELO_TABLE_ID'))
    records = table.all()
    return records

def get_no_warm_investor_records():
    api = Api(config('PRELO_AIRTABLE_API_KEY'))
    table = api.table(config('PRELO_BASE_ID'), config('PRELO_NO_WARM_TABLE_ID'))
    records = table.all()
    return records

def get_angel_records():
    api = Api(config('PRELO_AIRTABLE_API_KEY'))
    table = api.table(config('PRELO_BASE_ID'), config('PRELO_ANGEL_TABLE_ID'))
    records = table.all()
    return records



def format_records(records):
    formatted_strings = []
    for record in records:
        formatted_record = ", ".join(f"{key}: {value}" for key, value in record.items())
        formatted_strings.append(formatted_record)
    return formatted_strings


def load_record_vectors(records):
    metadatas = []
    ids = []
    chunks_to_save = []
    for record in records:
        formatted_record = ", ".join(f"{key}: {value}" for key, value in record['fields'].items())
        id = str(uuid.uuid4())
        metadata = {
            "airtable_id": record['id'],
            "created_at": record['createdTime'],
            "founders": record['fields'].get("Founders", ""),
            "stage": record['fields'].get("Funding Stage", ""),
            "industry": record['fields'].get("Industry", ""),
            "funding_amount": record['fields'].get("Last Funding (USD)", ""),
        }

        chunks_to_save.append(formatted_record)
        metadatas.append(metadata)
        ids.append(id)

    collection.add(documents=chunks_to_save, metadatas=metadatas, ids=ids)

def load_angel_investor_records(records):
    metadatas = []
    ids = []
    chunks_to_save = []
    for record in records:
        formatted_record = ", ".join(f"{key}: {value}" for key, value in record['fields'].items())
        id = str(uuid.uuid4())
        metadata = {
            "airtable_id": record['id'],
            "created_at": record['createdTime'],
            "investor": record['fields'].get("Investor (VC or Angel)", ""),
            "contact_name": record['fields'].get("Contact Name", ""),
            "industry": str(record['fields'].get("Preferred Sectors", "")),
            "funding_stage": str(record['fields'].get("Preferred Investment Size", "")),
            "funding_amount": record['fields'].get("Last Funding (USD)", ""),
        }

        chunks_to_save.append(formatted_record)
        metadatas.append(metadata)
        ids.append(id)
    collection.add(documents=chunks_to_save, metadatas=metadatas, ids=ids)



def load_no_warm_intro_investor_records(records):
    metadatas = []
    ids = []
    chunks_to_save = []
    for record in records:
        formatted_record = ", ".join(f"{key}: {value}" for key, value in record['fields'].items())
        record_id = str(uuid.uuid4())
        metadata = {
            "airtable_id": record['id'],
            "created_at": record['createdTime'],
            "investor": record['fields'].get("Investor (VC or Angel)", ""),
            "contact_name": record['fields'].get("Contact Name", ""),
            "industry": str(record['fields'].get("Industry", "")),
            "check_size": str(record['fields'].get("Check Size $", "")),
            "funding_stage": str(record['fields'].get("Funding Stage", "")),
            "geography": str(record['fields'].get("Geography", "")),
        }
        
        print(f'record: {formatted_record}')
        chunks_to_save.append(formatted_record)
        metadatas.append(metadata)
        ids.append(record_id)

        collection.add(documents=[formatted_record], metadatas=[metadata], ids=[record_id])
        print("added record")

    


TOOL_DESCRIPTION = "This tool allows you to get information about startups, their founders, and their fundraising efforts"


@traceable
def query_records(query, previous_results=None):
    embeddings = OpenAIEmbeddings(
                            model="text-embedding-3-small",
                            openai_api_key=config("OPENAI_API_KEY"),
                            openai_api_base=config('OPENAI_API_BASE'),
                            headers={
                                "Helicone-Auth": f"Bearer {config('HELICONE_API_KEY')}"
                            })
    langchain_chroma = Chroma(
        client=chroma_client,
        collection_name="investor_lookup",
        embedding_function=embeddings,
    )

   
    matched = langchain_chroma.similarity_search(query, k=5)
    docs = "\n\n".join(doc.page_content for doc in matched)
    model = ChatOpenAI(model="gpt-4-turbo", openai_api_key=config("OPENAI_API_KEY"))
    prompt = ChatPromptTemplate.from_template(TOOL_RESULT_PROMPT)
    chain = prompt | model | StrOutputParser()
    combined_query = f"{previous_results}\n\n{query}" if previous_results else query
    response = chain.invoke({
        "query": combined_query,
        "tool_description": TOOL_DESCRIPTION,
        "tool_output": docs
    })
    print(response)
    return response

In [3]:
angel_records = get_angel_records()

In [4]:
no_warm_intro_records = get_no_warm_investor_records()

In [18]:
query_records("angel investor in the united states that invests in healthcare")

Here is the information about angel investors in the United States that invest in healthcare, formatted for clarity and ease of comparison:

1. **US Angel Investors**
   - **Location:** Palo Alto, CA
   - **Contact Name:** John Ricci
   - **Title:** Founder & President
   - **Description:** A group that provides counsel and capital to startups in various industries including healthcare and medical devices.
   - **Preferred Sectors:** Software
   - **Type:** Angel Group
   - **Website:** Not provided

2. **Venture-med**
   - **Location:** Orinda, CA
   - **Website:** [www.venture-med.com](http://www.venture-med.com)
   - **Phone Number:** +1 (858) 369-3888
   - **Description:** An organization of accredited investors focused on healthcare startups, offering seed funding and mentorship.
   - **Preferred Sectors:** Commercial Services, Software
   - **Type:** Angel Group

3. **Health Wildcatters**
   - **Location:** Dallas, TX
   - **Website:** [www.healthwildcatters.com](http://www.healt

'Here is the information about angel investors in the United States that invest in healthcare, formatted for clarity and ease of comparison:\n\n1. **US Angel Investors**\n   - **Location:** Palo Alto, CA\n   - **Contact Name:** John Ricci\n   - **Title:** Founder & President\n   - **Description:** A group that provides counsel and capital to startups in various industries including healthcare and medical devices.\n   - **Preferred Sectors:** Software\n   - **Type:** Angel Group\n   - **Website:** Not provided\n\n2. **Venture-med**\n   - **Location:** Orinda, CA\n   - **Website:** [www.venture-med.com](http://www.venture-med.com)\n   - **Phone Number:** +1 (858) 369-3888\n   - **Description:** An organization of accredited investors focused on healthcare startups, offering seed funding and mentorship.\n   - **Preferred Sectors:** Commercial Services, Software\n   - **Type:** Angel Group\n\n3. **Health Wildcatters**\n   - **Location:** Dallas, TX\n   - **Website:** [www.healthwildcatters

In [11]:
# load_angel_investor_records(angel_records)
# load_no_warm_intro_investor_records(no_warm_intro_records) - breaks on empty record at end

record: Contact Name: Alex Kopelyan, Investor (VC or Angel): IndieBio (SOSV), Industry: ['Agritech', 'BioTech', 'CleanTech', 'Deep tech', 'Digital Health', 'Foodtech', 'Sustainable Materials', 'MedTech & Healthcare', 'FemTech', 'Love crazy ideas', 'Mental Health'], Check Size $: 250,000, Funding Stage: ['Pre-seed', 'Idea/first check'], Do you lead rounds?: ['Accelerator'], # of cold outreach investments made:: Don't know the number, but a good percent of our 150 investments, Additional notes for founders:: We invest in cohorts, up to 15 companies roughly twice per year. We do investments beyond the accelerator into seed, A, and beyond. However that is only for accelerator alum.
added record
record: Contact Name: Mark Pearson, Investor (VC or Angel): Fuel Ventures, Industry: ['Adtech', 'AI/ML', 'AR/VR', 'B2B', 'B2C', 'Beauty', 'Beauty & wellness', 'CleanTech', 'Consumer', 'Cybersecurity', 'Digital Health', 'Digital health', 'DTC', 'E-commerce', 'Edtech', 'Enterprise Software', 'Fintech'

BadRequestError: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}