In [29]:
import openai
import os
from pydantic import BaseModel
import json
class ScoreResponse(BaseModel):
    score: int
    explanation: str

# Set your API key
api_key = os.getenv("OPENAI_KEY")

# Initialize the OpenAI API client
openai.api_key = api_key


In [28]:
ScoreResponse.schema()

{'title': 'ScoreResponse',
 'type': 'object',
 'properties': {'score': {'title': 'Score', 'type': 'integer'},
  'explanation': {'title': 'Explanation', 'type': 'string'}},
 'required': ['score', 'explanation']}

In [65]:
def generate_profile_description(user_description):
    response = openai.Completion.create(
        engine="davinci-002",
        prompt=f"Generate a user profile description: {user_description}",
        max_tokens=50,  # Adjust the token limit as needed
        n=1,  # Number of responses
        stop=None,  # Stop tokens (optional)
    )
    return response.choices[0].text.strip()

def generate_matching_criteria(user_profile, investor_profile):
    # Generate matching criteria using LLM
    criteria = []
    criteria.append("- The founder's name is: " + user_profile["Name"])
    criteria.append("- They have started a company whose name is "+user_profile["Company"])
    criteria.append("- The industry they are building the product for is " + user_profile["Industry"] + 
                    " and the specific verticals they are targetting are " + user_profile["Verticals"] + ".")
    criteria.append("- Location where they are targetting their product for is, i.e their target customer location is : "+user_profile["Target Market (Locations)"] + 
                    " and they are looking out to raise money for " + user_profile["Investment Stage"] +" stage." )
    criteria.append("- The amount they are targetting to raise for is around " + str(user_profile["Planned Raise Amount"]) + 
                    " and their main traction points are: " + user_profile["Traction"] + ".")
    criteria.append("- Their Revenue/profitability at this moment is " + user_profile["Revenue/profitability"] + " and their fundraising history is "+ user_profile["Fundraising history"] + ".")
    criteria.append("- We should classify their products ip(Intellectual property) as " + user_profile["Intellectual property"] + " and the competitive landscape for them looks like " + user_profile["Competitive landscape"] + ".")
    criteria.append(f"- Number of customers that are paying for their {user_profile['Target Customer']} product is " + 
                    user_profile["Customer Base (# paying clients)"] + 
                    ", B2B products have lesser customers than B2C products for obvious reasons " + 
                    "and their growth potential is " +
                    user_profile["Growth potential"])
    
    # Combine generated criteria into a single text
    startup_description = "\n".join(criteria)

    criteria = f'''- We have {investor_profile["Fund from Fund Master"]} as an investor.\n'''\
    f'''- They invest in businesses that build product in the industry of {investor_profile["Industry Final"]} '''\
    f'''and specific verticals like {investor_profile["Vertical Final"]}.\n- This investor can be classified as a {investor_profile["Investor Type"]}''' \
    f''' and their preferred geography for investment is {investor_profile["Preferred Geography (from Fund from Fund Master)"]}.\n''' \
    f'''- They prefer to invest in {investor_profile["Preferred Investment Stage"]} and provide their funds as {investor_profile["Preferred Investment Type"]}.\n''' \
    f'''- They usually invest in {investor_profile["Target Customer"]} business.\n- The minimum amount they are planning to invest is {investor_profile["Min check size"]} Million and '''\
    f'''the max investment they are planning to make is {investor_profile["Max Check size"]} Million. Impact Investor? {investor_profile["Impact Investor"]}.\n'''\
    f'''- Their return expectations is {investor_profile["Return Expectations"]} and their network and support resources are {investor_profile["Network and Support Resources "]}.\n'''\
    f'''- Their decision making timeline is {investor_profile["Decision-making timeline"]}, this basically tell how much time do they take to come to conclusion whether they want to invest or not.\n'''\
    f'''- Out of 5 we can rate their reputation as  {investor_profile["Reputation"]}.\n'''

    investor_description = criteria
    # key_aspects = f"""- Industry and vertical Criteria: Industry and vertical overlap is important, that doesn't mean each industry and vertical a investor is planning to invest in """\
    #               f"""should be present in the founder portfolio, but something should overlap, if a founder is creating product for healthcare then healtcare or something closer to it """\
    #               f"""should be there in the vertical or industry investor is planning to invest in. Maybe industry doesn't have it then something should be matching in verticals\n"""\
    #               f"""- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). """\
    #               f"""- Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more\n"""\
    #               f"""- Investment stage: Founder looking for seed fund should be getting a investor that is looking to invest in that stage, data will have other key aspects like early stage or late stage so context based knowdlege should be used for matching\n"""\
    #               f"""- Traction: Traction of founder i.e. what specialiality is it bring onboard and based on that how attractive they might seem to a investor should also be considered\n"""\
    #               f"""- Revenue: If the founder is already in revenue stage or early revenue stage and if that is recurring too, that would seem promising to an investor.\n"""\
    #               f"""- IP and Competitive landscape: This is another key aspect, how strong or weak the IP of the founder is and how competitive is there market, helps in projecting how much market """\
    #               f"""will the be able to capture and how easily will the be able to generate return for founder, having a competitive market but strong IP can also make a founder lucrative\n"""\
    #               f"""- Target market location is important, a founder building product for a speicific geography then the investor should also be willing to invest in that location\n"""\
    #               f"""- For the rest, providing weightage is challenging. For many of the fields, they become relevant as a relative score given the context of comparing companies. """\
    #               f"""For example, a medtech company with a FDA approval will be considered a better company to invest in vs another one with a potentially great product but no approval yet. """\
    #               f"""So independently, both companies could get an equal matching score. We would love to see them not get the same score somehow."""
    # user_context = f"""TASK: Generate matching criteria among a founder and investor\n""" \
    #                 f"""KEY FACTORS TO CONSIDER:\n{key_aspects}"""\
    #                 f"""Please take into account many other key aspects that you as an expert broker will take \n"""\
    #                 f"""On the basis of above context generate matching criteria:\n\nStartup Profile:\n{startup_description}\n\nInvestor Profile:\n{investor_description}"""
    # response = openai.ChatCompletion.create(
    #     model="gpt-3.5-turbo",
    #     messages = [
    #         {"role": "system", "content" : "You are Broker that has expertise in VC, investment and managing funds and you are working on investors meet new founders that will profit them"},
    #         {"role": "user", "content" : user_context}
    #     ],
    # )
    # matching_criteria = response["choices"][0]['message']['content']
    return startup_description, investor_description

def generate_matching_score(startup_description, investor_description):
    key_aspects = f"""- Industry and vertical: There should be matching occuring between founder's industry or verticals and investor's indsutry or vertical, if it's not then these they shouldn't match, some investors write very generic industry and vertical, so try and be as specific as possible when matching, specifity is very important """\
                  f"""If a founder is creating product for healthcare then healtcare should be present in vertical or industry that the investor is interested in.\n"""\
                  f"""- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). """\
                  f"""- Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more\n"""\
                  f"""- Investment stage: Founder looking for seed fund should be getting a investor that is looking to invest in that stage, data will have other key aspects like early stage or late stage so context based knowdlege should be used for matching\n"""\
                  f"""- Traction: Traction of founder i.e. what specialiality is it bring onboard and based on that how attractive they might seem to a investor should also be considered\n"""\
                  f"""- Revenue: If the founder is already in revenue stage or early revenue stage and if that is recurring too, that would seem promising to an investor.\n"""\
                  f"""- IP and Competitive landscape: This is another key aspect, how strong or weak the IP of the founder is and how competitive is there market, helps in projecting how much market """\
                  f"""will the be able to capture and how easily will the be able to generate return for founder, having a competitive market but strong IP can also make a founder lucrative\n"""\
                  f"""- Target market location is important, a founder building product for a speicific geography then the investor should also be willing to invest in that location\n"""\
                  f"""- For the rest, providing weightage is challenging. For many of the fields, they become relevant as a relative score given the context of comparing companies. """\
                  f"""For example, a medtech company with a FDA approval will be considered a better company to invest in vs another one with a potentially great product but no approval yet. """\
                  f"""So independently, both companies could get an equal matching score. We would love to see them not get the same score somehow."""
    
    matching_criteria_weightage = f"""1. Industry and vertical overlap: Most important, if no indsutry or vertical is overlapping between founder and investor score must be less than 50 don't look for small overlap\n"""\
                                  f"""2. Planned raised amount: Most important, if the planned raise amount is not within investors range the score must be less than 50 and don't consider other key factors at all\n"""\
                                  f"""3. Investment stage: Weightage is high, again some matching should be there otherwise it's a time waste for investor\n"""\
    
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages = [
            {"role": "system", "content" : "You are Broker that has expertise in VC, investment and managing funds and you are working on investors meet new founders that will profit them"},
            {"role": "user", "content" : f"""Generate matching score among a founder and investor, an investor gets 1000s of founder's profile so the matching score should be tight and useful.\n"""\
                                         f"""Please take into account many other key aspects that you as an expert broker will take, some key important factors:\n"""
                                         f"""{key_aspects}\n CHEKS TO KEEP IN MIND:\n {matching_criteria_weightage}\nOn the basis of above context calculate matching score:\n\n\n\nStartup Profile:\n{startup_description}\n\nInvestor Profile:\n{investor_description}\n\nOutput: give a score and explanation of around 50 words\n"""}
        ],
        functions=[
        {
          "name": "get_answer_for_user_query",
          "description": "Get matching score and reason behind it",
          "parameters": ScoreResponse.schema()
        }
    ],
    function_call={"name": "get_answer_for_user_query"}
    )
    output = json.loads(response.choices[0]["message"]["function_call"]["arguments"])
    # output = response["choices"][0]['message']['content']  # Assuming LLM returns a numerical score
    return output


In [3]:
import csv

founders_data = []
with open('data/founder_v2.csv', 'r') as file:
    reader = csv.reader(file)
    headers = next(reader)
    for row in reader:
        founders_data.append(dict(zip(headers, row)))

founder_document = founders_data[10]

In [4]:
import csv

investors_data = []
with open('data/inv_clean_v1.csv', 'r') as file:
    reader = csv.reader(file)
    headers = next(reader)
    for row in reader:
        investors_data.append(dict(zip(headers, row)))

inv_document = investors_data[0]

In [5]:
founder_document = {k.replace('\ufeffName', 'Name'): v for k, v in founder_document.items()}
founder_document

{'Name': 'Clair Marie McDade',
 'Company': 'Archneura Inc.',
 'Email': 'clairmarie@archneura.com',
 'Industry': 'Real Estate',
 'Verticals': 'Real Estate Technology,Construction Technology',
 'Target Customer': 'B2C',
 'Geography': 'United States of America (USA)',
 'Target Market (Locations)': 'United States of America (USA)',
 'Investment Stage': 'SEED',
 'Planned Raise Amount': '$1M to $2M',
 'Market size': '$5Billion to $10Billion',
 'Business Revenue Model': 'Retail',
 'Stage of development': 'SEED',
 'Revenue/profitability': 'Recurring',
 'Fundraising history': 'Friends and Family',
 'Traction': 'Certifications,Awards & recognition',
 'Team Key Roles & experience': 'Weak',
 'Intellectual property': 'Strong ',
 'Competitive landscape': 'No Competition',
 'Customer Base (# paying clients)': '90.0',
 'Growth potential': 'High',
 'Management experience': 'Startup Professional,First time founder'}

In [32]:
inv_document

{'\ufeffInvestor Name': 'Maarten Goossens',
 'Fund from Fund Master': 'Anterra Capital',
 'Target Customer': 'B2B',
 'Industry Final': 'Agriculture & Forestry,Energy and Climate,Food and Beverage,Healthcare',
 'Vertical Final': 'AgTech,Climate Tech,Digital Health,HealthTech,Beauty & Wellness',
 'Investor Type': 'Venture Capital',
 'Website': 'www.anterracapital.com',
 'Preferred Geography (from Fund from Fund Master)': 'Asia, Canada, Europe, United States',
 'Preferred Investment Stage': 'Pre-SEED,Series A,SEED',
 'Preferred Investment Type': 'Convertible Debt,SAFE,Debt',
 'Min check size': '$0.10',
 'Max Check size': '$2.00',
 'HQ Location': 'Amsterdam, Netherlands',
 'URL (from Fund from Fund Master)': 'www.anterracapital.com',
 'Preferred TAM': '$5B to $10B',
 'Impact Investor': 'No',
 'Number of investments (Portfolio Size)': '90',
 'Return Expectations': '2X - 5X',
 'Exit Strategy Preference': 'Acquisition,IPO',
 'Network and Support Resources ': 'Heavily Supportive',
 'Due dilige

In [6]:
mc = generate_matching_criteria(founder_document, inv_document)

In [126]:
print(mc)

Matching criteria for founder and investor:

1. Industry and vertical overlap: Archneura Inc. is in the Real Estate and Construction Technology verticals, while Anterra Capital invests in the Agriculture & Forestry, Energy and Climate, Food and Beverage, and Healthcare industries, with specific verticals like AgTech, Climate Tech, Digital Health, and HealthTech. There is no direct industry overlap, but there is a potential vertical overlap in the area of Climate Tech as it relates to both real estate and agriculture.

2. Planned raised amount: Archneura Inc. is looking to raise between $1M to $2M, which falls within Anterra Capital's minimum and maximum check size of $0.10M to $2.00M.

3. Investment stage: Archneura Inc. is at the Seed stage, and Anterra Capital invests in Pre-Seed, Series A, and Seed stages. There is a match in terms of investment stage.

4. Traction: Archneura Inc. has certifications, awards, and recognition as their traction points, indicating some level of validati

In [54]:
founder_document

{'Name': 'Clair Marie McDade',
 'Company': 'Archneura Inc.',
 'Email': 'clairmarie@archneura.com',
 'Industry': 'Real Estate',
 'Verticals': 'Real Estate Technology,Construction Technology',
 'Target Customer': 'B2C',
 'Geography': 'United States of America (USA)',
 'Target Market (Locations)': 'United States of America (USA)',
 'Investment Stage': 'SEED',
 'Planned Raise Amount': '$1M to $2M',
 'Market size': '$5Billion to $10Billion',
 'Business Revenue Model': 'Retail',
 'Stage of development': 'SEED',
 'Revenue/profitability': 'Recurring',
 'Fundraising history': 'Friends and Family',
 'Traction': 'Certifications,Awards & recognition',
 'Team Key Roles & experience': 'Weak',
 'Intellectual property': 'Strong ',
 'Competitive landscape': 'No Competition',
 'Customer Base (# paying clients)': '90.0',
 'Growth potential': 'High',
 'Management experience': 'Startup Professional,First time founder'}

In [58]:
inv_document

{'\ufeffInvestor Name': 'Ben Jen',
 'Fund from Fund Master': 'Ben Jen Holdings',
 'Target Customer': 'B2B',
 'Industry Final': 'Software and Information Services,Media and Content,Communications & Networking,Electronics and Computer Hardware',
 'Vertical Final': '',
 'Investor Type': 'Venture Capital',
 'Website': 'www.benjenholdings.com',
 'Preferred Geography (from Fund from Fund Master)': 'United States',
 'Preferred Investment Stage': 'Pre-SEED,Series A,SEED',
 'Preferred Investment Type': 'Convertible Debt,SAFE,Debt',
 'Min check size': '$0.00',
 'Max Check size': '$0.10',
 'HQ Location': 'New York, NY',
 'URL (from Fund from Fund Master)': 'www.benjenholdings.com',
 'Preferred TAM': 'Under $5 Billion',
 'Impact Investor': 'No',
 'Number of investments (Portfolio Size)': '',
 'Return Expectations': '2X - 5X',
 'Exit Strategy Preference': 'Liquidation',
 'Network and Support Resources ': 'Weak',
 'Due diligence process': 'Outside Reliance',
 'Decision-making timeline': 'Weeks',
 'R

In [64]:
print(investor_description)

- We have Ben Jen Holdings as an investor.
- They invest in businesses that build product in the industry of Software and Information Services,Media and Content,Communications & Networking,Electronics and Computer Hardware and specific verticals like .
- This investor can be classified as a Venture Capital and their preferred geography for investment is United States.
- They prefer to invest in Pre-SEED,Series A,SEED and provide their funds as Convertible Debt,SAFE,Debt.
- They usually invest in B2B business.
- The minimum amount they are planning to invest is $0.00 Million and the max investment they are planning to make is $0.10 Million. Impact Investor? No.
- Their return expectations is 2X - 5X and their network and support resources are Weak.
- Their decision making timeline is Weeks, this basically tell how much time do they take to come to conclusion whether they want to invest or not.
- Out of 5 we can rate their reputation as  .



In [66]:
startup_description, investor_description = generate_matching_criteria(founder_document, inv_document)
generate_matching_score(startup_description, investor_description)

{'score': 40,
 'explanation': "Although both are based in the US, the sector mismatch and the investor's max check of $0.10M comes quite short of the founders planned raise of $1M to $2M create a compatibility issue."}

In [67]:
import csv

with open('matching_scores_1.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['founder_info', 'investor_info', 'score', 'explanation'])
    for inv_document in investors_data[:15]:
        startup_description, investor_description = generate_matching_criteria(founder_document, inv_document)
        matching_score = generate_matching_score(startup_description, investor_description)
        # matching_score = generate_matching_score(founder_document, inv_document)
        writer.writerow([startup_description, investor_description, matching_score['score'], matching_score['explanation']])
        print(f"{inv_document['Fund from Fund Master']} done")

Anterra Capital done
Propel Venture Partners done
Sevin Rosen Funds done
Tuesday Capital done
TeleSoft Partners done
NAV.VC done
SeedInvest done
180 Degree Capital (NAS: TURN) done
Brighter Capital done
Tandem Capital done
Tech Coast Angels done
AI8 Ventures done
Persephone Venture Partners done
Leawood Venture Capital done
Ben Jen Holdings done


In [111]:
print(
    f"""- Industry and vertical Criteria: Industry and vertical overlap is important, that doesn't mean each industry and vertical a investor is planning to invest in """\
                  f"""should be present in the founder portfolio, but something should overlap, if a founder is creating product for healthcare then healtcare or something closer to it """\
                  f"""should be there in the vertical or industry investor is planning to invest in. Maybe industry doesn't have it then something should be matching in verticals\n"""\
                  f"""- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). """\
                  f"""- Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more\n"""\
                  f"""- Investment stage: Founder looking for seed fund should be getting a investor that is looking to invest in that stage, data will have other key aspects like early stage or late stage so context based knowdlege should be used for matching\n"""\
                  f"""- Traction: Traction of founder i.e. what specialiality is it bring onboard and based on that how attractive they might seem to a investor should also be considered\n"""\
                  f"""- Revenue: If the founder is already in revenue stage or early revenue stage and if that is recurring too, that would seem promising to an investor.\n"""\
                  f"""- IP and Competitive landscape: This is another key aspect, how strong or weak the IP of the founder is and how competitive is there market, helps in projecting how much market """\
                  f"""will the be able to capture and how easily will the be able to generate return for founder, having a competitive market but strong IP can also make a founder lucrative\n"""\
                  f"""- Target market location is important, a founder building product for a speicific geography then the investor should also be willing to invest in that location\n"""\
                  f"""- For the rest, providing weightage is challenging. For many of the fields, they become relevant as a relative score given the context of comparing companies. """\
                  f"""For example, a medtech company with a FDA approval will be considered a better company to invest in vs another one with a potentially great product but no approval yet. """\
                  f"""So independently, both companies could get an equal matching score. We would love to see them not get the same score somehow."""
)

- Industry and vertical Criteria: Industry and vertical overlap is important, that doesn't mean each industry and vertical a investor is planning to invest in should be present in the founder portfolio, but something should overlap, if a founder is creating product for healthcare then healtcare or something closer to it should be there in the vertical or industry investor is planning to invest in. Maybe industry doesn't have it then something should be matching in verticals
- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). - Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more
- Investment stage: Founder looking for seed fund should be getting a investor that is looking to invest 

In [121]:
key_aspects = f"""- Industry and vertical Criteria: Industry and vertical overlap is important, that doesn't mean each industry and vertical a investor is planning to invest in """\
                  f"""should be present in the founder portfolio, but something should overlap, if a founder is creating product for healthcare then healtcare or something closer to it """\
                  f"""should be there in the vertical or industry investor is planning to invest in. Maybe industry doesn't have it then something should be matching in verticals\n"""\
                  f"""- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). """\
                  f"""- Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more\n"""\
                  f"""- Investment stage: Founder looking for seed fund should be getting a investor that is looking to invest in that stage, data will have other key aspects like early stage or late stage so context based knowdlege should be used for matching\n"""\
                  f"""- Traction: Traction of founder i.e. what specialiality is it bring onboard and based on that how attractive they might seem to a investor should also be considered\n"""\
                  f"""- Revenue: If the founder is already in revenue stage or early revenue stage and if that is recurring too, that would seem promising to an investor.\n"""\
                  f"""- IP and Competitive landscape: This is another key aspect, how strong or weak the IP of the founder is and how competitive is there market, helps in projecting how much market """\
                  f"""will the be able to capture and how easily will the be able to generate return for founder, having a competitive market but strong IP can also make a founder lucrative\n"""\
                  f"""- Target market location is important, a founder building product for a speicific geography then the investor should also be willing to invest in that location\n"""\
                  f"""- For the rest, providing weightage is challenging. For many of the fields, they become relevant as a relative score given the context of comparing companies. """\
                  f"""For example, a medtech company with a FDA approval will be considered a better company to invest in vs another one with a potentially great product but no approval yet. """\
                  f"""So independently, both companies could get an equal matching score. We would love to see them not get the same score somehow."""
user_context = f"""TASK: Generate matching criteria among a founder and investor\n""" \
                    f"""KEY FACTORS TO CONSIDER:\n{key_aspects}"""\
                    f"""Please take into account many other key aspects that you as an expert broker will take \n"""

In [122]:
print(user_context)

TASK: Generate matching criteria among a founder and investor
KEY FACTORS TO CONSIDER:
- Industry and vertical Criteria: Industry and vertical overlap is important, that doesn't mean each industry and vertical a investor is planning to invest in should be present in the founder portfolio, but something should overlap, if a founder is creating product for healthcare then healtcare or something closer to it should be there in the vertical or industry investor is planning to invest in. Maybe industry doesn't have it then something should be matching in verticals
- Similary for Planned raised amount should be in between of min check size and max check size of investor(min check of 0.25 means 0.25 million dollars i.e. 250000 dollars, and this applies to max check size too). - Matching will also change based on for example round progress, if the investor min check sizze is $200k and a founder has only 150k left to raise in her round, that investor will not a match any more
- Investment stage