# Crunchbase API v4 Setup - Boardroom Allies

**From Crunchbase**:
- How to Use the API - https://data.crunchbase.com/docs/using-the-api
- How to find which fields and data are available via our API - https://data.crunchbase.com/docs/available-data
- Swaggerhub (contains API calls and responses) - https://app.swaggerhub.com/apis-docs/Crunchbase/crunchbase-enterprise_api/#/Entity/get_entities_organizations__entity_id_
- API Reference - https://data.crunchbase.com/reference


### Next Steps (8/10):
- Query 1: Organization --> Financial Summary, Funding Rounds, Board Members
- Query 2: Investor --> Board Affiliations, Partner Funding Rounds
- Query 3: Match-making (Corporate BoD <> Partner Investors)
- Push to Google Sheets: https://towardsdatascience.com/how-to-integrate-google-sheets-and-jupyter-notebooks-c469309aacea
- Create crunchbase-p1 module with generalized queries and interactive input

### Import all necessary packages

In [178]:
# Requests allows you to send HTTP/1.1 requests extremely easily.
# There’s no need to manually add query strings to your URLs, or to form-encode your POST data. 
# Keep-alive and HTTP connection pooling are 100% automatic, thanks to urllib3.
import requests
import json
import pandas as pd
from pandas import json_normalize 
from operator import itemgetter
from user_key import userkey # Pulls in P1s Crunchbase API user key

# Functions needed

In [179]:
def url_count(query, query_type):
    '''
    Returns the total number of entities matching a Crunchbase search.

    QUERY
    Search body (dict) sent as the JSON payload of the POST request.

    QUERY_TYPE
    Collection appended to the searches endpoint URL, e.g. "jobs".

    Raises requests.HTTPError when the API answers with a 4xx/5xx status.
    '''
    # POST method with API URL, query_type as a parameter, and passing query as json.
    # https://www.w3schools.com/python/ref_requests_post.asp
    r = requests.post(
        "https://api.crunchbase.com/api/v4/searches/" + query_type,
        params=userkey,
        json=query,
    )
    # Surface HTTP failures directly instead of failing later with an
    # unrelated KeyError on the missing "count" field.
    r.raise_for_status()
    return r.json()["count"]

def url_extraction(query, query_type):
    '''
    Runs a Crunchbase search and appends the flattened entities to the global `raw` dataframe.

    QUERY
    Search body (dict) sent as the JSON payload of the POST request.

    QUERY_TYPE
    Collection appended to the searches endpoint URL, e.g. "jobs".

    Returns nothing; the result is accumulated in the module-level `raw`,
    which must already exist as a dataframe before this function is called.

    Raises requests.HTTPError when the API answers with a 4xx/5xx status.
    '''
    # Results are accumulated in the module-level dataframe
    global raw
    # POST method with API URL, query_type as a parameter, and passing query as json.
    # https://www.w3schools.com/python/ref_requests_post.asp
    r = requests.post(
        "https://api.crunchbase.com/api/v4/searches/" + query_type,
        params=userkey,
        json=query,
    )
    # Surface HTTP failures directly instead of failing later with an
    # unrelated KeyError on the missing "entities" field.
    r.raise_for_status()
    normalized_raw = json_normalize(r.json()["entities"])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to stack the new rows under the existing ones.
    raw = pd.concat([raw, normalized_raw], ignore_index=True)

def autocompletes(query, collection_ids_list=None, limit=None):
    '''
    Suggests matching Identifier entities based on the query and entity_def_ids provided.
    
    QUERY
    Value to perform the autocomplete search with.
    
    COLLECTION_IDS_LIST
    A list (or tuple) of collection ids to search against; it is sent to the
    API as a single comma separated string.
    Leaving this blank means it will search across all identifiers. 
    Entity defs can be constrained to specific facets by providing them as facet collections. 
    Relationship collections will resolve to their underlying entity def.
    Collection ids are: organizations, people, funding_rounds, acquisitions, investments,
    events, press_references, funds, event_appearances, ipos, ownerships, categories, 
    category_groups, locations, jobs
    
    LIMIT
    Number of results to retrieve; default = 10, max = 25
    
    Returns a dataframe with one row per suggested entity (nested JSON fields
    are flattened into dotted column names such as "identifier.uuid").
    
    Raises requests.HTTPError when the API answers with a 4xx/5xx status.
    '''
    params = {**userkey, "query": query}
    if collection_ids_list and isinstance(collection_ids_list, (list, tuple)):
        # The parameter is a comma separated list; handing requests a Python
        # list would instead repeat the parameter once per element.
        params["collection_ids"] = ",".join(collection_ids_list)
    # bool is a subclass of int, so exclude it explicitly
    if limit and isinstance(limit, int) and not isinstance(limit, bool):
        params["limit"] = limit
    r = requests.get("https://api.crunchbase.com/api/v4/autocompletes", params=params)
    # Surface HTTP failures directly instead of failing later with an
    # unrelated KeyError on the missing "entities" field.
    r.raise_for_status()
    # json_normalize already returns a dataframe
    return json_normalize(r.json()["entities"])

def whoKnows(name, df, person_first=True):
    '''
    This function takes in a name string and a dataframe generated by the url_extraction(query, "jobs") function.
    
    Returns a string that is a concatenated list of unique names mapped to their companies, excluding the rows of the named person.
    Returns an empty string when the person shares no company with anyone else in `df`.
    
    This is a Pledge 1% action that helps us see who knows a specific Boardroom Ally and how, pulled from the Boardroom Allies affiliations dataframe.
    
    The dataframe must contain the columns "properties.organization_identifier.value"
    and "properties.person_identifier.value". Rows missing either value are ignored,
    and a person/company pair that occurs on several rows is listed only once.
    
    If person_first is True (default action), the output will aggregate by person. 
    matches_str = 'Name1 (Company1, Company2), Name2 (Company2), Name3 (Company1), ...'
    
    If person_first is False, the output will aggregate by company
    matches_str = 'Company1 (Name1, Name2, Name3), Company2 (Name1), Company3 (Name2, Name3), ...'
    '''
    org_col = "properties.organization_identifier.value"
    person_col = "properties.person_identifier.value"

    # All unique companies affiliated with the input name
    own_companies = set(df.loc[df[person_col] == name, org_col].dropna())

    # Everyone else at those companies. Rows with a missing name or company
    # are dropped because they cannot be rendered in the output string.
    shared = df[df[org_col].isin(own_companies) & (df[person_col] != name)]
    shared = shared.dropna(subset=[org_col, person_col])
    # A stable sort keeps the original row order within each company, so the
    # output is deterministic.
    shared = shared.sort_values(org_col, kind="mergesort")

    # Choose which column groups the output and which one is listed in parentheses
    if person_first:
        key_col, value_col = person_col, org_col
    else:
        key_col, value_col = org_col, person_col

    # Group while preserving first-seen order and skipping repeated pairs
    # (e.g. one person holding two titles at the same company).
    grouped = {}
    for key, value in zip(shared[key_col], shared[value_col]):
        members = grouped.setdefault(key, [])
        if value not in members:
            members.append(value)

    return ", ".join(
        "{} ({})".format(key, ", ".join(members)) for key, members in grouped.items()
    )

# Fields requested for every job entity; shared by both job-search query builders.
_JOB_FIELD_IDS = (
    "created_at",
    "employee_featured_order",
    "ended_on",
    "entity_def_id",
    "identifier",
    "is_current",
    "job_type",
    "name",
    "organization_identifier",
    "permalink",
    "person_identifier",
    "short_description",
    "started_on",
    "title",
    "updated_at",
    "uuid",
)


def _make_jobs_query(identifier_field, uuid, excluded_job_types, limit):
    '''
    Builds a current-jobs search body filtered on `identifier_field`.

    IDENTIFIER_FIELD
    Field the `uuid` values are matched against ("person_identifier" or "organization_identifier").

    UUID
    List of uuid values passed through unchanged as the `includes` predicate values.

    EXCLUDED_JOB_TYPES
    Job types placed in the `not_includes` predicate.

    LIMIT
    Value of the query's "limit" entry.
    '''
    return {
        # Fresh list per call so callers can mutate the result safely
        "field_ids": list(_JOB_FIELD_IDS),
        "limit": limit,
        "query": [
            {
                "type": "predicate",
                "field_id": identifier_field,
                "operator_id": "includes",
                "values": uuid
            },
            {
                "type": "predicate",
                "field_id": "is_current",
                "operator_id": "eq",
                "values": ["true"]
            },
            {
                "type": "predicate",
                "field_id": "job_type",
                "operator_id": "not_includes",
                "values": list(excluded_job_types)
            }]
    }


def makequery_jobs(uuid, limit=1000):
    '''
    Job Search: Boardroom Ally Current Board/Executive Affiliations
    - Person includes list of `uuid` values
    - The job title is current (`is_current == True`)
    - Excludes `employee` level jobs

    Returns the search body (dict) to pass to url_count / url_extraction.
    '''
    return _make_jobs_query("person_identifier", uuid, ["employee"], limit)


def makequery_p1_jobs(uuid, limit=1000):
    '''
    Job Search: P1 Companies Current Board/Executive Affiliations
    - Organization includes list of `uuid` values
    - The job title is current (`is_current == True`)
    - Excludes `employee` and `executive` level jobs

    Returns the search body (dict) to pass to url_count / url_extraction.
    '''
    return _make_jobs_query("organization_identifier", uuid, ["employee", "executive"], limit)

### Get UUID for each Boardroom Ally target

In [180]:
# Resolve every Boardroom Ally search term to the UUID of its top "people" autocomplete hit.
search_jobs = pd.read_csv("Boardroom Allies Tracker - job_search_terms.csv").CONCAT.to_list()
search_jobs.sort()
uuid_jobs = []
for item in search_jobs:
    found = autocompletes(item, ["people"], limit=1)
    # When the autocomplete returns no entities the dataframe is empty and has
    # no "identifier.uuid" column; report the term and keep going instead of
    # aborting the whole loop with a KeyError.
    if found.empty or "identifier.uuid" not in found.columns:
        print("No Crunchbase person found for '{}'; skipping".format(item))
        continue
    uuid_jobs.append(found["identifier.uuid"].iloc[0])

### Get UUID for each P1 Equity Company

In [203]:
# Resolve every P1 equity company search term to the UUID of its top "organizations" autocomplete hit.
search_companies = pd.read_csv("Boardroom Allies Tracker - company_search_terms.csv").Company.to_list()
uuid_companies = []
for item in search_companies:
    found = autocompletes(item, ["organizations"], limit=1)
    # When the autocomplete returns no entities the dataframe is empty and has
    # no "identifier.uuid" column; report the term and keep going instead of
    # aborting the whole loop with a KeyError.
    if found.empty or "identifier.uuid" not in found.columns:
        print("No Crunchbase organization found for '{}'; skipping".format(item))
        continue
    uuid_companies.append(found["identifier.uuid"].iloc[0])

## Boardroom Allies Tracker: Boardroom Allies Affiliations

In [185]:
# Pull the current board/executive jobs of every Boardroom Ally and save them to CSV.
query = makequery_jobs(uuid_jobs)
raw = pd.DataFrame() # Global raw variable
comp_count = url_count(query, "jobs") 
url_extraction(query, "jobs")
# url_extraction issues a single request capped by the query's "limit", so
# compare against the total match count to make any truncation visible.
if comp_count > len(raw):
    print("Warning: {} jobs matched but only {} were retrieved; results are truncated".format(comp_count, len(raw)))
affiliations = raw[["properties.organization_identifier.value",
                    "properties.person_identifier.value", 
                    "properties.title", 
                    "properties.job_type", 
                    "properties.started_on.value",
                    "properties.updated_at"]].sort_values(["properties.organization_identifier.value"])
affiliations.to_csv("boardroom_allies_affiliations.csv")
affiliations.head()

Unnamed: 0,properties.organization_identifier.value,properties.person_identifier.value,properties.title,properties.job_type,properties.started_on.value,properties.updated_at
335,140 Proof,Ronald Conway,Member of the Board of Advisory,advisor,,2018-02-13T01:25:41Z
678,23andMe,Roelof Botha,Board Member,board_member,2017-09-01,2019-03-30T11:13:50Z
251,2U Inc.,Timothy Haley,Member of the Board of Directors,board_member,2009-01-01,2018-02-13T01:23:50Z
758,3Com,Sanford Robertson,Member of the Board of Directors,board_member,,2018-02-13T01:34:39Z
353,3D Robotics,Jason Mendelson,Board Member,board_member,,2018-02-13T11:48:53Z


## P1 Companies Board Members

In [210]:
# Pull the current board-level jobs at every P1 equity company and save them to CSV.
query = makequery_p1_jobs(uuid_companies)
raw = pd.DataFrame() # Global raw variable
comp_count = url_count(query, "jobs") 
url_extraction(query, "jobs")
# url_extraction issues a single request capped by the query's "limit", so
# compare against the total match count to make any truncation visible.
if comp_count > len(raw):
    print("Warning: {} jobs matched but only {} were retrieved; results are truncated".format(comp_count, len(raw)))
board_of_p1_companies = raw[["properties.organization_identifier.value",
                    "properties.person_identifier.value", 
                    "properties.title", 
                    "properties.job_type", 
                    "properties.started_on.value",
                    "properties.updated_at"]].sort_values(["properties.organization_identifier.value"])
board_of_p1_companies.to_csv("board_of_p1_companies_affiliations.csv")
board_of_p1_companies.head()

Unnamed: 0,properties.organization_identifier.value,properties.person_identifier.value,properties.title,properties.job_type,properties.started_on.value,properties.updated_at
32,Atlassian,Steve Sordello,Director,board_member,2015-11-01,2019-09-06T13:26:35Z
177,Atlassian,Enrique Salem,Director,board_member,2013-07-01,2019-09-06T13:19:08Z
105,Atlassian,Heather Mirjahangir,Director,board_member,2015-11-01,2019-09-06T13:26:35Z
114,Atlassian,Shona Brown,Director,board_member,2015-11-01,2019-09-06T13:26:35Z
20,Atlassian,Mike Cannon-Brookes,Director,board_member,2002-02-01,2019-09-06T12:48:01Z


## Boardroom Allies Tracker: Matchmaking

In [209]:
# Reload the saved affiliations and keep a view restricted to the P1 company search terms.
affiliations = pd.read_csv("boardroom_allies_affiliations.csv")
is_p1_company = affiliations["properties.organization_identifier.value"].isin(search_companies)
affiliations_p1 = affiliations[is_p1_company]

# One output row per unique person, in alphabetical order.
names = sorted(set(affiliations["properties.person_identifier.value"].to_list()))

# Who each person knows, aggregated by person and by company.
knows = [whoKnows(person, affiliations) for person in names]
knows_byco = [whoKnows(person, affiliations, False) for person in names]

# P1 companies each person is directly affiliated with, as a comma separated string.
knows_byp1 = []
for person in names:
    person_rows = affiliations_p1[affiliations_p1["properties.person_identifier.value"] == person]
    knows_byp1.append(", ".join(person_rows["properties.organization_identifier.value"].to_list()))

d = {
    'Name': names,
    'Knows': knows,
    'Knows_ByCompany': knows_byco,
    "Knows_P1Company": knows_byp1,
}
affiliations_matchmaking = pd.DataFrame(
    d, columns=["Name", "Knows", "Knows_ByCompany", "Knows_P1Company"]
)
affiliations_matchmaking.to_csv("boardroom_allies_matchmaking.csv")
affiliations_matchmaking.head()

Unnamed: 0,Name,Knows,Knows_ByCompany,Knows_P1Company
0,Ajay Agarwal,"Enrique Salem (Bain Capital Ventures, Clari), ...","Bain Capital Ventures (Enrique Salem), Clari (...",
1,Andrew Chen,"Susan Su (500 Startups), Jeff Jordan (Andreess...","500 Startups (Susan Su), Andreessen Horowitz (...",Dropbox
2,Andy Weissman,David Pakman (YouNow),YouNow (David Pakman),
3,Arne Duncan,"Ryan Hinkle (Pluralsight, Turnitin), Tim Maudl...","Pluralsight (Ryan Hinkle, Tim Maudlin), Turnit...",Pluralsight
4,Ben Horowitz,"Jeff Jordan (Andreessen Horowitz), John O'Farr...","Andreessen Horowitz (Jeff Jordan, John O'Farre...",Okta
