In [42]:
# Requests allows you to send HTTP/1.1 requests extremely easily.
# There’s no need to manually add query strings to your URLs, or to form-encode your POST data. 
# Keep-alive and HTTP connection pooling are 100% automatic, thanks to urllib3.
import requests
import json
import pandas as pd
import csv
from pandas import json_normalize 
from operator import itemgetter
from user_key import userkey # Pulls in P1s Crunchbase API user key

In [56]:
def url_count(query, query_type): 
    # POST method with API URL, query_type as a parameter, and passing query as json.
    # https://www.w3schools.com/python/ref_requests_post.asp
    r = requests.post("https://api.crunchbase.com/api/v4/searches/" + query_type, params = userkey , json = query)
    #print(r.text)
    #print(r.url)
    result = json.loads(r.text)
    count = result["count"]
    return count

def url_extraction(query, query_type):    
    # Create global raw variable
    global raw   
    # POST method with API URL, query_type as a parameter, and passing query as json.
    # https://www.w3schools.com/python/ref_requests_post.asp
    r = requests.post("https://api.crunchbase.com/api/v4/searches/" + query_type, params = userkey , json = query)
    result = json.loads(r.text)
    normalized_raw = json_normalize(result['entities'])
    # Append normalized entity results to global raw variable
    raw = raw.append(normalized_raw, ignore_index=True)

def autocompletes(query, collection_ids_list=None, limit=None):
    '''
    Suggests matching Identifier entities based on the query and entity_def_ids provided.
    
    QUERY
    Value to perform the autocomplete search with.
    
    COLLECTION_IDS_LIST
    A comma separated list of collection ids to search against. 
    Leaving this blank means it will search across all identifiers. 
    Entity defs can be constrained to specific facets by providing them as facet collections. 
    Relationship collections will resolve to their underlying entity def.
    Collection ids are: organizations, people, funding_rounds, acquisitions, investments,
    events, press_references, funds, event_appearances, ipos, ownerships, categories, 
    category_groups, locations, jobs
    
    LIMIT
    Number of results to retrieve; default = 10, max = 25
    '''
    params = {**userkey, "query": query}
    if collection_ids_list and type(collection_ids_list) == list:
        params.update({"collection_ids": collection_ids_list})
    if limit and type(limit) == int:
        params.update({"limit": limit})
    r = requests.get("https://api.crunchbase.com/api/v4/autocompletes", params = params)
    result = json.loads(r.text)
    normalized_result = json_normalize(result["entities"])
    dataframe = pd.DataFrame.from_dict(normalized_result)
    return dataframe

def makequery_jobs_board_of_org(uuid_list, limit=1000):
    '''
    Job Search: Current Board Affiliations
    - Organization includes list of `uuid` values
    - Excludes `employee` and `executive` level jobs
    '''
    query = {
        "field_ids": [
            "entity_def_id",
            "identifier",
            "job_type",
            "name",
            "organization_identifier",
            "person_identifier",
            "short_description",
            "is_current",
            "started_on",
            "ended_on",
            "title",
            "updated_at",
            "uuid"],
        "limit": limit,
        "query": [
            {
                "type": "predicate",
                "field_id": "organization_identifier",
                "operator_id": "includes",
                "values": uuid_list
            },
            {
                "type": "predicate",
                "field_id": "job_type",
                "operator_id": "not_includes",
                "values": ["employee", "executive"]
            }]
    }
    return query

def people_entity(entity_id, field_ids_list=['primary_job_title', 'primary_organization', 'linkedin'], card_ids_list=None):
    '''
    
    ENTITY_ID
    UUID or permalink of desired entity
    
    FIELD_IDS
    Fields to include on the resulting entity - 
    either an array of field_id strings in JSON 
    or a comma-separated list encoded as string
    
    CARD_IDS
    Cards to include on the resulting entity - 
    array of card_id strings in JSON encoded as string\ 
    Card Ids for Person: [degrees, event_appearances, fields, 
    founded_organizations, jobs, participated_funding_rounds, 
    participated_funds, participated_investments, partner_funding_rounds, 
    partner_investments, press_references, primary_job, primary_organization]
    
    '''
    params = {**userkey}
    if field_ids_list and type(field_ids_list) == list:
        string = ""
        for item in field_ids_list:
            string += item+","
        string=string[:-1]
        params.update({"field_ids": string})
    if card_ids_list and type(card_ids_list) == list:
        string = ""
        for item in card_ids_list:
            string += item+","
        string=string[:-1]
        params.update({"card_ids": string})
    r = requests.get("https://api.crunchbase.com/api/v4/entities/people/"+str(entity_id), params = params)
    if r.text == "Usage limit exceeded":
        print('Usage Limited Exceeded, restart loop')
        return
    result = json.loads(r.text)
    uuid = result["properties"]["identifier"]["uuid"]
    try:
        linkedin = result["properties"]["linkedin"]["value"]
    except:
        linkedin = "NA"
    try:
        title = result["properties"]["primary_job_title"]
    except:
        title = "NA"
    try:
        org = result["properties"]["primary_organization"]["value"]
    except:
        org = "NA"
    
    return {uuid:title},{uuid:org},{uuid:linkedin}

## Pull UUIDs for list of companies, using `autocompletes` helper function

In [75]:
#companies = pd.read_csv("../crunchbase-api-setup/Boardroom Allies Tracker - company_search_terms.csv").Company.to_list()
companies = pd.read_csv("crunchbase-api/CONFIDENTIAL Cloud 100 CEO Roundtable Invites - company_search_terms.csv").Search.to_list()
companies.sort()
uuid_companies = []
counter = 0
for company in companies:
    counter += 1
    print('*'*50)
    print("{}) Searching for {}".format(counter, company.upper()))
    found = autocompletes(company, ["organizations"], limit=1)
    uuid_companies.append(found["identifier.uuid"][0])
    print('*'*50)
    print("Found {} !!!!!!!\nDESCRIPTION: {}".format(found["identifier.value"][0].upper(), found["short_description"][0]))

**************************************************
1) Searching for ACTIFIO
**************************************************
Found ACTIFIO !!!!!!!
DESCRIPTION: Actifio delivers enterprise data-as-a-service to global enterprise customers and service provider partners.
**************************************************
2) Searching for AIRTABLE
**************************************************
Found AIRTABLE !!!!!!!
DESCRIPTION: Airtable is a cloud-based software company that blends a traditional spreadsheet with a database.
**************************************************
3) Searching for AMPLITUDE
**************************************************
Found AMPLITUDE !!!!!!!
DESCRIPTION: Amplitude is a product intelligence platform that provides digital product tools that help teams run and grow their businesses.
**************************************************
4) Searching for APPDIRECT
**************************************************
Found APPDIRECT !!!!!!!
DESCRIPTION: AppDirec

**************************************************
Found COUCHBASE !!!!!!!
DESCRIPTION: Couchbase is a NoSQL cloud database for business-critical applications.
**************************************************
32) Searching for CULTURE AMP
**************************************************
Found CULTURE AMP !!!!!!!
DESCRIPTION: Culture Amp is an employee analytics platform that specializes in staff surveying and analytics.
**************************************************
33) Searching for DISCO LEGALTECH
**************************************************
Found DISCO !!!!!!!
DESCRIPTION: DISCO is a legaltech company that applies AI and cloud computing to help lawyers and legal teams improve legal outcomes.
**************************************************
34) Searching for DARKTRACE
**************************************************
Found DARKTRACE !!!!!!!
DESCRIPTION: Darktrace is the worldâ€™s leading cyber AI company and the creator of Autonomous Response technology.
************

**************************************************
Found INVISION !!!!!!!
DESCRIPTION: Invision is a consulting company that offers business support systems and management solutions.
**************************************************
63) Searching for JFROG
**************************************************
Found JFROG !!!!!!!
DESCRIPTION: JFrog provides software developers with a binary repository management solution.
**************************************************
64) Searching for KEEPTRUCKIN
**************************************************
Found KEEPTRUCKIN !!!!!!!
DESCRIPTION: KeepTruckin is a San Francisco-based startup helping trucking companies manage their fleets and have their drivers legally log their hours.
**************************************************
65) Searching for KONG
**************************************************
Found KONG !!!!!!!
DESCRIPTION: Kong is an open-source platform to manage and secure APIs and Microservices.
********************************

**************************************************
Found SEISMIC !!!!!!!
DESCRIPTION: Seismic is a sales enablement and marketing orchestration platform that improves close rates & delivers larger deals for sales teams.
**************************************************
94) Searching for SERVICETITAN
**************************************************
Found SERVICETITAN !!!!!!!
DESCRIPTION: ServiceTitan is a service management software that helps home services businesses generate more leads and close more sales.
**************************************************
95) Searching for SISENSE
**************************************************
Found SISENSE !!!!!!!
DESCRIPTION: Sisense is a business analytics software company that delivers agile analytics at scale.
**************************************************
96) Searching for SNOWFLAKE
**************************************************
Found SNOWFLAKE !!!!!!!
DESCRIPTION: Snowflake is a cloud data platform that provides a data warehous

KeyboardInterrupt: 

## Pull all current/former board affiliations for companies, using `makequery_jobs_board_of_org` helper function

In [45]:
query = makequery_jobs_board_of_org(uuid_companies) # Helper function
raw = pd.DataFrame() # Global raw variable
comp_count = url_count(query, "jobs") 
url_extraction(query, "jobs")
board_affiliations = raw[["properties.organization_identifier.value",
                    "properties.person_identifier.uuid",
                    "properties.person_identifier.value", 
                    "properties.title", 
                    "properties.job_type", 
                    "properties.is_current",
                    "properties.started_on.value",
                    "properties.ended_on.value",
                    "properties.updated_at"]].sort_values(["properties.organization_identifier.value"])
# Get UUIDs of people
uuid_board_members = list(set(board_affiliations["properties.person_identifier.uuid"].to_list()))
# Display
board_affiliations.head()

Unnamed: 0,properties.organization_identifier.value,properties.person_identifier.uuid,properties.person_identifier.value,properties.title,properties.job_type,properties.is_current,properties.started_on.value,properties.ended_on.value,properties.updated_at
330,Atlassian,6522f077-190d-e974-0743-81000498e74f,Ryan Sweeney,Director,board_member,False,2015-01-01,2017-01-01,2019-09-06T11:03:02Z
46,Atlassian,9da0aa4b-6fd1-9d12-cfb6-5073f7c86d9a,Steve Sordello,Director,board_member,True,2015-11-01,,2019-09-06T13:26:35Z
307,Atlassian,08e82d6b-51a2-e642-23bf-05e52cbd9278,Murray Demo,Director,board_member,False,2011-12-01,2015-10-01,2019-09-06T13:05:36Z
206,Atlassian,822b9c18-cb8d-e494-68f3-6cb138620080,Kirk Bowman,Director,board_member,False,2012-01-01,2016-01-01,2019-09-06T12:19:37Z
112,Atlassian,d4e2a745-53ff-36e1-9960-17270d5b7e8c,Richard Wong,Director,board_member,True,2010-07-01,,2019-09-06T13:05:36Z


## Find primary organization info, using `people_entity` helper function

In [57]:
# Test example
testerson = uuid_board_members[0]
people_entity(testerson)

({'6addd93e-f07c-1291-b244-7df3f582c4ff': 'NA'},
 {'6addd93e-f07c-1291-b244-7df3f582c4ff': 'NA'},
 {'6addd93e-f07c-1291-b244-7df3f582c4ff': 'https://www.linkedin.com/in/joshsteinvc/'})

In [60]:
# Start with empty dictionnaries
dict_title = {}
dict_org = {}
dict_linkedin = {}
no_primary_org = []

# For each API call, update dictionary if it's not empty
for i,person in enumerate(uuid_board_members):
    print(i,end =" ")
    dict_title_person, dict_org_person, dict_li_person = people_entity(person)
    if dict_title_person[person] != "NA":
        dict_title.update(dict_title_person)
    if dict_org_person[person] != "NA":
        dict_org.update(dict_org_person)
    if dict_li_person[person] != "NA":
        dict_linkedin.update(dict_li_person)
    if dict_title_person[person] == "NA" or dict_org_person[person] == "NA" or dict_li_person[person] == "NA":
        no_primary_org.append(person)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 Usage Limited Exceeded, restart loop


TypeError: cannot unpack non-iterable NoneType object

## Repeat if stopped by Crunchbase, otherwise ignore

In [62]:
for i,person in enumerate(uuid_board_members[219:]):
    print(i,end =" ")
    dict_title_person, dict_org_person, dict_li_person = people_entity(person)
    if dict_title_person[person] != "NA":
        dict_title.update(dict_title_person)
    if dict_org_person[person] != "NA":
        dict_org.update(dict_org_person)
    if dict_li_person[person] != "NA":
        dict_linkedin.update(dict_li_person)
    if dict_title_person[person] == "NA" or dict_org_person[person] == "NA" or dict_li_person[person] == "NA":
        no_primary_org.append(person)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 

## Add person's primary title, organization, and LinkedIn to `board_affiliations` dataframe.

In [64]:
board_affiliations['Primary_Job_Title'] = board_affiliations['properties.person_identifier.uuid'].map(dict_title)
board_affiliations['Primary_Organization'] = board_affiliations['properties.person_identifier.uuid'].map(dict_org)
board_affiliations['LinkedIn'] = board_affiliations['properties.person_identifier.uuid'].map(dict_linkedin)
# Create CSV file 
board_affiliations.to_csv("OUT_board_affiliations.csv")
# Title, Org, LinkedIn added
board_affiliations.head()

Unnamed: 0,properties.organization_identifier.value,properties.person_identifier.uuid,properties.person_identifier.value,properties.title,properties.job_type,properties.is_current,properties.started_on.value,properties.ended_on.value,properties.updated_at,Primary_Job_Title,Primary_Organization,LinkedIn
330,Atlassian,6522f077-190d-e974-0743-81000498e74f,Ryan Sweeney,Director,board_member,False,2015-01-01,2017-01-01,2019-09-06T11:03:02Z,General Partner,Accel,http://www.linkedin.com/in/ryanjsweeney
46,Atlassian,9da0aa4b-6fd1-9d12-cfb6-5073f7c86d9a,Steve Sordello,Director,board_member,True,2015-11-01,,2019-09-06T13:26:35Z,Chief Financial Officer,LinkedIn,https://www.linkedin.com/in/stevesordello
307,Atlassian,08e82d6b-51a2-e642-23bf-05e52cbd9278,Murray Demo,Director,board_member,False,2011-12-01,2015-10-01,2019-09-06T13:05:36Z,Member of the Board of Directors,Citrix Systems,http://www.linkedin.com/pub/murray-demo/17/311...
206,Atlassian,822b9c18-cb8d-e494-68f3-6cb138620080,Kirk Bowman,Director,board_member,False,2012-01-01,2016-01-01,2019-09-06T12:19:37Z,Venture Partner,Accel,http://www.linkedin.com/pub/kirk-bowman/0/2b/947
112,Atlassian,d4e2a745-53ff-36e1-9960-17270d5b7e8c,Richard Wong,Director,board_member,True,2010-07-01,,2019-09-06T13:05:36Z,General Partner,Accel,http://www.linkedin.com/in/richpwong


In [65]:
# Count of how many are missing Title, Organization, or LinkedIn
len(no_primary_org)

78

In [66]:
# For each company, pull current board members
companies = list(set(board_affiliations["properties.organization_identifier.value"].to_list()))
companies.sort()

## Current board members

In [67]:
board_members_dict = {}
for org in companies:
    board_members_df = board_affiliations[(board_affiliations["properties.organization_identifier.value"]==org) &
                                          (board_affiliations["properties.is_current"]) &
                                          (board_affiliations["properties.job_type"]=="board_member")]
    board_members_names = board_members_df["properties.person_identifier.value"].to_list()
    board_members_org = board_members_df["Primary_Organization"].to_list()
    
    # Turn into string
    board_string = ""
    if board_members_names != []:
        board_info = dict(zip(board_members_names, board_members_org))
        # Add them to string
        for name, company in sorted(board_info.items()):
            if pd.isna(company):
                board_string += name + "; "
            else:
                board_string += name + " (" +company+"); "
        # Remove trailing comma
        board_string = board_string[:-2]
    board_members_dict[org] = board_string
board_members_dict_1 = board_members_dict

## Former board members

In [68]:
board_members_dict = {}
for org in companies:
    board_members_df = board_affiliations[(board_affiliations["properties.organization_identifier.value"]==org) &
                                          (board_affiliations["properties.is_current"] == False) &
                                          (board_affiliations["properties.job_type"]=="board_member")]
    board_members_names = board_members_df["properties.person_identifier.value"].to_list()
    board_members_org = board_members_df["Primary_Organization"].to_list()
    
    board_string = ""
    if board_members_names != []:
        board_info = dict(zip(board_members_names,board_members_org))
        # Add them to string
        for name, company in sorted(board_info.items()):
            if pd.isna(company):
                board_string += name + "; "
            else:
                board_string += name + " (" +company+"); "
        # Remove trailing comma
        board_string = board_string[:-2]
    board_members_dict[org] = board_string
board_members_dict_2 = board_members_dict

## Current board advisors or board observers

In [69]:
# For each company, pull current board affiliations
board_members_dict = {}
for org in companies:
    board_members_df = board_affiliations[(board_affiliations["properties.organization_identifier.value"]==org) &
                                          (board_affiliations["properties.is_current"]) &
                                          (board_affiliations["properties.job_type"]!="board_member")]
    board_members_names = board_members_df["properties.person_identifier.value"].to_list()
    board_members_org = board_members_df["Primary_Organization"].to_list()
    
    board_string = ""
    if board_members_names != []:
        board_info = dict(zip(board_members_names,board_members_org))
        # Add them to string
        for name, company in sorted(board_info.items()):
            if pd.isna(company):
                board_string += name + "; "
            else:
                board_string += name + " (" +company+"); "
        # Remove trailing comma
        board_string = board_string[:-2]
    board_members_dict[org] = board_string
board_members_dict_3 = board_members_dict

## Former board advisors or board observers

In [70]:
# For each company, pull former board affiliations
board_members_dict = {}
for org in companies:
    board_members_df = board_affiliations[(board_affiliations["properties.organization_identifier.value"]==org) &
                                          (board_affiliations["properties.is_current"] == False) &
                                          (board_affiliations["properties.job_type"]!="board_member")]
    board_members_names = board_members_df["properties.person_identifier.value"].to_list()
    board_members_org = board_members_df["Primary_Organization"].to_list()
    
    board_string = ""
    if board_members_names != []:
        board_info = dict(zip(board_members_names,board_members_org))
        # Add them to string
        for name, company in sorted(board_info.items()):
            if pd.isna(company):
                board_string += name + "; "
            else:
                board_string += name + " (" +company+"); "
        # Remove trailing comma
        board_string = board_string[:-2]
    board_members_dict[org] = board_string
board_members_dict_4 = board_members_dict

## To CSV

In [72]:
with open('OUT_str_board_affiliations.csv', 'w') as f:
    for key in board_members_dict.keys():
        f.write("%s, %s, %s, %s, %s\n" % (key, 
                                          board_members_dict_1[key], 
                                          board_members_dict_2[key], 
                                          board_members_dict_3[key], 
                                          board_members_dict_4[key]))