### 1. Import Python Packages

In [2]:
import json
import os

import pandas as pd
import requests
from sqlalchemy import create_engine

### 2. Retrieve Organization Data via Crunchbase API

a. Gathering data on 1000 companies located across the globe

In [2]:
# Crunchbase organization-search endpoint, reached through RapidAPI
api_url = "https://crunchbase-crunchbase-v1.p.rapidapi.com/searches/organizations"

# Search body: up to 1000 companies ordered by ascending rank_org, restricted
# to organizations whose location includes any of the listed regions
payload = dict(
    field_ids=[
        "identifier",
        "location_identifiers",
        "short_description",
        "rank_org",
    ],
    limit=1000,
    order=[dict(field_id="rank_org", sort="asc")],
    query=[
        dict(
            field_id="location_identifiers",
            operator_id="includes",
            type="predicate",
            values=[
                "europe",
                "north-america",
                "latin-america",
                "asia",
                "australia",
                "new-zealand",
            ],
        ),
        dict(
            field_id="facet_ids",
            operator_id="includes",
            type="predicate",
            values=["company"],
        ),
    ],
)

# Request headers for RapidAPI.
# The key and host are read from the RAPIDAPI_KEY / RAPIDAPI_HOST environment
# variables so real credentials never have to be written into the notebook.
# The original placeholder strings are kept as fallbacks when a variable is unset.
headers = {
    "content-type" : "application/json",
    "x-rapidapi-key" : os.environ.get("RAPIDAPI_KEY", "API_KEY"),
    "x-rapidapi-host" : os.environ.get("RAPIDAPI_HOST", "HOST")
}

In [4]:
# Post the search request.
# `json=` lets requests serialise the payload itself, `timeout` stops a stalled
# connection from hanging the kernel, and `raise_for_status()` surfaces a
# 4xx/5xx reply here instead of as a confusing failure in a later cell.
response = requests.post(api_url, json=payload, headers=headers, timeout=30)
response.raise_for_status()

# Display the Response object; its repr shows the HTTP status code
response

<Response [200]>

In [5]:
# Decode the API response with requests' built-in JSON decoder, which works
# from the raw bytes instead of the guessed text encoding behind `response.text`
response.json()

{'count': 1089038,
 'entities': [{'uuid': '1a410398-3a72-5882-99b8-6318cf594850',
   'properties': {'identifier': {'permalink': 'softbank',
     'image_id': 'liab4nlbn8ov1vzuwgqu',
     'uuid': '1a410398-3a72-5882-99b8-6318cf594850',
     'entity_def_id': 'organization',
     'value': 'SoftBank'},
    'short_description': 'SoftBank provides fixed-line, mobile telephony, internet, telecommunications, and digital television products.',
    'rank_org': 1,
    'location_identifiers': [{'permalink': 'tokyo-tokyo',
      'uuid': '127ba62d-d40a-3d12-42c0-e606f9fa9440',
      'location_type': 'city',
      'entity_def_id': 'location',
      'value': 'Tokyo'},
     {'permalink': 'tokyo-japan',
      'uuid': 'ee6ee543-afd4-5595-8c2b-ffd3ea11a243',
      'location_type': 'region',
      'entity_def_id': 'location',
      'value': 'Tokyo'},
     {'permalink': 'japan',
      'uuid': 'e2781ac6-611b-376a-fc7d-ddec670c3d94',
      'location_type': 'country',
      'entity_def_id': 'location',
      'v

In [6]:
# Keep the decoded response body (a dict) for the following cells
api_response = response.json()

In [7]:
# The "entities" key of the decoded response holds the list of organizations
# returned by the search; store it in a variable that we can loop through
company_list = api_response["entities"]

# Checking variable type to confirm it is a list
type(company_list)

list

In [8]:
# Preview only the first few entities; displaying the full list of up to 1000
# nested dicts floods the notebook output
company_list[:3]

[{'uuid': '1a410398-3a72-5882-99b8-6318cf594850',
  'properties': {'identifier': {'permalink': 'softbank',
    'image_id': 'liab4nlbn8ov1vzuwgqu',
    'uuid': '1a410398-3a72-5882-99b8-6318cf594850',
    'entity_def_id': 'organization',
    'value': 'SoftBank'},
   'short_description': 'SoftBank provides fixed-line, mobile telephony, internet, telecommunications, and digital television products.',
   'rank_org': 1,
   'location_identifiers': [{'permalink': 'tokyo-tokyo',
     'uuid': '127ba62d-d40a-3d12-42c0-e606f9fa9440',
     'location_type': 'city',
     'entity_def_id': 'location',
     'value': 'Tokyo'},
    {'permalink': 'tokyo-japan',
     'uuid': 'ee6ee543-afd4-5595-8c2b-ffd3ea11a243',
     'location_type': 'region',
     'entity_def_id': 'location',
     'value': 'Tokyo'},
    {'permalink': 'japan',
     'uuid': 'e2781ac6-611b-376a-fc7d-ddec670c3d94',
     'location_type': 'country',
     'entity_def_id': 'location',
     'value': 'Japan'},
    {'permalink': 'asia',
     'uuid'

In [9]:
# Initializing the dictionary of column lists that the loop appends to
company_details = {
    "company_name" : [],
    "company_country" : []
}

# Looping through the JSON response to extract the required data
for company in company_list :

    company_name = company["properties"]["identifier"]["value"]
    company_details["company_name"].append(company_name)
    print("company_name:", company_name)

    # location_identifiers holds one dict per location level (city, region,
    # country, ...). Select the entry whose location_type is "country" instead
    # of relying on a fixed list position, which raises IndexError or returns
    # the wrong level when a company has fewer location entries.
    locations = company["properties"].get("location_identifiers", [])
    country_entry = next(
        (loc for loc in locations if loc.get("location_type") == "country"),
        None,
    )
    # Appending None keeps both lists the same length when no country is listed
    company_country = country_entry["value"] if country_entry else None
    company_details["company_country"].append(company_country)
    print("company_location:", company_country)

    print("-"*70)

company_name: SoftBank
company_location: Japan
----------------------------------------------------------------------
company_name: Compass
company_location: United States
----------------------------------------------------------------------
company_name: Deliveroo
company_location: United Kingdom
----------------------------------------------------------------------
company_name: Citigroup
company_location: United States
----------------------------------------------------------------------
company_name: Spotify
company_location: Sweden
----------------------------------------------------------------------
company_name: Ant Group
company_location: China
----------------------------------------------------------------------
company_name: IBM
company_location: United States
----------------------------------------------------------------------
company_name: Crowdcube
company_location: United Kingdom
----------------------------------------------------------------------
company_name: Be

company_name: Hotmart
company_location: Brazil
----------------------------------------------------------------------
company_name: Morgan Stanley
company_location: United States
----------------------------------------------------------------------
company_name: Pipefy
company_location: United States
----------------------------------------------------------------------
company_name: Infobip
company_location: United Kingdom
----------------------------------------------------------------------
company_name: Delhivery
company_location: India
----------------------------------------------------------------------
company_name: MicroStrategy
company_location: United States
----------------------------------------------------------------------
company_name: SomaLogic
company_location: United States
----------------------------------------------------------------------
company_name: Pie Insurance
company_location: United States
---------------------------------------------------------------

company_name: Baidu
company_location: China
----------------------------------------------------------------------
company_name: Wonderschool
company_location: United States
----------------------------------------------------------------------
company_name: New Relic
company_location: United States
----------------------------------------------------------------------
company_name: UserTesting
company_location: United States
----------------------------------------------------------------------
company_name: Calm
company_location: United States
----------------------------------------------------------------------
company_name: SimilarWeb
company_location: United States
----------------------------------------------------------------------
company_name: Forter
company_location: United States
----------------------------------------------------------------------
company_name: Dunzo
company_location: India
----------------------------------------------------------------------
company_na

company_name: Exscientia
company_location: United Kingdom
----------------------------------------------------------------------
company_name: Addition
company_location: United States
----------------------------------------------------------------------
company_name: Petco
company_location: United States
----------------------------------------------------------------------
company_name: Allianz
company_location: Germany
----------------------------------------------------------------------
company_name: REEF Technology
company_location: United States
----------------------------------------------------------------------
company_name: Blavity, Inc.
company_location: United States
----------------------------------------------------------------------
company_name: SmileDirectClub
company_location: United States
----------------------------------------------------------------------
company_name: Yandex
company_location: Russian Federation
------------------------------------------------

company_name: CERE Network
company_location: United States
----------------------------------------------------------------------
company_name: Cresco Labs
company_location: United States
----------------------------------------------------------------------
company_name: TAL Education Group
company_location: China
----------------------------------------------------------------------
company_name: MuleSoft
company_location: United States
----------------------------------------------------------------------
company_name: Kaiser Permanente
company_location: United States
----------------------------------------------------------------------
company_name: Pitstop
company_location: India
----------------------------------------------------------------------
company_name: Helix
company_location: United States
----------------------------------------------------------------------
company_name: Qualcomm
company_location: United States
--------------------------------------------------------

In [10]:
# Display the dictionary to make sure the company names and countries were
# appended correctly
company_details

{'company_name': ['SoftBank',
  'Compass',
  'Deliveroo',
  'Citigroup',
  'Spotify',
  'Ant Group',
  'IBM',
  'Crowdcube',
  'BetterUp',
  'Shopify',
  'DraftKings',
  'Ripple',
  'Facebook',
  'Goldman Sachs',
  'Coursera',
  'Apple',
  'CRED',
  'Apollo',
  'Twilio',
  'Calendly',
  'Intel',
  'Nokia',
  'Square',
  'Pipe',
  'Twitch',
  'Epic Games',
  'Cityblock Health',
  'Roku',
  'ThredUp',
  'Squarespace',
  'Tencent',
  'StockX',
  'TrueLayer',
  'Twitter',
  'OurCrowd',
  'Alibaba Group',
  'Gopuff',
  'Okta',
  'Nazara Technologies',
  'Drift',
  'Lenovo',
  'NYDIG',
  'Ro',
  'Pindrop',
  'Rakuten',
  'BlocPower',
  'NVIDIA',
  'DigitalOcean',
  'SoFi',
  'Daimler',
  'OneTrust',
  'PharmEasy',
  'Bilibili',
  'Pacaso',
  'Next Insurance',
  'Newchip',
  'BlackRock',
  'Cruise',
  'maude',
  'ATAI Life Sciences',
  'OpenSea',
  'Docker',
  'NIO',
  'Stripe',
  'The Honest Company',
  'Jumio',
  'Airwallex',
  'Groww',
  'Redis Labs',
  'Hopper',
  'Ironclad',
  'Cyble',
 

In [11]:
# Build a DataFrame from the dictionary of column lists, then preview the
# first five rows
company_df = pd.DataFrame.from_dict(company_details)

company_df.head(5)

Unnamed: 0,company_name,company_country
0,SoftBank,Japan
1,Compass,United States
2,Deliveroo,United Kingdom
3,Citigroup,United States
4,Spotify,Sweden


In [12]:
# Write the page-1 DataFrame to a CSV file, leaving out the index column
company_df.to_csv(path_or_buf='company_info_pg1.csv', index=False)

b. Paginating to page 2

In [3]:
# Same global company search as page 1, with the after_id key added so the API
# returns the next 1000 companies (page 2)
payload = dict(
    field_ids=[
        "identifier",
        "location_identifiers",
        "short_description",
        "rank_org",
    ],
    limit=1000,
    after_id="7c8beaea-b3c5-14b4-5866-dc4d4c0bc7cf",
    order=[dict(field_id="rank_org", sort="asc")],
    query=[
        dict(
            field_id="location_identifiers",
            operator_id="includes",
            type="predicate",
            values=[
                "europe",
                "north-america",
                "latin-america",
                "asia",
                "australia",
                "new-zealand",
            ],
        ),
        dict(
            field_id="facet_ids",
            operator_id="includes",
            type="predicate",
            values=["company"],
        ),
    ],
)


# Post the page-2 request.
# `json=` lets requests serialise the payload, `timeout` stops a stalled
# connection from hanging the kernel, and `raise_for_status()` raises on a
# 4xx/5xx reply instead of letting a later cell fail on an error body.
response = requests.post(api_url, json=payload, headers=headers, timeout=30)
response.raise_for_status()

In [4]:
# Display the Response object; its repr shows the HTTP status code (200 expected)
response

<Response [200]>

In [5]:
# Decode the JSON body with requests' built-in decoder (works from the raw
# bytes instead of the guessed text encoding behind `response.text`)
api_output = response.json()

# The "entities" key stores the list of companies for this page
pg2_company_list = api_output["entities"]

# Confirm it is a list
type(pg2_company_list)

list

In [6]:
# Display the decoded page-2 response to confirm its structure
api_output

{'count': 1089957,
 'entities': [{'uuid': 'f1f88bbc-76d8-e8ee-2654-5081bdb1f74d',
   'properties': {'identifier': {'permalink': 'indie-gogo',
     'image_id': 'kapksutcdx9rbrygqta9',
     'uuid': 'f1f88bbc-76d8-e8ee-2654-5081bdb1f74d',
     'entity_def_id': 'organization',
     'value': 'Indiegogo'},
    'short_description': 'Indiegogo is a crowdfunding platform empowering people around the world to fund projects that matter to them.',
    'rank_org': 1120,
    'location_identifiers': [{'permalink': 'san-francisco-california',
      'uuid': '528f5e3c-90d1-1111-5d1c-2e4ff979d58e',
      'location_type': 'city',
      'entity_def_id': 'location',
      'value': 'San Francisco'},
     {'permalink': 'california-united-states',
      'uuid': 'eb879a83-c91a-121e-0bb8-829782dbcf04',
      'location_type': 'region',
      'entity_def_id': 'location',
      'value': 'California'},
     {'permalink': 'united-states',
      'uuid': 'f110fca2-1055-99f6-996d-011c198b3928',
      'location_type': 'c

In [24]:
# Looping through the page-2 list to extract each company's name and country
# and append them to a dictionary of column lists
pg2_company_details = {
    "company_name" : [],
    "company_country" : []
}

for company in pg2_company_list :

    company_name = company["properties"]["identifier"]["value"]
    pg2_company_details["company_name"].append(company_name)
    print("company_name:", company_name)

    # Select the location entry whose location_type is "country" instead of
    # assuming it sits at index 2; a fixed position raises IndexError or picks
    # the wrong level when a company has fewer location entries.
    locations = company["properties"].get("location_identifiers", [])
    country_entry = next(
        (loc for loc in locations if loc.get("location_type") == "country"),
        None,
    )
    # Appending None keeps both lists the same length when no country is listed
    company_country = country_entry["value"] if country_entry else None
    pg2_company_details["company_country"].append(company_country)
    print("company_country:", company_country)

    print('-'*70)

company_name: OZiva
company_country: India
----------------------------------------------------------------------
company_name: Vi
company_country: United States
----------------------------------------------------------------------
company_name: Cashify
company_country: India
----------------------------------------------------------------------
company_name: Ather Energy
company_country: India
----------------------------------------------------------------------
company_name: Innoviz Technologies
company_country: Israel
----------------------------------------------------------------------
company_name: Blippar
company_country: United Kingdom
----------------------------------------------------------------------
company_name: Skycatch
company_country: United States
----------------------------------------------------------------------
company_name: TransUnion
company_country: United States
----------------------------------------------------------------------
company_name: Baraja
co

----------------------------------------------------------------------
company_name: Ankr
company_country: United States
----------------------------------------------------------------------
company_name: Hudl
company_country: United States
----------------------------------------------------------------------
company_name: Second Sight
company_country: United States
----------------------------------------------------------------------
company_name: Abacum
company_country: United States
----------------------------------------------------------------------
company_name: Halodoc
company_country: Indonesia
----------------------------------------------------------------------
company_name: CarDekho
company_country: India
----------------------------------------------------------------------
company_name: Arix Bioscience
company_country: United Kingdom
----------------------------------------------------------------------
company_name: Yext
company_country: United States
---------------

company_name: Truepill
company_country: United States
----------------------------------------------------------------------
company_name: 58.com
company_country: China
----------------------------------------------------------------------
company_name: CloudBees
company_country: United States
----------------------------------------------------------------------
company_name: Clearcover
company_country: United States
----------------------------------------------------------------------
company_name: Groove
company_country: United States
----------------------------------------------------------------------
company_name: Novo Nordisk
company_country: Denmark
----------------------------------------------------------------------
company_name: LinearB
company_country: United States
----------------------------------------------------------------------
company_name: minu
company_country: Mexico
----------------------------------------------------------------------
company_name: Stacks
co

company_name: Rivigo
company_country: India
----------------------------------------------------------------------
company_name: SeatGeek
company_country: United States
----------------------------------------------------------------------
company_name: IDnow
company_country: Germany
----------------------------------------------------------------------
company_name: Canopy
company_country: United States
----------------------------------------------------------------------
company_name: Nuxeo
company_country: United States
----------------------------------------------------------------------
company_name: Hipcamp
company_country: United States
----------------------------------------------------------------------
company_name: Atari
company_country: United States
----------------------------------------------------------------------
company_name: MediBuddy
company_country: India
----------------------------------------------------------------------
company_name: Bancor
company_countr

company_country: Australia
----------------------------------------------------------------------
company_name: SeaChange International
company_country: United States
----------------------------------------------------------------------
company_name: Slync.io
company_country: United States
----------------------------------------------------------------------
company_name: Pienso
company_country: United States
----------------------------------------------------------------------
company_name: Signifyd
company_country: United States
----------------------------------------------------------------------
company_name: Virsec
company_country: United States
----------------------------------------------------------------------
company_name: FiscalNote
company_country: United States
----------------------------------------------------------------------
company_name: Canvas Medical
company_country: United States
----------------------------------------------------------------------
company_

In [26]:
# Display the page-2 dictionary to check the appended values
pg2_company_details

{'company_name': ['OZiva',
  'Vi',
  'Cashify',
  'Ather Energy',
  'Innoviz Technologies',
  'Blippar',
  'Skycatch',
  'TransUnion',
  'Baraja',
  'Color',
  'FlixBus',
  'Windfall',
  'Tripledot Studios',
  'Covalent',
  'TravelPerk',
  'WELL Health Technologies',
  'The Zebra',
  'Indiegogo',
  'SendGrid',
  'Motorola Solutions',
  'Section4',
  'Lark',
  'Moonfare',
  'Revolut',
  'Markforged',
  'OfferUp',
  'ClassPass',
  'Freshly',
  'PureStake',
  'Landit',
  'Pyxis Oncology',
  'Oda',
  'Telegram Messenger',
  'Sarcos Robotics',
  'Alloy Therapeutics',
  'iHeartMedia',
  'IRI',
  'Avanti Financial Group',
  'Trulioo',
  'comScore',
  'Honor',
  'Greenbits',
  'Nuvolo',
  'PAR Technology',
  'AntWak',
  'ActionIQ',
  'Ecovative Design',
  'Fluid Truck',
  'OpenLattice',
  'UST',
  'Starburst Data',
  'goTenna',
  'Vacasa',
  'Good Catch',
  'Collective Health',
  'ForgeRock',
  'Velocity Global',
  'Imperva',
  'Kraken',
  'Loggi',
  'Tylko',
  'Fiverr.com',
  'Instil Bio',
  

In [27]:
# Turn the page-2 dictionary into a pandas DataFrame and preview the first
# five rows
pg2_companies_df = pd.DataFrame.from_dict(pg2_company_details)

pg2_companies_df.head(5)

Unnamed: 0,company_name,company_country
0,OZiva,India
1,Vi,United States
2,Cashify,India
3,Ather Energy,India
4,Innoviz Technologies,Israel


In [28]:
# Write the page-2 DataFrame to a CSV file, leaving out the index column
pg2_companies_df.to_csv(path_or_buf='company_info_pg2.csv', index=False)

c. Paginating to page 3

In [11]:
# Same global company search, with a new after_id so the API returns the next
# 1000 companies (page 3)
payload = dict(
    field_ids=[
        "identifier",
        "location_identifiers",
        "short_description",
        "rank_org",
    ],
    limit=1000,
    after_id="6895b3bb-e29a-05fb-1b87-ee747eeae975",
    order=[dict(field_id="rank_org", sort="asc")],
    query=[
        dict(
            field_id="location_identifiers",
            operator_id="includes",
            type="predicate",
            values=[
                "europe",
                "north-america",
                "latin-america",
                "asia",
                "australia",
                "new-zealand",
            ],
        ),
        dict(
            field_id="facet_ids",
            operator_id="includes",
            type="predicate",
            values=["company"],
        ),
    ],
)

# Post the page-3 request.
# `json=` lets requests serialise the payload, `timeout` stops a stalled
# connection from hanging the kernel, and `raise_for_status()` raises on a
# 4xx/5xx reply instead of letting a later cell fail on an error body.
api_response = requests.post(api_url, json=payload, headers=headers, timeout=30)
api_response.raise_for_status()

In [12]:
# Display the Response object; its repr shows the HTTP status code (200 expected)
api_response

<Response [200]>

In [13]:
# Decode the JSON body with requests' built-in decoder (works from the raw
# bytes instead of the guessed text encoding behind `.text`)
api_output = api_response.json()

# The "entities" key stores the list of companies for this page; the query is
# global, not limited to one region
pg3_company_list = api_output["entities"]

# Confirm it is a list
type(pg3_company_list)

list

In [14]:
# Display the decoded page-3 response to check its structure
api_output

{'count': 1089957,
 'entities': [{'uuid': 'd9b71c86-fafa-76f7-c046-497a63c8e8ce',
   'properties': {'identifier': {'permalink': 'klook',
     'image_id': 'v1433147720/dhpc1eewtqbrxfpg6edu.png',
     'uuid': 'd9b71c86-fafa-76f7-c046-497a63c8e8ce',
     'entity_def_id': 'organization',
     'value': 'KLOOK'},
    'short_description': 'Klook is a travel and leisure booking platform designed to connect travelers with experiences and attractions.',
    'rank_org': 2200,
    'location_identifiers': [{'permalink': 'central-hong-kong-island',
      'uuid': '36e99078-75b7-9386-6f90-b8f11ee1daf4',
      'location_type': 'city',
      'entity_def_id': 'location',
      'value': 'Central'},
     {'permalink': 'hong-kong-island-hong-kong',
      'uuid': '469929a5-67e7-69c1-b274-088b78de23a2',
      'location_type': 'region',
      'entity_def_id': 'location',
      'value': 'Hong Kong Island'},
     {'permalink': 'hong-kong',
      'uuid': 'ed443fa6-a2c6-9f9c-281a-9172fc2f0c71',
      'location_typ

In [15]:
# Looping through page 3 to extract each company's name and country and store
# them in a dictionary of column lists
pg3_company_details = {
    "company_name" : [],
    "company_country" : []
}

for company in pg3_company_list :
    company_name = company["properties"]["identifier"]["value"]
    pg3_company_details['company_name'].append(company_name)
    print(company_name)

    # Select the location entry whose location_type is "country" instead of
    # assuming it sits at index 2; a fixed position raises IndexError or picks
    # the wrong level when a company has fewer location entries.
    locations = company["properties"].get("location_identifiers", [])
    country_entry = next(
        (loc for loc in locations if loc.get("location_type") == "country"),
        None,
    )
    # Appending None keeps both lists the same length when no country is listed
    company_country = country_entry["value"] if country_entry else None
    pg3_company_details['company_country'].append(company_country)
    print(company_country)

    print('-'*70)

KLOOK
Hong Kong
----------------------------------------------------------------------
Ehang
China
----------------------------------------------------------------------
Fetch
United Kingdom
----------------------------------------------------------------------
Harborside
United States
----------------------------------------------------------------------
Flink Food
Germany
----------------------------------------------------------------------
Fabric
Israel
----------------------------------------------------------------------
ZOLAR
Germany
----------------------------------------------------------------------
Penn National Gaming
United States
----------------------------------------------------------------------
Wakefit
India
----------------------------------------------------------------------
VIPKID
China
----------------------------------------------------------------------
Rigetti Computing
United States
----------------------------------------------------------------------
Meda

PingCAP
United States
----------------------------------------------------------------------
Formlabs
United States
----------------------------------------------------------------------
Waze
United States
----------------------------------------------------------------------
BarkBox
United States
----------------------------------------------------------------------
MAPS.ME
Switzerland
----------------------------------------------------------------------
Kyriba
United States
----------------------------------------------------------------------
Anyscale
United States
----------------------------------------------------------------------
Listo
United States
----------------------------------------------------------------------
commercetools
Germany
----------------------------------------------------------------------
Corporate Finance Institute
Canada
----------------------------------------------------------------------
Kite Pharma
United States
-------------------------------------

iFit - fitness technology
United States
----------------------------------------------------------------------
WaveSense
United States
----------------------------------------------------------------------
Manipal Hospitals
India
----------------------------------------------------------------------
Meero
France
----------------------------------------------------------------------
FreeCharge
India
----------------------------------------------------------------------
Glassbox
United Kingdom
----------------------------------------------------------------------
ACADIA Pharmaceuticals
United States
----------------------------------------------------------------------
REX - Real Estate Exchange
United States
----------------------------------------------------------------------
Paytm Payments Bank
India
----------------------------------------------------------------------
mmhmm
United States
----------------------------------------------------------------------
Sema4
United States
----

In [16]:
# Display the page-3 dictionary to check the appended values
pg3_company_details

{'company_name': ['KLOOK',
  'Ehang',
  'Fetch',
  'Harborside',
  'Flink Food',
  'Fabric',
  'ZOLAR',
  'Penn National Gaming',
  'Wakefit',
  'VIPKID',
  'Rigetti Computing',
  'Medable',
  'Malomo',
  'Augmenta',
  'Fireflies.ai',
  'Descript',
  'Oishii Farm',
  'Rodeo Therapeutics',
  'Blackbuck',
  'Aera Technology',
  'Birdeye',
  'Soldo',
  'Upgrade',
  'Refraction AI',
  'Mamaearth',
  'Shiprocket',
  'Sendinblue',
  'Omaze',
  'Oscar Health',
  'Ribbon Home',
  'Gousto',
  'Achilles Therapeutics',
  'Bringg',
  'The/Studio',
  'PeerStreet',
  'Vercel',
  'Financeit',
  'GumGum',
  'loanDepot',
  'AxleHire',
  'Uplift',
  'Atomwise',
  'Nylas',
  'Snowflake',
  'Cohere Health',
  'Universal Music Group',
  'VSPN',
  'Olist',
  'Valence Community',
  'Tribune Publishing',
  'Gravie',
  'Grover',
  'StreamElements',
  'Deep Instinct',
  'Creditas',
  'The Wild',
  'Emburse',
  'Zola',
  'Fenergo',
  'Pythian',
  'Voyager Space Holdings',
  'DISCO',
  'DICE',
  'Industrious',
  

In [17]:
# Build a DataFrame from the page-3 dictionary and preview the first five rows
pg3_company_df = pd.DataFrame.from_dict(pg3_company_details)

pg3_company_df.head(5)

Unnamed: 0,company_name,company_country
0,KLOOK,Hong Kong
1,Ehang,China
2,Fetch,United Kingdom
3,Harborside,United States
4,Flink Food,Germany


In [18]:
# Write the page-3 DataFrame to a CSV file, leaving out the index column
pg3_company_df.to_csv(path_or_buf="company_info_pg3.csv", index=False)

d. Paginating to page 4

In [19]:
# Same global company search, with a new after_id so the API returns the next
# 1000 companies (page 4)
payload = dict(
    field_ids=[
        "identifier",
        "location_identifiers",
        "short_description",
        "rank_org",
    ],
    limit=1000,
    after_id="49f195d7-c566-4d5d-8d6b-3ac849204bd9",
    order=[dict(field_id="rank_org", sort="asc")],
    query=[
        dict(
            field_id="location_identifiers",
            operator_id="includes",
            type="predicate",
            values=[
                "europe",
                "north-america",
                "latin-america",
                "asia",
                "australia",
                "new-zealand",
            ],
        ),
        dict(
            field_id="facet_ids",
            operator_id="includes",
            type="predicate",
            values=["company"],
        ),
    ],
)

# Post the page-4 request.
# `json=` lets requests serialise the payload, `timeout` stops a stalled
# connection from hanging the kernel, and `raise_for_status()` raises on a
# 4xx/5xx reply instead of letting a later cell fail on an error body.
api_response = requests.post(api_url, json=payload, headers=headers, timeout=30)
api_response.raise_for_status()

In [20]:
# Display the Response object; its repr shows the HTTP status code (200 expected)
api_response

<Response [200]>

In [22]:
# Decode the JSON body with requests' built-in decoder (works from the raw
# bytes instead of the guessed text encoding behind `.text`)
api_output = api_response.json()

# The 'entities' key stores the list of companies for this page
pg4_company_list = api_output['entities']

# Confirm it is a list
type(pg4_company_list)

list

In [23]:
# Display the decoded page-4 response to check its structure
api_output

{'count': 1089957,
 'entities': [{'uuid': 'bd1ad07d-51c6-79bf-3395-b112e7904d26',
   'properties': {'identifier': {'permalink': 'fibrogen',
     'image_id': 'hm0jshjbuu0t14gtibqa',
     'uuid': 'bd1ad07d-51c6-79bf-3395-b112e7904d26',
     'entity_def_id': 'organization',
     'value': 'FibroGen'},
    'short_description': 'FibroGen, a research-based biotech\xa0firm, uses its expertise CTGF and HIF biology to discover, develop, and commercialize novel therapeutics.',
    'rank_org': 3274,
    'location_identifiers': [{'permalink': 'san-francisco-california',
      'uuid': '528f5e3c-90d1-1111-5d1c-2e4ff979d58e',
      'location_type': 'city',
      'entity_def_id': 'location',
      'value': 'San Francisco'},
     {'permalink': 'california-united-states',
      'uuid': 'eb879a83-c91a-121e-0bb8-829782dbcf04',
      'location_type': 'region',
      'entity_def_id': 'location',
      'value': 'California'},
     {'permalink': 'united-states',
      'uuid': 'f110fca2-1055-99f6-996d-011c198b3

In [24]:
# Looping through page 4 to extract each company's name and country and store
# them in a dictionary of column lists
pg4_company_details = {
    "company_name" : [],
    "company_country" : []
}

for company in pg4_company_list :
    company_name = company["properties"]["identifier"]["value"]
    pg4_company_details['company_name'].append(company_name)
    print(company_name)

    # Select the location entry whose location_type is "country" instead of
    # assuming it sits at index 2; a fixed position raises IndexError or picks
    # the wrong level when a company has fewer location entries.
    locations = company["properties"].get("location_identifiers", [])
    country_entry = next(
        (loc for loc in locations if loc.get("location_type") == "country"),
        None,
    )
    # Appending None keeps both lists the same length when no country is listed
    company_country = country_entry["value"] if country_entry else None
    pg4_company_details['company_country'].append(company_country)
    print(company_country)

    print('-'*70)

FibroGen
United States
----------------------------------------------------------------------
Pymetrics
United States
----------------------------------------------------------------------
Ramper
Brazil
----------------------------------------------------------------------
Circus Social
Singapore
----------------------------------------------------------------------
JSW Steel
India
----------------------------------------------------------------------
Jackpocket
United States
----------------------------------------------------------------------
Aetion
United States
----------------------------------------------------------------------
Mantra Health
United States
----------------------------------------------------------------------
EarnUp
United States
----------------------------------------------------------------------
Savari
United States
----------------------------------------------------------------------
Corsair
United States
---------------------------------------------------

Damon Motors
Canada
----------------------------------------------------------------------
Baubap
Mexico
----------------------------------------------------------------------
Jeff
Spain
----------------------------------------------------------------------
Kitchen United
United States
----------------------------------------------------------------------
Merit
United States
----------------------------------------------------------------------
KAYAK
United States
----------------------------------------------------------------------
Variantyx
United States
----------------------------------------------------------------------
Axon
United States
----------------------------------------------------------------------
Magicpin
India
----------------------------------------------------------------------
Deskera
Singapore
----------------------------------------------------------------------
SOPHiA GENETICS
United States
----------------------------------------------------------------------

SEBA Bank
Switzerland
----------------------------------------------------------------------
Stockly
France
----------------------------------------------------------------------
Syapse
United States
----------------------------------------------------------------------
Vapor IO
United States
----------------------------------------------------------------------
Frost Giant Studios
United States
----------------------------------------------------------------------
Essity
Sweden
----------------------------------------------------------------------
Riversand
United States
----------------------------------------------------------------------
Human API
United States
----------------------------------------------------------------------
TrustToken
United States
----------------------------------------------------------------------
Fixico
The Netherlands
----------------------------------------------------------------------
Rapid Micro Biosystems
United States
----------------------------

In [25]:
# Checking appended values: displays the whole dictionary holding the
# 'company_name' and 'company_country' lists filled by the loop above
pg4_company_details

{'company_name': ['FibroGen',
  'Pymetrics',
  'Ramper',
  'Circus Social',
  'JSW Steel',
  'Jackpocket',
  'Aetion',
  'Mantra Health',
  'EarnUp',
  'Savari',
  'Corsair',
  'RealWear',
  'OKCoin',
  'Lendio',
  'Pencil',
  'Empower',
  'Elevate K-12',
  'Optoro',
  'Tigera',
  'Token Transit',
  'Sage Intacct',
  'One Concern',
  'Appcues',
  'Mode Analytics',
  'Monese',
  'idealista.com',
  'Vault',
  'SPHERE Technology Solutions',
  'SAGE Therapeutics',
  'Arcus Biosciences',
  'Formation',
  'Mattermost',
  'Apnimed',
  'Eternal',
  'PHC Holdings',
  'Health Catalyst',
  'Grocemania',
  'Amyris',
  'OneDrop',
  'Archer',
  'Tarana Wireless',
  'User Interviews',
  'Radius Payment Solutions',
  'Reonomy',
  'Fieldwire',
  'DeepMap',
  'Asher Bio',
  'Jobber',
  'Deepgram',
  'Shape Security',
  'Katana Graph',
  'Abound',
  'Flo Health',
  'Sovos',
  'Afresh Technologies',
  'Verana Health',
  'Athena',
  'Mem Labs',
  'Benevity',
  'Conviva',
  'PillPack',
  'Centessa Pharmaceu

In [26]:
# Build a DataFrame from the collected lists (the dictionary keys become the columns)
pg4_company_df = pd.DataFrame(data=pg4_company_details)

# Preview the first 5 rows
pg4_company_df.head(n=5)

Unnamed: 0,company_name,company_country
0,FibroGen,United States
1,Pymetrics,United States
2,Ramper,Brazil
3,Circus Social,Singapore
4,JSW Steel,India


In [27]:
# Write the page-4 company table to a CSV file; index=False leaves out the row index
pg4_company_df.to_csv(path_or_buf="company_info_pg4.csv", index=False)

### 3. Upload Cleaned Data Into MySQL Database

a. Upload the CountryID and RegionID fields into the CountryRegions table

In [3]:
# Loading the CSV file containing the Country and Region IDs into a pandas dataframe
# (pd.read_csv already returns a DataFrame, so no extra pd.DataFrame wrapper is needed)
country_region_df = pd.read_csv('country_region.csv')

country_region_df.head()

Unnamed: 0,CountryID,RegionID
0,1,6
1,2,7
2,3,8
3,4,7
4,5,6


In [4]:
# Connection URL for the MySQL database, using the mysqldb driver and utf8 charset.
# NOTE(review): USERNAME, PASSWORD, HOST and DATABASE appear to be placeholders -
# substitute real values at run time and avoid committing actual credentials.
mysql_connection_url = 'mysql+mysqldb://USERNAME:PASSWORD@HOST/DATABASE?charset=utf8'

# Engine object that the to_sql calls below use to reach the database
engine = create_engine(mysql_connection_url)

In [20]:
# Insert the country/region rows into the CountryRegions table.
# if_exists='append' adds to an existing table, so re-running this cell inserts
# the same rows again; index=False keeps the dataframe index out of the table.
country_region_df.to_sql(
    name='CountryRegions',
    con=engine,
    if_exists='append',
    index=False,
)

b. Upload Account data into Accounts Table

In [16]:
# Loading the CSV file containing account data into a pandas dataframe
# (pd.read_csv already returns a DataFrame, so no extra pd.DataFrame wrapper is needed)
# Companies under the AccountName column consist of companies pulled from the Crunchbase Search API above
account_data_df = pd.read_csv('account_data.csv')

account_data_df.head()

Unnamed: 0,CountryID,AccountName,AccountAgeDays,SalesStage,ForecastCategory,ContractPrice,ContractLengthMonths,ContractStartDate,PartnerId,ContractType,CloseDate,EmployeeID
0,1,Ascend Learning,5,Closed Won,Closed,10625.0,16,2019-09-20,8.0,Existing Customer - Upsell,2019-09-17,1
1,1,NetPower,98,(1) Gain Access,Pipeline,83250.0,12,2019-09-13,25.0,New Customer,2019-10-31,2
2,1,Blue Origin,134,(4) Propose Solution,Best Case,153136.4,12,2019-07-31,,New Customer,2019-11-22,3
3,1,"Agilent Technologies, Inc",119,(3) Develop Solution,Pipeline,30000.0,12,2019-11-15,,Existing Customer - New Project,2019-11-15,4
4,2,AyP Mexico,19,(1) Gain Access,Pipeline,15300.0,12,2019-11-30,14.0,New Customer,2019-11-30,5


In [28]:
# Insert the account rows into the Accounts table.
# if_exists='append' adds to an existing table, so re-running this cell inserts
# the same rows again; index=False keeps the dataframe index out of the table.
account_data_df.to_sql(
    name='Accounts',
    con=engine,
    if_exists='append',
    index=False,
)