In [1]:
# Setup

import pandas as pd
from pathlib import Path

In [2]:
# Import and read files

file22 = Path("data/unicorn_startups_sept_2022.csv")
df_2022 = pd.read_csv(file22, encoding="utf-8")

file23 = Path("data/unicorn_startups_july_2023.csv")
df_2023 = pd.read_csv(file23, encoding="utf-8")

file24 = Path("data/unicorn_startups_may_2024.xlsx")
df_2024 = pd.read_excel(file24)

In [3]:
# Make merged DF of for three years

df_2022.rename(columns={"Valuation ($B)":"2022_Valuation"}, inplace=True)
df_2022.rename(columns={"Investors":"Select Investors"}, inplace=True)
df_2023.rename(columns={"Valuation ($B)":"2023_Valuation"}, inplace=True)
df_2024.rename(columns={"Valuation ($B)": "2024_Valuation"}, inplace=True)

first_merge = pd.merge(df_2022, df_2023[["Company", "2023_Valuation"]], on="Company", how="left")
merged_df = pd.merge(first_merge, df_2024[["Company", "2024_Valuation"]], on="Company", how="left")
merged_df = merged_df[["Company", "2022_Valuation", "2023_Valuation", "2024_Valuation", "Date Joined", "Country", 
                       "City", "Industry", "Select Investors"]]
merged_df.to_csv("data/merged_df.csv", header=True)
merged_df.head()

Unnamed: 0,Company,2022_Valuation,2023_Valuation,2024_Valuation,Date Joined,Country,City,Industry,Select Investors
0,ByteDance,$140,$225,225.0,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,$127,$137,150.0,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,$100,$66,66.0,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China..."
3,Stripe,$95,$50,65.0,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG"
4,Canva,$40,$40,25.4,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat..."


In [4]:
# Convert valuation columns to float

merged_df["2022_Valuation"] = merged_df["2022_Valuation"].str.replace("$", "").astype(float)
merged_df["2023_Valuation"] = merged_df["2023_Valuation"].str.replace("$", "").astype(float)

merged_df["Overall_Growth"] = merged_df["2024_Valuation"] - merged_df["2022_Valuation"]
merged_df.head()

Unnamed: 0,Company,2022_Valuation,2023_Valuation,2024_Valuation,Date Joined,Country,City,Industry,Select Investors,Overall_Growth
0,ByteDance,140.0,225.0,225.0,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",85.0
1,SpaceX,127.0,137.0,150.0,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",23.0
2,SHEIN,100.0,66.0,66.0,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",-34.0
3,Stripe,95.0,50.0,65.0,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",-30.0
4,Canva,40.0,40.0,25.4,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat...",-14.6


In [10]:
target_city = "Beijing, China"
geoapify_key = "9323b43c907f4c308ee0be0d40423439"

# Base url

base_url = f"https://api.geoapify.com/v1/geocode/search?text={target_city}&format=json&apiKey={geoapify_key}"

# Run request and get longtitude and latitude
    
response = requests.get(base_url).json()
lat = response["results"][0]["bbox"]["lat1"]
print(lat)

39.7457136


In [12]:
# Geoapify API setup

import hvplot.pandas
import requests
import json
from pprint import pprint

geoapify_key = "9323b43c907f4c308ee0be0d40423439"

latitude = []
longitude = []

for index, row in merged_df.iterrows():

    # Set search parameters

    target_city = f"{row['City']}, {row['Country']}"

    # Base url

    base_url = f"https://api.geoapify.com/v1/geocode/search?text={target_city}&format=json&apiKey={geoapify_key}"

    # Run request and get longtitude and latitude
    
    response = requests.get(base_url).json()
    try:
        lat = response["results"][0]["bbox"]["lat1"]
        latitude.append(lat)
    except:
        latitude.append(None)
        
    try:
        lng = response["results"][0]["bbox"]["lon1"]
        longitude.append(lng)
    except:
        longitude.append(None)

merged_df["Lat"] = latitude
merged_df["Lng"] = longitude

merged_df.head()


Unnamed: 0,Company,2022_Valuation,2023_Valuation,2024_Valuation,Date Joined,Country,City,Industry,Select Investors,Overall_Growth,Lat,Lng
0,ByteDance,140.0,225.0,225.0,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",85.0,39.745714,116.231297
1,SpaceX,127.0,137.0,150.0,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",23.0,29.553045,-82.109512
2,SHEIN,100.0,66.0,66.0,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",-34.0,22.2876,113.679356
3,Stripe,95.0,50.0,65.0,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",-30.0,37.640314,-123.173825
4,Canva,40.0,40.0,25.4,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat...",-14.6,-33.892106,151.201846


In [28]:
# Plot Map

import holoviews as hv

merged_df = merged_df.dropna(subset=["Company", "Lat", "Lng", "2024_Valuation"])
merged_df["2024_Valuation"] = merged_df["2024_Valuation"].abs()

map_plot = merged_df.hvplot.points("Lng", "Lat", geo=True, tiles="OSM", size="2024_Valuation", color="Company", 
                                   width=1000, height=500, scale=3, alpha=0.5,
                                  hover_cols=["Company", "2024_Valuation", "City", "Country"])

# Save map

hv.save(map_plot, "output_data/startups_by_city.html")

map_plot