In [31]:
# Setup

import pandas as pd
from pathlib import Path
import hvplot.pandas
import requests
import json
from pprint import pprint
from config import geoapify_key

# Load the startup dataset (CSV, UTF-8) from the project's data folder

file = Path("data") / "geoapify_df.csv"
df = pd.read_csv(file, encoding="utf-8")

In [28]:
# Create Country DF to show Number and Percent of Growing, Declining, Stagnant, and Total Startups for each Country

# Classify every startup once by the sign of its CAGR. A startup counts as "stagnant"
# when its CAGR is neither above nor below zero, which covers a zero CAGR and a
# missing (NaN) one.
# NOTE(review): missing CAGR values are therefore reported as stagnant - confirm that is intended.

cagr = df["CAGR (%)"]
is_growing = cagr > 0
is_declining = cagr < 0

startup_flags = pd.DataFrame({
    "Country": df["Country"],
    "Number of Growing Startups": is_growing.astype(int),
    "Number of Declining Startups": is_declining.astype(int),
    "Number of Stagnant Startups": (~(is_growing | is_declining)).astype(int),
    "Total Number of Startups": 1,
})

# One pass over the data instead of re-scanning every row for every country.
# sort=False keeps the countries in order of first appearance in df; rows with a
# missing Country are left out of the per-country table.

country_df = startup_flags.groupby("Country", sort=False).sum().reset_index()

# Calculate percentages of startups (each category as a share of the country's own total)

percent_columns = {
    "Number of Growing Startups": "Percent of Growing Startups",
    "Number of Declining Startups": "Percent of Declining Startups",
    "Number of Stagnant Startups": "Percent of Stagnant Startups",
}
for count_column, percent_column in percent_columns.items():
    country_df[percent_column] = (country_df[count_column] / country_df["Total Number of Startups"]) * 100

# Share of all startups in the dataset. The denominator is taken from the data rather
# than a hard-coded 1207, so it stays correct if the CSV changes.
# NOTE(review): assumes 1207 was the row count of df - confirm.

world_total = len(df)
country_df["Percent of World's Startups"] = (country_df["Total Number of Startups"] / world_total) * 100

# Get Lat and Lng of each Country using Geoapify API
# Lookups are best-effort: a country whose request fails or returns no match gets
# None for both coordinates, and the loop carries on with the next country.

geocode_url = "https://api.geoapify.com/v1/geocode/search"

latitude = []
longitude = []

# A single session reuses the HTTP connection across all country lookups
with requests.Session() as session:
    for target_country in country_df["Country"]:

        # Set search parameters (passed via params= so requests URL-encodes the values)

        params = {
            "country": target_country,
            "format": "json",
            "apiKey": geoapify_key,
        }

        # Run request and get longitude and latitude of the first result

        try:
            response = session.get(geocode_url, params=params, timeout=10)
            response.raise_for_status()
            first_result = response.json()["results"][0]
            lat = first_result["lat"]
            lng = first_result["lon"]
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as error:
            # Only the exception type is shown: requests error messages contain the
            # request URL, which would put the API key into the notebook output.
            print(f"No coordinates found for {target_country} ({type(error).__name__})")
            lat = None
            lng = None

        latitude.append(lat)
        longitude.append(lng)

country_df["Lat"] = latitude
country_df["Lng"] = longitude

country_df

Unnamed: 0,Country,Number of Growing Startups,Number of Declining Startups,Number of Stagnant Startups,Total Number of Startups,Percent of Growing Startups,Percent of Declining Startups,Percent of Stagnant Startups,Percent of World's Startups,Lat,Lng
0,China,4,1,144,149,2.684564,0.671141,96.644295,12.344656,35.000074,104.999927
1,United States,48,15,518,581,8.261618,2.581756,89.156627,48.135874,39.78373,-100.445882
2,Australia,2,1,5,8,25.0,12.5,62.5,0.6628,-24.776109,134.755
3,United Kingdom,3,3,37,43,6.976744,6.976744,86.046512,3.562552,54.702354,-3.276575
4,India,7,5,47,59,11.864407,8.474576,79.661017,4.888152,22.351115,78.667743
5,Germany,2,1,23,26,7.692308,3.846154,88.461538,2.154101,51.163818,10.447831
6,Turkey,1,1,1,3,33.333333,33.333333,33.333333,0.24855,38.959759,34.924965
7,Seychelles,0,0,1,1,0.0,0.0,100.0,0.08285,-4.657498,55.454015
8,Sweden,0,0,5,5,0.0,0.0,100.0,0.41425,59.674971,14.520858
9,Mexico,0,0,8,8,0.0,0.0,100.0,0.6628,23.658512,-102.00771


In [40]:
# Map visualization of the 10 countries holding the largest share of the world's startups

import holoviews as hv

# Keep the ten rows with the highest share of all startups
greatest_percent_startups = country_df.nlargest(10, "Percent of World's Startups")

# Plot Map
# Marker size is driven by the share column, marker color by country,
# and the share is also shown on hover.

percent_startups_map_options = dict(
    geo=True,
    tiles="OSM",
    size="Percent of World's Startups",
    color="Country",
    width=1000,
    height=500,
    scale=10,
    alpha=0.8,
    hover_cols=["Percent of World's Startups"],
)
map_greatest_percent_startups = greatest_percent_startups.hvplot.points(
    "Lng", "Lat", **percent_startups_map_options
).opts(title="Top 10 Countries with the Greatest Percent of All Unicorn Startups in the World")

# Save map as a standalone HTML file

hv.save(map_greatest_percent_startups, "output_data/map_greatest_percent_startups.html")

map_greatest_percent_startups

In [41]:
# Map visualization of Top 10 Countries with the greatest # of startups in the world

greatest_number_startups = country_df.nlargest(10, "Total Number of Startups")

# Plot Map
# Marker size is driven by the startup count, marker color by country,
# and the count is also shown on hover.

map_greatest_number_startups = greatest_number_startups.hvplot.points(
    "Lng", "Lat", geo=True, tiles="OSM",
    size="Total Number of Startups", color="Country",
    width=1000, height=500, scale=3, alpha=0.8,
    hover_cols=["Total Number of Startups"],
).opts(title="Top 10 Countries with the Greatest Number of Unicorn Startups in the World")

# Save map
# Save the map built in this cell (not the percent map from the earlier cell),
# so the HTML file's contents match its name.

hv.save(map_greatest_number_startups, "output_data/map_greatest_number_startups.html")

map_greatest_number_startups

In [47]:
# Map visualization of the Top 10 Countries with the greatest # of growing startups
# and the Top 5 Countries with the greatest # of declining startups

greatest_number_growing = country_df.nlargest(10, "Number of Growing Startups")
greatest_number_declining = country_df.nlargest(5, "Number of Declining Startups")

# Options common to both layers of this map
number_map_shared_options = dict(
    geo=True, tiles="OSM", width=1000, height=500, scale=8, alpha=0.8
)

# Green layer: Top 10 Countries with the greatest # of growing startups

map_greatest_number_growing = greatest_number_growing.hvplot.points(
    "Lng", "Lat",
    size="Number of Growing Startups",
    color="green",
    hover_cols=["Country", "Number of Growing Startups"],
    **number_map_shared_options,
)

# Red layer: Top 5 Countries with the greatest # of declining startups

map_greatest_number_declining = greatest_number_declining.hvplot.points(
    "Lng", "Lat",
    size="Number of Declining Startups",
    color="red",
    hover_cols=["Country", "Number of Declining Startups"],
    **number_map_shared_options,
)

# Overlay the two layers and title the combined map

map_greatest_number_growing_declining = (
    map_greatest_number_growing * map_greatest_number_declining
).opts(title="Top Countries with the Greatest Number of Growing or Declining Unicorn Startups")

# Save map as a standalone HTML file

hv.save(map_greatest_number_growing_declining, "output_data/map_greatest_number_growing_declining.html")

map_greatest_number_growing_declining

In [51]:
# Map visualization of the Top 10 Countries by % of growing startups and the Top 10 by % of declining startups

greatest_percent_growing = country_df.nlargest(10, "Percent of Growing Startups")
greatest_percent_declining = country_df.nlargest(10, "Percent of Declining Startups")

# Options common to both layers of this map
percent_map_shared_options = dict(
    geo=True, tiles="OSM", width=1000, height=500, scale=5, alpha=0.8
)

# Green layer: Top 10 Countries with the greatest % of growing startups

map_greatest_percent_growing = greatest_percent_growing.hvplot.points(
    "Lng", "Lat",
    size="Percent of Growing Startups",
    color="green",
    hover_cols=["Country", "Percent of Growing Startups"],
    **percent_map_shared_options,
)

# Red layer: Top 10 Countries with the greatest % of declining startups

map_greatest_percent_declining = greatest_percent_declining.hvplot.points(
    "Lng", "Lat",
    size="Percent of Declining Startups",
    color="red",
    hover_cols=["Country", "Percent of Declining Startups"],
    **percent_map_shared_options,
)

# Overlay the two layers and title the combined map

map_greatest_percent_growing_declining = (
    map_greatest_percent_growing * map_greatest_percent_declining
).opts(
    title="Top Countries with Greatest Percent of Growing or Declining Unicorn Startups\n(Percentages are calculated out of the total number of unicorn startups per country, not globally.)")

# Save map as a standalone HTML file

hv.save(map_greatest_percent_growing_declining, "output_data/map_greatest_percent_growing_declining.html")

map_greatest_percent_growing_declining

In [None]:
# Map visualization of Top 10 Startups with the greatest CAGR and lowest CAGR (by city)
