In [9]:
import pandas as pd
import numpy as np
import shapely
import folium
from folium.plugins import HeatMap
from collections import defaultdict

In [2]:
# Raw input tables, read from paths relative to the working directory.
#   country_data   - land-area table; later cells use 'Country Name' and '2022'
#   city_data      - cities table; later cells use 'country', 'city',
#                    'population', 'lat' and 'lng'
#   continent_data - lookup table; later cells use 'Country' and 'Continent'
country_data, city_data, continent_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        './country_area/API_AG.LND.TOTL.K2_DS2_en_csv_v2_1036.csv',
        './cities/worldcities.csv',
        './Countries-by-continents.csv',
    )
)

In [3]:
# NOTE: this cell consumes the freshly loaded frames. After it runs, the
# '2022' and 'Country Name' columns no longer exist under those names, so
# re-run the loading cell before re-running this one.

# Keep only countries that report an area in the '2022' column (the single
# year used here) and keep just the name and that area.
country_data = country_data.dropna(subset=['2022'])
country_data = country_data[['Country Name', '2022']]

# Drop countries that don't appear in the city data
country_data = country_data[country_data['Country Name'].isin(city_data['country'])]

# Attach each country's continent, then drop the countries that have none.
# The drop must happen on the frame: dropna(inplace=True) on the 'Continent'
# column alone removes no rows from the frame, so NaN continents would remain
# and later be sampled as if they were a real continent.
continent_by_country = continent_data.set_index('Country')['Continent']
country_data = country_data.assign(
    Continent=country_data['Country Name'].map(continent_by_country)
)
country_data = country_data.dropna(subset=['Continent'])
country_data = country_data.rename(columns={'Country Name': 'country', '2022': 'area'})

# Cities without a population cannot be weighted when sampling by population.
city_data = city_data.dropna(subset=['population'])

In [4]:
def polygon_random_points(poly, num_points):
    """Return ``num_points`` random points that lie within ``poly``.

    Uses rejection sampling: candidates are drawn uniformly from the bounding
    box of ``poly`` (via ``np.random.uniform``) and kept only when
    ``candidate.within(poly)`` is true.

    Args:
        poly: shapely geometry to sample inside; must expose ``bounds``,
            ``is_empty`` and ``area``.
        num_points: number of points to return. Values <= 0 yield ``[]``.

    Returns:
        A list of ``shapely.Point`` objects of length ``num_points``.

    Raises:
        ValueError: if points are requested from an empty or zero-area
            geometry. No candidate could ever be accepted there, so the
            sampling loop would otherwise never terminate.
    """
    if num_points <= 0:
        return []
    if poly.is_empty or poly.area == 0:
        raise ValueError("cannot sample points inside an empty or zero-area geometry")

    min_x, min_y, max_x, max_y = poly.bounds
    points = []
    while len(points) < num_points:
        # x is drawn before y so the random stream is consumed in a fixed order.
        candidate = shapely.Point([np.random.uniform(min_x, max_x), np.random.uniform(min_y, max_y)])
        if candidate.within(poly):
            points.append(candidate)
    return points

In [11]:
def generate_country_data(buffer_radius=0.25, num_points=50, max_attempts=1000):
    """Sample a continent, a country, a city and random points around the city.

    Sampling steps (all using the global ``np.random`` state):
      1. a continent, uniformly among the non-null values of
         ``country_data['Continent']``;
      2. a country of that continent, with probability proportional to its
         ``area`` relative to the continent's total area;
      3. a city of that country, with probability proportional to its
         ``population`` relative to the country's total.
    If the chosen country has no rows in ``city_data`` the whole draw is
    repeated from step 1.

    Args:
        buffer_radius: radius passed to ``Point.buffer`` around the city, in
            the same units as the ``lng``/``lat`` columns (presumably degrees
            -- confirm against the city data).
        num_points: number of random points to generate inside the buffer.
        max_attempts: maximum number of draws before giving up.

    Returns:
        Tuple ``(continent, country, city, points)`` where ``city`` is the
        selected row of ``city_data`` and ``points`` is the list returned by
        ``polygon_random_points``.

    Raises:
        RuntimeError: if no draw produced a country with cities within
            ``max_attempts`` attempts.
    """
    # NaN continents can never match any row, so exclude them up front
    # instead of drawing them and retrying.
    continents = country_data['Continent'].dropna().unique()

    # A bounded loop replaces the original unbounded recursive retry.
    for _ in range(max_attempts):
        # Sample a continent with equal probability
        continent = np.random.choice(continents)
        continent_countries = country_data[country_data['Continent'] == continent]

        # Sample a country proportionally to its share of the continent's area
        area_weights = (continent_countries['area'] / continent_countries['area'].sum()).to_numpy()
        country = np.random.choice(continent_countries['country'].to_numpy(), p=area_weights)

        picked_city_data = city_data[city_data['country'] == country]
        if picked_city_data.empty:
            continue

        # Sample a city proportionally to its population. The row is picked by
        # position within this country's cities: looking the city up again by
        # name in the full table could return a same-named city that belongs
        # to a different country.
        population_weights = (
            picked_city_data['population'] / picked_city_data['population'].sum()
        ).to_numpy()
        row_position = np.random.choice(len(picked_city_data), p=population_weights)
        city = picked_city_data.iloc[row_position]

        city_point = shapely.geometry.Point(city['lng'], city['lat'])
        city_buffer = city_point.buffer(buffer_radius)

        return (continent, country, city, polygon_random_points(city_buffer, num_points))

    raise RuntimeError(
        f"no country with city data could be sampled in {max_attempts} attempts"
    )

In [18]:
# Draw 1000 samples and count how often each continent, country and city is
# selected, to sanity-check the sampling distribution.
m = folium.Map(zoom_start=2.2)

continents = defaultdict(int)
countries = defaultdict(int)
cities = defaultdict(int)

for i in range(1000):
    continent, country, city, points = generate_country_data()
    # Optional visual check: uncomment to drop markers for the city and its points on `m`.
    # folium.Marker([city['lat'], city['lng']], popup=city['city']).add_to(m)
    # for p in points:
    #     folium.Marker([p.y, p.x], icon=folium.Icon(color='green')).add_to(m)
    continents[continent] += 1
    countries[country] += 1
    # City names are not unique across countries, so key the tally by
    # (country, city name). This counter was declared but never updated before.
    cities[(country, city['city'])] += 1

# Most frequently sampled first.
print(sorted(continents.items(), key=lambda x: x[1], reverse=True))
print(sorted(countries.items(),  key=lambda x: x[1], reverse=True))

[('Africa', 194), ('Europe', 166), ('Oceania', 165), ('Asia', 161), ('North America', 159), ('South America', 155)]
[('Australia', 153), ('United States', 78), ('Brazil', 77), ('Canada', 62), ('China', 56), ('Peru', 21), ('Argentina', 20), ('Ukraine', 19), ('Algeria', 19), ('India', 18), ('Libya', 18), ('France', 18), ('Kazakhstan', 16), ('Indonesia', 14), ('Sudan', 14), ('Mexico', 14), ('Germany', 13), ('Sweden', 12), ('South Africa', 12), ('Angola', 11), ('Spain', 11), ('Colombia', 10), ('Ethiopia', 9), ('Niger', 9), ('Bolivia', 8), ('Chad', 8), ('Mauritania', 8), ('Mali', 7), ('Chile', 7), ('Somalia', 7), ('Romania', 7), ('Afghanistan', 7), ('Norway', 7), ('Finland', 7), ('Thailand', 6), ('United Kingdom', 6), ('Papua New Guinea', 6), ('Belarus', 6), ('Tanzania', 6), ('Ireland', 6), ('Namibia', 5), ('Bulgaria', 5), ('Paraguay', 5), ('Uzbekistan', 5), ('Mongolia', 5), ('Mozambique', 5), ('Italy', 5), ('Poland', 5), ('Morocco', 5), ('Nigeria', 5), ('Central African Republic', 4), ('Bo

In [7]:
# from GoogleDataDownloader import StreetViewer

# sv = StreetViewer(verbose=False)
# for p in points:
#     sv.get_meta(f"{p.y},{p.x}")
#     sv.get_pic(f"{p.y},{p.x}")