<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/01_Foursquare_POI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exploring Foursquare POI Data


1. [Foursquare's 104M Points of Interest](https://tech.marksblogg.com/foursquare-open-global-poi-dataset.html)

In [None]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine

In [None]:
import duckdb
import os
import pandas as pd

Foursquare POI count in India
1. Feb 2025 Release = `12.87` Lakhs
2. Aug 2025 Release = `13.67` Lakhs

In [None]:
# Count the places in the Foursquare release whose name contains "starbucks"
# and whose country code is 'IN'.

# Initialize DuckDB connection
con = duckdb.connect()

try:
    # Load the httpfs extension to enable S3 access
    con.execute("INSTALL httpfs;")
    con.execute("LOAD httpfs;")

    s3_path = 's3://fsq-os-places-us-east-1/release/dt=2025-07-08/places/parquet/places-*.zstd.parquet'
    # Run the query on `con` -- the connection httpfs was loaded on. The
    # module-level duckdb.sql() would use DuckDB's separate default connection.
    df = con.sql(
        f"""
        SELECT COUNT(*) as count FROM read_parquet('{s3_path}') WHERE LOWER(name) LIKE '%starbucks%' AND country = 'IN';
""")

    # The relation is evaluated lazily; printing it executes the query, so this
    # must happen while the connection is still open.
    print(df)
finally:
    # Close the DuckDB connection even if the query fails
    con.close()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

┌───────┐
│ count │
│ int64 │
├───────┤
│   680 │
└───────┘



In [None]:
# Copy the Foursquare category table from S3 into a local
# 'categories.parquet' file.

# Initialize DuckDB connection
con = duckdb.connect()

try:
    # Load the httpfs extension to enable S3 access
    con.execute("INSTALL httpfs;")
    con.execute("LOAD httpfs;")

    # Plain string: the path has no placeholders, so no f-string is needed
    s3_path = 's3://fsq-os-places-us-east-1/release/dt=2025-07-08/categories/parquet/categories.zstd.parquet'
    # Execute the COPY command to read from S3 and write to a Parquet file
    query = f"COPY (SELECT * FROM read_parquet('{s3_path}')) TO 'categories.parquet' (FORMAT PARQUET);"
    con.execute(query)
finally:
    # Close the DuckDB connection even if the download fails
    con.close()

In [None]:
# Read the local 'categories.parquet' file into a DataFrame and show its
# (rows, columns) shape.
df = pd.read_parquet(path='categories.parquet')
df.shape

(1245, 17)

In [None]:
# Show one full hierarchy label (row position 10) to see the "A > B > C" format.
sample_label = df['category_label'].iloc[10]
print(sample_label)

Health and Medicine > Physician > Geriatric Doctor


In [None]:
# Keep only the rows whose category_level equals 3.
df.loc[df['category_level'] == 3]

Unnamed: 0,category_id,category_level,category_name,category_label,level1_category_id,level1_category_name,level2_category_id,level2_category_name,level3_category_id,level3_category_name,level4_category_id,level4_category_name,level5_category_id,level5_category_name,level6_category_id,level6_category_name,dt
0,63be6904847c3692a84b9c0c,3,Hockey Club,Sports and Recreation > Hockey > Hockey Club,4f4528bc4b90abdf24c9de85,Sports and Recreation,63be6904847c3692a84b9c0b,Hockey,63be6904847c3692a84b9c0c,Hockey Club,,,,,,,2025-07-08
2,4bf58dd8d48988d117951735,3,Candy Store,Retail > Food and Beverage Retail > Candy Store,4d4b7105d754a06378d81259,Retail,4bf58dd8d48988d1f9941735,Food and Beverage Retail,4bf58dd8d48988d117951735,Candy Store,,,,,,,2025-07-08
4,4bf58dd8d48988d189941735,3,Football Stadium,Arts and Entertainment > Stadium > Football St...,4d4b7104d754a06370d81259,Arts and Entertainment,4bf58dd8d48988d184941735,Stadium,4bf58dd8d48988d189941735,Football Stadium,,,,,,,2025-07-08
5,56aa371be4b08b9a8d573566,3,Ski Store,Retail > Sporting Goods Retail > Ski Store,4d4b7105d754a06378d81259,Retail,4bf58dd8d48988d1f2941735,Sporting Goods Retail,56aa371be4b08b9a8d573566,Ski Store,,,,,,,2025-07-08
6,63be6904847c3692a84b9b31,3,Transmissions Shop,Business and Professional Services > Automotiv...,4d4b7105d754a06375d81259,Business and Professional Services,63be6904847c3692a84b9b2b,Automotive Service,63be6904847c3692a84b9b31,Transmissions Shop,,,,,,,2025-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1235,4eb1bdf03b7b55596b4a7491,3,Camera Store,Retail > Computers and Electronics Retail > Ca...,4d4b7105d754a06378d81259,Retail,63be6904847c3692a84b9bea,Computers and Electronics Retail,4eb1bdf03b7b55596b4a7491,Camera Store,,,,,,,2025-07-08
1237,52e81612bcbc57f1066b7a0d,3,Beach Bar,Dining and Drinking > Bar > Beach Bar,63be6904847c3692a84b9bb5,Dining and Drinking,4bf58dd8d48988d116941735,Bar,52e81612bcbc57f1066b7a0d,Beach Bar,,,,,,,2025-07-08
1238,58daa1558bbb0b01f18ec1f7,3,Hospital Unit,Health and Medicine > Hospital > Hospital Unit,63be6904847c3692a84b9bb9,Health and Medicine,4bf58dd8d48988d196941735,Hospital,58daa1558bbb0b01f18ec1f7,Hospital Unit,,,,,,,2025-07-08
1243,56aa371be4b08b9a8d5734fc,3,Confucian Temple,Community and Government > Spiritual Center > ...,63be6904847c3692a84b9b9a,Community and Government,4bf58dd8d48988d131941735,Spiritual Center,56aa371be4b08b9a8d5734fc,Confucian Temple,,,,,,,2025-07-08


In [None]:
# Break every "A > B > C" label into its list of hierarchy parts.
categories = df['category_label'].str.split(">").to_list()
# The longest label determines how many level columns are needed.
max_cols = max(len(parts) for parts in categories)

# One column per hierarchy level: category_level_1 ... category_level_<max_cols>.
level_names = [f'category_level_{level}' for level in range(1, max_cols + 1)]
df_cat = pd.DataFrame(categories, columns=level_names)

# Splitting on ">" leaves the surrounding spaces in place; trim them per column.
for level_name in level_names:
    df_cat[level_name] = df_cat[level_name].str.strip()

df_cat.head()

Unnamed: 0,category_level_1,category_level_2,category_level_3,category_level_4,category_level_5,category_level_6
0,Sports and Recreation,Hockey,Hockey Club,,,
1,Dining and Drinking,Restaurant,Asian Restaurant,Indonesian Restaurant,Indonesian Meatball Restaurant,
2,Retail,Food and Beverage Retail,Candy Store,,,
3,Dining and Drinking,Restaurant,Asian Restaurant,Satay Restaurant,,
4,Arts and Entertainment,Stadium,Football Stadium,,,


In [None]:
# Number of categories under each top-level group, most frequent first.
top_level_groups = df_cat['category_level_1']
top_level_groups.value_counts()

Unnamed: 0_level_0,count
category_level_1,Unnamed: 1_level_1
Dining and Drinking,393
Business and Professional Services,195
Retail,150
Community and Government,127
Sports and Recreation,87
Travel and Transportation,72
Arts and Entertainment,72
Landmarks and Outdoors,71
Health and Medicine,59
Event,17


In [None]:
# Distinct second-level categories under "Travel and Transportation".
is_travel = df_cat['category_level_1'] == 'Travel and Transportation'
df_cat.loc[is_travel, 'category_level_2'].unique()

array(['Lodging', 'Travel Agency', 'Transport Hub', 'Baggage Locker',
       'Hot Air Balloon Tour Agency', 'Platform', 'Cable Car',
       'Transportation Service', 'Bike Rental', 'Cruise',
       'Border Crossing', 'Boat Rental', 'Rest Area', 'RV Park',
       'Fuel Station', 'Pier', 'Toll Plaza', 'Street', 'General Travel',
       'Travel Lounge', 'Tourist Information and Service',
       'Boat or Ferry', 'Electric Vehicle Charging Station', 'Train',
       'Moving Target', 'Toll Booth', 'Road', 'Parking', None, 'Port',
       'Truck Stop'], dtype=object)