## Data Collection for Research Question 2

Question: Is there a correlation between the number of hotels (and other short-term rental options) and the number of cafes, restaurants, and other amenities used by tourists?

Data collected: yearly counts of the following amenities in the following areas between 2013 and 2024 (inclusive), taken from OpenStreetMap.

_Note: each yearly count is the mean of the twelve monthly counts for that year, where a monthly count is the count on the 1st day of the month._

| Amenities | Areas |
| --- | --- |
| Cafe | London |
| Restaurant | |
| Car rental | |

In [31]:
import requests
import pandas as pd
import statistics
from pathlib import Path

In [32]:
# Public Overpass API endpoint. HTTPS is used so that queries and results are
# not sent in clear text.
overpass_url = "https://overpass-api.de/api/interpreter"

# Just to keep original query, will be deleted
# (reference example: cafe nodes in London as of 2013-01-01).
overpass_query = """
[out:json][date:"2013-01-01T00:00:00Z"];
area[name="London"][wikipedia="en:London"];
node["amenity"="cafe"](area);
out;
"""

In [33]:
def construct_osm_area_filter(area):
    """Return the Overpass QL area selector for a supported area name.

    Args:
        area: Name of the area to query. Only "London" is recognised.

    Returns:
        The ``area[...]`` selector (without a trailing semicolon), or an
        empty string when ``area`` is not a recognised name.
    """
    if area == "London":
        # Name alone is not unique, so the wikipedia tag is matched as well.
        return 'area[name="London"][wikipedia="en:London"]'

    # Unrecognised areas yield an empty selector.
    return ""

In [34]:
def construct_osm_amenity_query(date, area, amenity, out_type):
    """Build the Overpass QL query text for one amenity in one area at a date.

    Args:
        date: Timestamp string placed in the ``[date:"..."]`` setting.
        area: Area name, resolved through ``construct_osm_area_filter``.
        amenity: Amenity to select. "hotel" is matched on the ``tourism``
            key; every other value is matched on the ``amenity`` key.
        out_type: Modifier appended to the ``out`` statement (e.g. "count");
            an empty string gives a plain ``out``.

    Returns:
        The query as a multi-line string.
    """
    if amenity == "hotel":
        # Hotels are tagged under the "tourism" key rather than "amenity".
        tag_filter = '"tourism"="hotel"'
    else:
        tag_filter = f'"amenity"="{amenity}"'

    if out_type == "":
        out_statement = "out"
    else:
        out_statement = f"out {out_type}"

    area_statement = construct_osm_area_filter(area)

    # Assembled line by line; the leading blank line and the indentation
    # reproduce the layout of a triple-quoted block.
    query_lines = [
        "",
        f'        [out:json][date:"{date}"];',
        f"        {area_statement};",
        f"        node[{tag_filter}](area);",
        f"        {out_statement};",
        "    ",
    ]
    return "\n".join(query_lines)

In [35]:
def get_osm_amenity_data(date, area, amenity, out_type = ""):
    """Run an amenity query against the Overpass API and return its elements.

    Args:
        date: Timestamp string for the snapshot to query.
        area: Area name passed to the query builder.
        amenity: Amenity name passed to the query builder.
        out_type: Optional modifier for the ``out`` statement (e.g. "count").

    Returns:
        The value of the "elements" key of the decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # NOTE(review): a second definition of this function follows in the next
    # cell and replaces this one at runtime; consider deleting one of them.
    overpass_query = construct_osm_amenity_query(date, area, amenity, out_type)
    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        # (connect, read) seconds. Without a timeout a stalled connection
        # blocks forever; the read limit is generous because historical
        # queries can be slow.
        timeout=(10, 300),
    )
    # Fail with the HTTP status instead of an opaque JSON decode error when
    # the server returns an error page (e.g. rate limiting).
    response.raise_for_status()
    data = response.json()
    return data["elements"]

In [36]:
def get_osm_amenity_data(date, area, amenity, out_type = ""):
    """Run an amenity query against the Overpass API and return its elements.

    Args:
        date: Timestamp string for the snapshot to query.
        area: Area name passed to the query builder.
        amenity: Amenity name passed to the query builder.
        out_type: Optional modifier for the ``out`` statement (e.g. "count").

    Returns:
        The value of the "elements" key of the decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # NOTE(review): this re-defines the function from the previous cell;
    # consider deleting one of the two definitions.
    overpass_query = construct_osm_amenity_query(date, area, amenity, out_type)
    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        # (connect, read) seconds. Without a timeout a stalled connection
        # blocks forever; the read limit is generous because historical
        # queries can be slow.
        timeout=(10, 300),
    )
    # Fail with the HTTP status instead of an opaque JSON decode error when
    # the server returns an error page (e.g. rate limiting).
    response.raise_for_status()
    data = response.json()
    return data["elements"]

In [37]:
def get_osm_amenity_count(date, area, amenity):
    """Return how many nodes of an amenity exist in an area at a given date.

    Args:
        date: Timestamp string for the snapshot to query.
        area: Area name passed to the query builder.
        amenity: Amenity name passed to the query builder.

    Returns:
        The total reported by the "count" output mode, as an int.
    """
    elements = get_osm_amenity_data(date, area, amenity, "count")
    # The total is read from the first returned element, under tags -> total,
    # and converted to an integer.
    count_element = elements[0]
    return int(count_element['tags']['total'])

In [38]:
def get_yearly_amenity_count(years, area, amenity):
    """Compute the mean monthly amenity count for each year.

    For every year, the count is sampled on the 1st of each of the twelve
    months and the samples are averaged. Progress is printed as it goes.

    Args:
        years: Iterable of years to process.
        area: Area name passed to the count query.
        amenity: Amenity name passed to the count query.

    Returns:
        A DataFrame with a 'year' column and a 'count' column holding the
        yearly mean.
    """
    averages = []

    for year in years:
        # One sample per month, taken at 00:00:00Z on the 1st.
        samples = []
        for month in range(1, 13):
            date = f"{year}-{month:02d}-01T00:00:00Z"
            count = get_osm_amenity_count(date, area, amenity)
            samples.append(count)
            print(f"{amenity} count in {area} at {date}: {count}")

        year_average = statistics.mean(samples)
        averages.append(year_average)
        print(f"{year} {amenity} count in {area}: {year_average}")
        print("================")
        print()

    return pd.DataFrame({'year': years, 'count': averages})

In [39]:
# Study period: 2013 through 2024, both ends included.
years = range(2013, 2024 + 1)
# Areas to collect data for.
areas = [
    "London",
]
# Amenities to count.
amenities = [
    "cafe",
    "restaurant",
    "car_rental",
    "hotel",
]

In [40]:
# Collect the yearly counts for each amenity/area pair and write one CSV per
# pair under ./data/amenities/<area>/<amenity>.csv.
# NOTE(review): the slice starts at index 3, so amenities before that position
# are skipped -- presumably left over from resuming an interrupted run; confirm
# before re-running for the full list.
for amenity in amenities[3:]:
    for area in areas:
        data = get_yearly_amenity_count(years, area, amenity)
        # Tag every row so the per-file frames can be concatenated later.
        data['amenity'] = amenity
        data['area'] = area
        filepath = Path(f'./data/amenities/{area}/{amenity}.csv')
        # Create the per-area directory on first use.
        filepath.parent.mkdir(parents=True, exist_ok=True)
        data.to_csv(filepath, index=False)

KeyboardInterrupt: 