In [10]:
import json, requests, keyring, csv, time, logging
from datetime import datetime
import pandas as pd

# Foursquare API credentials are read from the system keyring, never hard-coded
client_id = keyring.get_password('foursquare_client_id', 'foursquare_client_id')
client_secret = keyring.get_password('foursquare_client_secret', 'foursquare_client_secret')

# Input data: the coordinates to search around and the category IDs to query
lat_long = pd.read_csv('files/lat_long.csv')
category = pd.read_csv('files/categories.csv')

# Progress marker written by a previous run
last_run_values = pd.read_csv('files/last_run_values.csv')

# URL to search venues
url = 'https://api.foursquare.com/v2/venues/search'

# Creating a temp list from the categories starting from the last used category
# (this computation used to be copy-pasted twice in this cell; one copy is enough).
# NOTE(review): this reads row 1 of the file while get_id_data() reads the last
# row (values[-1]) -- confirm which one is intended if the file can hold more
# than two rows.
new_value = last_run_values['values'].values[1]
position = category['category'][category['category'] == new_value].index.to_list()
# position[-1] raises IndexError when the stored value is not a known category
category_temp_list = category[(position[-1]):]

# Venue IDs collected so far, and the de-duplicated set of them
venues = pd.read_csv('files/id_data.csv')
unique_venues = venues['venue_id'].unique()

In [2]:
%load_ext memory_profiler

# Time to read the categories and lat long data
## The files are read only once

In [3]:
%%timeit
lat_long = pd.read_csv('files/lat_long.csv')
category = pd.read_csv('files/categories.csv')

3.38 ms ± 112 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Time to read the last used value
## The file is read once every hour, after the hourly limit is reset

In [4]:
%timeit last_run_values = pd.read_csv('files/last_run_values.csv')

1.65 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Time to read the venue IDs
## The file is read once

In [5]:
%timeit venues = pd.read_csv('files/id_data.csv')

3.7 s ± 168 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Time to get only the unique venue IDs
## The process is run once

In [6]:
%timeit unique_venues = venues['venue_id'].unique()

271 ms ± 3.97 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Memory used to get the unique IDs
## This process runs once

In [7]:
%memit venues['venue_id'].unique()

peak memory: 770.97 MiB, increment: 3.94 MiB


# Performance measure for the get_id_data function

In [16]:
# Base query parameters for the Foursquare requests
params = {
    'client_id': client_id,
    'client_secret': client_secret,
    'v': '20210529',  # <-- specifies the latest date for the API version
}

In [17]:
def ll_generator(ll_list):
    """Yield the items of ``ll_list`` one at a time, in order.

    Args:
        ll_list: Any iterable of lat/long values (the notebook passes the
            ``lat_long['lat_long']`` column).

    Yields:
        Each element of ``ll_list``. Once the iterable is exhausted the
        generator ends, so a further ``next()`` raises ``StopIteration``.
    """
    # Delegate to the iterable directly instead of re-yielding in a manual loop
    yield from ll_list

In [24]:
def get_id_data():
    """Collect venue IDs from the Foursquare venue-search endpoint into a CSV file.

    The last used category is read from ``files/last_run_values.csv`` and the
    search resumes from that category. Every remaining category is queried at
    every coordinate in ``lat_long['lat_long']``, and one
    ``[venue_id, timestamp]`` row per returned venue is appended to
    ``files/id_data_test.csv``.

    The run ends when the ``X-RateLimit-Remaining`` response header reports
    that the quota is used up, or after the last category has been queried at
    the last coordinate.

    Relies on the notebook-level names ``category``, ``lat_long``, ``params``
    and ``ll_generator``. ``params`` is copied per request, not modified.

    Returns:
        None.

    Raises:
        IndexError: if the last-run file has no rows, or its last value is not
            present in ``category['category']``.
    """
    # URL to search venues
    url = 'https://api.foursquare.com/v2/venues/search'
    last_run_values = pd.read_csv('files/last_run_values.csv')
    # Creating a temp list from the categories starting from the last used category
    new_value = last_run_values['values'].values[-1]
    position = category['category'][category['category'] == new_value].index.to_list()
    category_temp_list = category[(position[-1]):]

    # Without coordinates there is nothing to request; the loops below would
    # otherwise spin forever because the remaining quota would never change.
    if len(lat_long) == 0:
        logging.warning('The lat_long list is empty, nothing to search')
        return

    # The search is complete once this (category, lat_long) pair was requested.
    # Both values are loop-invariant, so they are looked up once.
    final_category = category['category'].tail(1).item()
    final_ll = lat_long['lat_long'].tail(1).item()

    # Create the lat_long generator
    ll_iter = ll_generator(lat_long['lat_long'])
    # value to be saved in the last run values file
    category_id = None
    end_of_search = False
    # newline='' is what the csv module expects; without it some platforms
    # write an extra blank line after every row.
    with open('files/id_data_test.csv', 'a', newline='') as csv_file:
        # creating a csv writer object
        csvwriter = csv.writer(csv_file)
        # Remaining requests in the current hour (5000 is the hourly limit);
        # refreshed from the response headers after every request.
        headers = 5000
        # Also stop on end_of_search: otherwise this loop never terminates
        # once the search is complete, because `headers` stops changing.
        while headers > 1 and not end_of_search:
            for i in category_temp_list['category']:
                # Check before touching category_id so it always names a
                # category that was actually requested.
                if headers < 1 or end_of_search:
                    break
                category_id = i
                logging.info(f"Runs category ID: {category_id}")
                # One request per coordinate. The previous range(1, len(...))
                # skipped one coordinate per category, which also shifted the
                # generator so the end-of-search pair could be missed.
                for _ in range(len(lat_long)):
                    try:
                        ll = next(ll_iter)
                    except StopIteration:
                        # Reset the lat_long generator
                        ll_iter = ll_generator(lat_long['lat_long'])
                        logging.info('The lat_long generator was reset')
                        ll = next(ll_iter)

                    # Per-request copy so the shared `params` dict is not mutated
                    request_params = dict(params, ll=ll, categoryId=category_id, limit=50)

                    # NOTE(review): no timeout is set, so a stalled connection
                    # blocks here indefinitely -- consider passing timeout=...
                    resp = requests.get(url=url, params=request_params)

                    try:
                        # resp.headers is case-insensitive; converting it to a
                        # plain dict would make this lookup case-sensitive.
                        headers = int(resp.headers['X-RateLimit-Remaining'])
                    except (KeyError, ValueError):
                        # Keep the previous value when the header is missing or malformed
                        logging.warning('The headers did not return a X-RateLimit-Remaining value')

                    # Handle the response BEFORE evaluating the stop conditions,
                    # so the venues of the final request are written too.
                    try:
                        data = json.loads(resp.text)
                        if data['meta']['code'] != 200:
                            logging.warning(f"Error code in response: {data['meta']['code']}")
                        for venue in data['response']['venues']:
                            # writing the data rows to the csv file
                            csvwriter.writerow([venue['id'], str(datetime.now())])
                    except Exception:
                        # Best effort: log the cause and continue with the next
                        # request. KeyboardInterrupt/SystemExit still propagate.
                        logging.critical("Did not write into the id_data.csv file", exc_info=True)

                    if headers < 1:
                        break
                    if category_id == final_category and ll == final_ll:
                        end_of_search = True
                        break

In [25]:
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [5]:
# No need to run this. The output is shown in the image viewer below
# %lprun -f get_id_data get_id_data()

In [6]:
from IPython.display import IFrame
IFrame("files/get_id_data_performance.pdf", width=1800, height=1200)