In [9]:
# Import & Install required packages

!pip install pandas
!pip install googlemaps

import pandas as pd
import numpy as np

import csv
import requests
import json
import googlemaps
import time

import os
import datetime

# Importing functions necessary for URL Encoding (French Accent Handling)
from urllib.parse import quote

#Handle Warnings
import warnings
warnings.filterwarnings('ignore')

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [2]:
#API Key Used to fetch Public Data
api_key = "Insert Google Maps API Key here"

In [3]:
# Endpoint for geocoding details
geocoding_endpoint = 'https://maps.googleapis.com/maps/api/geocode/json'

# Endpoint for search details
search_endpoint = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'

# Endpoint for place details
details_endpoint = 'https://maps.googleapis.com/maps/api/place/details/json'

In [4]:
#List of SubCategories to fetch data

subcategory_list = ['seafood', 'architecture', 'cycling', 'skateboarding', 'jewellery', 'pottery', 'classical', 'standup comedy', 'hockey', 'cinema', 'agriculture', 'theatre', 'swimming', 'place of worship','non-profit', 'science', 'catering', 'workshops', 'jukebox', 'country', 'workshop', 'francophone', 'brunch', 'wine', 'community', 'youth', 'vendors', 'diy','traditional', 'burgers','weaving','rural','florist', 'poetry','dj','nature', 'karaoke', 'visual art', 'history', 'printing', 'local history', 'craft beer','2slgbtq+','latin', 'plants', 'hip hop', 'piano bar','musical theatre', 'photography', 'crafts', 'comedy', 'food','studio', 'fine dining', 'country/folk','dance','media art','performance','skating', 'fitness', 'gallery', 'storytelling', 'choir', 'asian', 'coffee', 'school', 'museum', 'arts', 'festivals', 'religion', 'heritage', 'public art', 'church', 'parks', 'performance art', 'fair', 'private lessons', 'fashion', 'pop', 'orchestra','producer', 'pub', 'tapas', 'indigenous', 'jazz', 'books & lit', 'music', 'lgbtq+', 'video', 'soccer', 'monument', 'farm to table', 'gaming','martial arts','hip-hop','street food', 'visual arts', 'tacos', 'indie', 'contemporary art', 'football', 'skateboard', 'basketball', 'rock', 'baseball','film', 'improv comedy', 'spoken word', 'design', 'photography public art']

#Set Category Name
category_name = "venue"

In [6]:
# Setting centre point & radius for search
ottawa_latitude = 45.255483
ottawa_longitude = -75.767341
radius = 50000

In [7]:
# List of fieldnames
fieldnames = ['NAME','CATEGORY','SUB_CATEGORY','TAGS', 'LATITUDE', 'LONGITUDE', 'POSTAL_CODE','ADDRESS','CITY','PROVINCE','COUNTRY', 'ACTIVE', 'WEBSITE', 'PHONE']

# Create the folder "ScrapingExtract" if it doesn't exist
if not os.path.exists("ScrapingExtract"):
    os.makedirs("ScrapingExtract")

# CSV File to store the results
csv_file_name = 'COO_CSI_' + category_name + '.csv'

csv_file = os.path.join("ScrapingExtract", csv_file_name)

with open(csv_file, 'w', newline='', encoding='utf-8-sig') as file:
    
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    
    #Loop for the SubCategories
    for keyword in subcategory_list:
        
        #Search Parameters
        params = {
            'key': api_key,
            'location': f'{ottawa_latitude},{ottawa_longitude}',
            'radius': radius,
            'keyword': keyword,
            'language': 'en,fr'
        }

        while True:
            
            #Fetch Basic Location Information
            response = requests.get(search_endpoint, params=params)
            data = response.json()

            if data['status'] == 'OK':
                results = data['results']

                for result in results:
                    
                    name = ''
                    latitude = ''
                    longitude = ''
                    business_status = ''
                    place_id = ''
                    vicinity = ''
                    
                    if 'name' in result:
                        name = result['name']
                    
                    tags = result.get('types', [])
                    
                    latitude = result['geometry']['location']['lat'] if result.get('geometry') and result['geometry'].get('location') else ''
                    longitude = result['geometry']['location']['lng'] if result.get('geometry') and result['geometry'].get('location') else ''
                    
                    if 'business_status' in result:
                        business_status = result['business_status']
                    if 'place_id' in result:
                        place_id = result['place_id']
                    if 'vicinity' in result:
                        vicinity = result['vicinity']
                    
                    # Fetch 'Website' and 'Phone Number' from Place Details utilizing Place_Id
                    details_params = {
                        'key': api_key,
                        'place_id': place_id,
                        'fields': 'website,formatted_phone_number'
                    }
                    details_response = requests.get(details_endpoint, params=details_params)
                    details_data = details_response.json()

                    website = details_data.get('result', {}).get('website', '')
                    phone_number = details_data.get('result', {}).get('formatted_phone_number', '')
                    
                    #To fetch Address Components
                    geocoding_params = {
                        'key': api_key,
                        'latlng': f'{latitude},{longitude}',
                        'result_type': 'postal_code|street_address',
                        'language': 'en,fr'
                    }
                    
                    geocoding_response = requests.get(geocoding_endpoint, params=geocoding_params)
                    geocoding_data = geocoding_response.json()

                    # Declaration of variables to store the Address Details
                    postal_code = ''
                    Province = ''
                    Country = ''


                    if geocoding_data['status'] == 'OK':
                        results = geocoding_data['results']
                        if results:
                            for component in results[0].get('address_components', []):
                                if 'postal_code' in component['types']:
                                    postal_code = component['long_name']
                                if 'administrative_area_level_2' in component['types']:
                                    administrative_area_level_2 = component['long_name']
                                if 'administrative_area_level_1' in component['types']:
                                    Province = component['long_name']
                                if 'country' in component['types']:
                                    Country = component['long_name']
                    
                    result_dict = {
                        'NAME': name,
                        'CATEGORY':category_name,
                        'SUB_CATEGORY':keyword,
                        'TAGS': ', '.join(tags),
                        'LATITUDE': latitude,
                        'LONGITUDE': longitude,
                        'POSTAL_CODE': postal_code,
                        'ADDRESS': vicinity,
                        'CITY': administrative_area_level_2,
                        'PROVINCE' : Province,
                        'COUNTRY' : Country,
                        'ACTIVE': business_status,
                        'WEBSITE': website,
                        'PHONE': phone_number
                    }
                    
                    # Write the row into the CSV
                    writer.writerow(result_dict)


                if 'next_page_token' in data:
                    # Sleep for a few seconds before making the next request to avoid rate limits
                    time.sleep(2)

                    # Use the next_page_token for the next page
                    next_page_token = data['next_page_token']
                    params['pagetoken'] = next_page_token
                else:
                    # All pages have been fetched
                    break

            else:
                print('Request failed:', data['status'])
                break

Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed: ZERO_RESULTS
Request failed