Scrape the URLs of the Craigslist sites for different US locations from https://geo.craigslist.org/iso/us and save them to "Static/CraigsListLinks.xlsx", to be used later for scraping the prices of a searched product.

In [10]:
import requests, json

from bs4 import BeautifulSoup

import pandas as pd

import os

import re

import time

# LocationIQ API key, read from the LOCATIONIQ_API_KEY environment variable.
# Indexing os.environ raises KeyError right here if the variable is not set.
API_KEY = os.environ["LOCATIONIQ_API_KEY"]

def FindLatLong(city):
    """Look up the latitude and longitude of a city via the LocationIQ search API.

    The search is restricted to the US and asks for a single best match. If
    the first attempt yields no match, the query is retried once with every
    character other than ASCII letters and commas replaced by a space
    (e.g. "akron / canton" becomes "akron   canton").

    Args:
        city: Free-form place name to geocode.

    Returns:
        dict: ``{'lat': ..., 'lon': ...}`` with the values reported by the API
        for the top match, or ``None`` for both keys when no match could be
        obtained (API error, network failure, or an unparsable reply).
    """
    # Pause before every lookup to throttle the request rate
    # (presumably to stay within the API's rate limit -- confirm against plan).
    time.sleep(1)
    url = "https://us1.locationiq.com/v1/search.php"
    params = {
        'key': API_KEY,
        'q': city,
        'format': 'json',
        'limit': 1,
        'countrycodes': 'us',
    }

    def _query(query_params):
        """Send one search request; return the decoded JSON or an error dict."""
        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            reply = requests.get(url, params=query_params, timeout=10)
            return json.loads(reply.text)
        except (requests.RequestException, ValueError):
            # Network failure or non-JSON body: report it like an API error so
            # a single bad lookup does not abort the loop over all cities.
            return {'error': 'request failed'}

    def _has_match(reply):
        """Return True when the reply is a non-empty list of result objects."""
        return isinstance(reply, list) and bool(reply) and isinstance(reply[0], dict)

    response = _query(params)
    if not _has_match(response):
        # Single retry with punctuation such as "/" or "-" stripped out.
        params['q'] = re.sub(r'[^A-Za-z,]', ' ', params['q'])
        response = _query(params)

    if _has_match(response):
        top_match = response[0]
        geo_coord = {k: top_match.get(k) for k in ('lat', 'lon')}
    else:
        geo_coord = {'lat': None, 'lon': None}

    print(city, "  ", geo_coord)
    return geo_coord
    



In [11]:
# Download the page that lists the Craigslist sites for the US.
site_resp = requests.get("https://geo.craigslist.org/iso/us", timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
site_resp.raise_for_status()

site_soup = BeautifulSoup(site_resp.content, "html.parser")

# The site links are the anchors inside the first <section> of the page.
site_section = site_soup.section
if site_section is None:
    raise RuntimeError("No <section> element found on the Craigslist site list page")

# Pull out (link, city name) pairs for every Craigslist site.
city_links_list = [(a.get('href'), a.text) for a in site_section.find_all('a')]

# Create a dataframe with the city names and corresponding links.
df_cities = pd.DataFrame(city_links_list, columns=['link', 'city'])

# Find the latitude and longitude of each city using the LocationIQ geocoding
# API and add them to the dataframe for plotting on a map.
# Building the coordinate frame explicitly (rather than apply + merge) keeps
# the 'lat'/'lon' columns present even when no links were found.
geo_coords = pd.DataFrame(
    [FindLatLong(city) for city in df_cities.city],
    index=df_cities.index,
    columns=['lat', 'lon'],
)
df_cities = df_cities.join(geo_coords)

# Make sure the output directory exists before writing into it.
os.makedirs('Static', exist_ok=True)

# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter('Static/CraigsListLinks.xlsx') as writer:
    df_cities.to_excel(writer, sheet_name="Links")

print("Excel File Created")

abilene, TX    {'lat': '32.4466741', 'lon': '-99.7333011'}
akron / canton    {'lat': '41.042311', 'lon': '-81.436089'}
albany, GA    {'lat': '31.5782062', 'lon': '-84.1556809'}
albany, NY    {'lat': '42.6511674', 'lon': '-73.754968'}
albuquerque    {'lat': '35.0841034', 'lon': '-106.6509851'}
altoona-johnstown    {'lat': '40.449452', 'lon': '-78.39367'}
amarillo, TX    {'lat': '35.2072185', 'lon': '-101.8338246'}
ames, IA    {'lat': '42.0267703', 'lon': '-93.6170554'}
anchorage / mat-su    {'lat': '61.58083935', 'lon': '-149.237775548072'}
annapolis, MD    {'lat': '38.9786401', 'lon': '-76.492786'}
ann arbor, MI    {'lat': '42.2681569', 'lon': '-83.7312291'}
appleton-oshkosh-FDL    {'lat': '44.02541', 'lon': '-88.548962'}
asheville, NC    {'lat': '35.6009498', 'lon': '-82.5540161'}
ashtabula, OH    {'lat': '41.7167229', 'lon': '-80.74947'}
athens, GA    {'lat': '33.9597677', 'lon': '-83.376398'}
athens, OH    {'lat': '39.3289242', 'lon': '-82.1012479'}
atlanta, GA    {'lat': '33.749098

green bay, WI    {'lat': '44.5126379', 'lon': '-88.0125794'}
greensboro, NC    {'lat': '36.0726355', 'lon': '-79.7919754'}
greenville / upstate    {'lat': '34.78831435', 'lon': '-82.3847959534128'}
gulfport / biloxi    {'lat': '29.421171', 'lon': '-94.69148'}
hanford-corcoran    {'lat': '60.707779', 'lon': '-151.347835'}
harrisburg, PA    {'lat': '40.2663107', 'lon': '-76.8861122'}
harrisonburg, VA    {'lat': '38.4493315', 'lon': '-78.8688833'}
hartford, CT    {'lat': '41.764582', 'lon': '-72.6908547'}
hattiesburg, MS    {'lat': '31.3271189', 'lon': '-89.2903392'}
hawaii    {'lat': '21.2160437', 'lon': '-157.975203'}
heartland florida    {'lat': '26.39543145', 'lon': '-80.0873776748035'}
helena, MT    {'lat': '46.592712', 'lon': '-112.036109'}
hickory / lenoir    {'lat': '35.287663', 'lon': '-77.6541388'}
high rockies    {'lat': '34.00327', 'lon': '-118.236458'}
hilton head    {'lat': '32.1618488', 'lon': '-80.7512616'}
holland, MI    {'lat': '42.7876022', 'lon': '-86.1090828'}
houma, 

oregon coast    {'lat': '45.9858973', 'lon': '-123.922357303404'}
orlando, FL    {'lat': '28.5421097', 'lon': '-81.3790388'}
outer banks    {'lat': '35.5668467', 'lon': '-75.4684908'}
owensboro, KY    {'lat': '37.7742152', 'lon': '-87.1133304'}
palm springs, CA    {'lat': '33.77217945', 'lon': '-116.495297697851'}
panama city, FL    {'lat': '30.1600827', 'lon': '-85.6545729'}
parkersburg-marietta    {'lat': '39.2635223', 'lon': '-81.556547'}
pensacola, FL    {'lat': '30.421309', 'lon': '-87.2169149'}
peoria, IL    {'lat': '40.6938609', 'lon': '-89.5891008'}
philadelphia    {'lat': '39.9524152', 'lon': '-75.1635755'}
phoenix, AZ    {'lat': '33.4485866', 'lon': '-112.0773456'}
pierre / central SD    {'lat': '44.368305', 'lon': '-100.351185'}
pittsburgh, PA    {'lat': '40.4416941', 'lon': '-79.9900861'}
plattsburgh-adirondacks    {'lat': '44.693302', 'lon': '-73.466425'}
poconos    {'lat': '41.0256428', 'lon': '-75.6046334'}
port huron, MI    {'lat': '42.9815877', 'lon': '-82.440466'}
por

western massachusetts    {'lat': '42.6457352', 'lon': '-71.3173401'}
western slope    {'lat': '41.987711', 'lon': '-87.9319344312381'}
west virginia (old)    {'lat': '38.4758406', 'lon': '-80.8408415'}
wichita falls, TX    {'lat': '33.9137085', 'lon': '-98.4933873'}
wichita, KS    {'lat': '37.6922361', 'lon': '-97.3375448'}
williamsport, PA    {'lat': '41.2493292', 'lon': '-77.0027671'}
wilmington, NC    {'lat': '34.2257282', 'lon': '-77.9447107'}
winchester, VA    {'lat': '39.1852184', 'lon': '-78.1652404'}
winston-salem, NC    {'lat': '36.0998131', 'lon': '-80.2440518'}
worcester / central MA    {'lat': '42.2625932', 'lon': '-71.8022934'}
wyoming    {'lat': '43.1700264', 'lon': '-107.5685348'}
yakima, WA    {'lat': '46.601557', 'lon': '-120.5108421'}
york, PA    {'lat': '39.962398', 'lon': '-76.727392'}
youngstown, OH    {'lat': '41.1035786', 'lon': '-80.6520161'}
yuba-sutter, CA    {'lat': '38.9076407', 'lon': '-121.5455588'}
yuma, AZ    {'lat': '32.7253249', 'lon': '-114.624397'}
z