In [1]:
import csv
import time

import lxml
import requests
from bs4 import BeautifulSoup

In [2]:
# Cities to look up. Each name is interpolated verbatim into a Wikipedia URL
# by the scraping loop and is also written to the "City" column of the CSV,
# so the order of this list defines the row order of every later result.
cities_list = [
    'Boston', 'New York', 'Buffalo', 'Pittsburgh', 'Philadelphia',
    'Baltimore', 'Washington D.C.', 'Norfolk', 'Charlotte', 'Atlanta',
    'Jacksonville', 'Miami', 'Tampa', 'Birmingham', 'Memphis',
    'Nashville', 'Louisville', 'Indianapolis', 'Cincinnati', 'Columbus',
    'Cleveland', 'Detroit', 'Chicago', 'Milwaukee', 'Twin Cities',
    'St. Louis', 'Kansas City', 'Wichita', 'Oklahoma City', 'New Orleans',
    'Dallas', 'Houston', 'Austin', 'San Antonio', 'Albuquerque',
    'Phoenix', 'Las Vegas', 'Colorado Springs', 'Denver', 'Salt Lake City',
    'Seattle', 'Portland', 'San Francisco', 'San Jose', 'Fresno',
    'Los Angeles', 'San Diego', 'Anchorage', 'Honolulu',
]

In [3]:
# Sanity check: how many cities will be looked up.
len(cities_list)

49

In [3]:
# Probe a single article first to learn the page structure before looping.
# requests has no default timeout, so pass one explicitly; otherwise a stalled
# connection would block the kernel indefinitely.
response = requests.get('https://en.wikipedia.org/wiki/Boston', timeout=10)

In [4]:
# Display the response object to confirm the HTTP status of the probe request.
response

<Response [200]>

In [5]:
# Parse the fetched HTML with the lxml parser so it can be searched by tag.
soup = BeautifulSoup(response.text, features='lxml')

In [6]:
# First <span> carrying the "geo" CSS class; it holds the page coordinates.
soup.find("span", class_="geo")

<span class="geo">42.35806; -71.06361</span>

In [7]:
# Text content of the geo span: a single "latitude; longitude" string.
soup.find("span", class_="geo").text

'42.35806; -71.06361'

In [8]:
# Split the "latitude; longitude" string into a two-element list of strings.
soup.find("span", class_="geo").text.split('; ')

['42.35806', '-71.06361']

In [9]:
# Look up the coordinates of every city from its Wikipedia article.
#
# `lat_longs` stays index-aligned with `cities_list`: each entry is either a
# [latitude, longitude] list of strings or the sentinel 0 when the lookup
# failed (request error, non-200 status, or no geo span on the page). Later
# cells test for that sentinel with `== 0`, so it is kept as-is.
lat_longs = []
for city in cities_list:
    coords = 0  # failure sentinel unless a geo span is found below
    try:
        # requests never times out by default; bound the wait so one stalled
        # connection cannot hang the whole run.
        response = requests.get(f'https://en.wikipedia.org/wiki/{city}', timeout=10)
    except requests.RequestException as error:
        # A network-level failure is recorded like any other failed lookup
        # instead of aborting the loop and losing the results gathered so far.
        print(f'Request for {city} failed: {error}')
    else:
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'lxml')
            geo_span = soup.find("span", {"class": "geo"})
            # find() returns None when the page has no such span; check for
            # that directly rather than catching the resulting AttributeError.
            if geo_span is not None:
                coords = geo_span.text.split('; ')
    lat_longs.append(coords)
    # Pause between requests to avoid hammering the server.
    time.sleep(5)

In [10]:
# Inspect the raw results; entries equal to 0 are lookups that failed.
lat_longs

[['42.35806', '-71.06361'],
 0,
 0,
 ['40.43972', '-79.97639'],
 ['39.95278', '-75.16361'],
 ['39.28944', '-76.61528'],
 ['38.9101', '-77.0147'],
 ['52.667', '1.000'],
 ['35.22722', '-80.84306'],
 ['33.75500', '-84.39000'],
 ['30.33694', '-81.66139'],
 ['25.77528', '-80.20889'],
 ['27.96806', '-82.47639'],
 ['52.48000', '-1.90250'],
 0,
 ['36.16667', '-86.78333'],
 ['38.25611', '-85.75139'],
 ['39.76861', '-86.15806'],
 ['39.100', '-84.517'],
 0,
 ['41.48222', '-81.66972'],
 ['42.33139', '-83.04583'],
 ['41.88194', '-87.62778'],
 ['43.05', '-87.95'],
 ['44.950', '-93.200'],
 ['38.62722', '-90.19778'],
 ['39.1', '-94.58'],
 0,
 ['35.46861', '-97.52139'],
 ['29.95', '-90.08'],
 ['32.77917', '-96.80889'],
 ['29.76278', '-95.38306'],
 ['30.26722', '-97.74306'],
 ['29.42500', '-98.49389'],
 ['35.11083', '-106.61000'],
 0,
 ['36.17500', '-115.13639'],
 ['38.83389', '-104.82528'],
 ['39.73917', '-104.99028'],
 ['40.76083', '-111.89111'],
 ['47.60972', '-122.33306'],
 0,
 ['37.77750', '-122.41

In [11]:
# List the cities whose lookup left the failure sentinel (0) in `lat_longs`.
for position, coords in enumerate(lat_longs):
    if coords != 0:
        continue
    print(cities_list[position])

New York
Buffalo
Memphis
Columbus
Wichita
Phoenix
Portland
San Jose


In [13]:
# Wikipedia page titles to retry with. The merge step consumes the results
# strictly in order, so this list must follow the order in which the failed
# cities appear in `lat_longs`.
corrected_cities = [
    'New_York_City', 'Buffalo,_New_York',
    'Memphis,_Tennessee', 'Columbus,_Ohio',
    'Wichita,_Kansas', 'Phoenix,_Arizona',
    'Portland,_Oregon', 'San_Jose,_California',
]

In [14]:
# Retry the failed lookups using the corrected Wikipedia page titles.
#
# `corrected_lat_longs` stays index-aligned with `corrected_cities`: each entry
# is either a [latitude, longitude] list of strings or the sentinel 0 when the
# lookup failed again (request error, non-200 status, or no geo span).
corrected_lat_longs = []
for city in corrected_cities:
    coords = 0  # failure sentinel unless a geo span is found below
    try:
        # requests never times out by default; bound the wait so one stalled
        # connection cannot hang the whole run.
        response = requests.get(f'https://en.wikipedia.org/wiki/{city}', timeout=10)
    except requests.RequestException as error:
        # A network-level failure is recorded like any other failed lookup
        # instead of aborting the loop and losing the results gathered so far.
        print(f'Request for {city} failed: {error}')
    else:
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'lxml')
            geo_span = soup.find("span", {"class": "geo"})
            # find() returns None when the page has no such span; check for
            # that directly rather than catching the resulting AttributeError.
            if geo_span is not None:
                coords = geo_span.text.split('; ')
    corrected_lat_longs.append(coords)
    # Pause between requests to avoid hammering the server.
    time.sleep(5)

In [15]:
# Inspect the retry results; every entry should now be a coordinate pair.
corrected_lat_longs

[['40.712740', '-74.005974'],
 ['42.90472', '-78.84944'],
 ['35.11750', '-89.97111'],
 ['39.96222', '-83.00056'],
 ['37.68889', '-97.33611'],
 ['33.450', '-112.067'],
 ['45.52000', '-122.68194'],
 ['37.333', '-121.900']]

In [16]:
# Merge the two result sets: keep every successful first-pass entry and fill
# each failure sentinel (0) with the next retry result, taken in order.
complete_lat_longs = []
correction_index = 0
for coords in lat_longs:
    if coords != 0:
        # First-pass lookup succeeded; carry it over unchanged.
        complete_lat_longs.append(coords)
        continue
    # Failed lookup: substitute the next unused corrected entry.
    replacement = corrected_lat_longs[correction_index]
    complete_lat_longs.append(replacement)
    correction_index += 1

In [17]:
# Inspect the merged list; it should contain no 0 sentinels any more.
complete_lat_longs

[['42.35806', '-71.06361'],
 ['40.712740', '-74.005974'],
 ['42.90472', '-78.84944'],
 ['40.43972', '-79.97639'],
 ['39.95278', '-75.16361'],
 ['39.28944', '-76.61528'],
 ['38.9101', '-77.0147'],
 ['52.667', '1.000'],
 ['35.22722', '-80.84306'],
 ['33.75500', '-84.39000'],
 ['30.33694', '-81.66139'],
 ['25.77528', '-80.20889'],
 ['27.96806', '-82.47639'],
 ['52.48000', '-1.90250'],
 ['35.11750', '-89.97111'],
 ['36.16667', '-86.78333'],
 ['38.25611', '-85.75139'],
 ['39.76861', '-86.15806'],
 ['39.100', '-84.517'],
 ['39.96222', '-83.00056'],
 ['41.48222', '-81.66972'],
 ['42.33139', '-83.04583'],
 ['41.88194', '-87.62778'],
 ['43.05', '-87.95'],
 ['44.950', '-93.200'],
 ['38.62722', '-90.19778'],
 ['39.1', '-94.58'],
 ['37.68889', '-97.33611'],
 ['35.46861', '-97.52139'],
 ['29.95', '-90.08'],
 ['32.77917', '-96.80889'],
 ['29.76278', '-95.38306'],
 ['30.26722', '-97.74306'],
 ['29.42500', '-98.49389'],
 ['35.11083', '-106.61000'],
 ['33.450', '-112.067'],
 ['36.17500', '-115.13639'],

In [18]:
# Work on a shallow copy so that replacing entries leaves `complete_lat_longs`
# as it was displayed above.
final_lat_longs = list(complete_lat_longs)

In [19]:
# Manual overrides for two cities whose bare page title returned the wrong
# place: the scraped values had latitude ~52 and longitude near 0, unlike every
# other city in the list (presumably the English namesakes; verify if the city
# list changes).
# Positions are looked up by name rather than hard-coded (previously 7 and 13)
# so the overrides still hit the right rows if `cities_list` is reordered.
manual_overrides = {
    'Norfolk': ['36.917', '-76.200'],
    'Birmingham': ['33.51861', '-86.81028'],
}
for city_name, coords in manual_overrides.items():
    final_lat_longs[cities_list.index(city_name)] = coords

In [20]:
# Inspect the final coordinates after the manual overrides were applied.
final_lat_longs

[['42.35806', '-71.06361'],
 ['40.712740', '-74.005974'],
 ['42.90472', '-78.84944'],
 ['40.43972', '-79.97639'],
 ['39.95278', '-75.16361'],
 ['39.28944', '-76.61528'],
 ['38.9101', '-77.0147'],
 ['36.917', '-76.200'],
 ['35.22722', '-80.84306'],
 ['33.75500', '-84.39000'],
 ['30.33694', '-81.66139'],
 ['25.77528', '-80.20889'],
 ['27.96806', '-82.47639'],
 ['33.51861', '-86.81028'],
 ['35.11750', '-89.97111'],
 ['36.16667', '-86.78333'],
 ['38.25611', '-85.75139'],
 ['39.76861', '-86.15806'],
 ['39.100', '-84.517'],
 ['39.96222', '-83.00056'],
 ['41.48222', '-81.66972'],
 ['42.33139', '-83.04583'],
 ['41.88194', '-87.62778'],
 ['43.05', '-87.95'],
 ['44.950', '-93.200'],
 ['38.62722', '-90.19778'],
 ['39.1', '-94.58'],
 ['37.68889', '-97.33611'],
 ['35.46861', '-97.52139'],
 ['29.95', '-90.08'],
 ['32.77917', '-96.80889'],
 ['29.76278', '-95.38306'],
 ['30.26722', '-97.74306'],
 ['29.42500', '-98.49389'],
 ['35.11083', '-106.61000'],
 ['33.450', '-112.067'],
 ['36.17500', '-115.13639

In [28]:
# Export one row per city: name, latitude, longitude.
# newline='' is what the csv module requires for files it writes to; without
# it, platforms that translate line endings emit a blank line after each row.
# The encoding is pinned so the output does not depend on the locale default.
with open('cities.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(["City", "Latitude", "Longitude"])
    # Index into final_lat_longs (rather than zip) so a length mismatch raises
    # instead of silently dropping cities.
    for index, city in enumerate(cities_list):
        csvwriter.writerow([city] + final_lat_longs[index])