Read US Cities CSV file and put each city/state into a list

In [22]:
import pandas as pd

filename = 'uscities.csv'
cities = []

cities_df = pd.read_csv(filename)
for index,row in cities_df.iterrows():
    city = row.iloc[1]
    city = city.replace('St.','Saint')
    city = city.replace(' ','-')
    state = row.iloc[2]
    cities.append((city.strip(),state.strip()))
    
print(cities[:5])

[('New-York', 'NY'), ('Los-Angeles', 'CA'), ('Chicago', 'IL'), ('Miami', 'FL'), ('Houston', 'TX')]


In [26]:
import pandas as pd

#initialize dictionaries
area_code_dict = dict()
state_code_dict = dict()
state_abbrevs_dict = dict()

#Create dictionary for area codes
area_code_df = pd.read_fwf('area_codes.txt',header=None)
for index, row in area_code_df.iterrows():
    area_type_code, area_code, city_state,display_level,selectable,sort_sequence = row.values
    area_code_dict[str(area_code)] = city_state

#Create dictionary for state codes
state_codes_df = pd.read_fwf('state_codes.txt',header=None)
for index, row in state_codes_df.iterrows():
    code, city_state = row.values
    state_code_dict[city_state] = str(code)

#Create dictionary for state codes
state_abbrevs_df = pd.read_csv('state_abbreviations.txt', sep='\t', header=None)
for index, row in state_abbrevs_df.iterrows():
    state, abbrev = row.values
    state_abbrevs_dict[abbrev] = state

print(area_code_dict)

ValueError: not enough values to unpack (expected 6, got 2)

Use the list of cities and states to webscrape the cost of living data and add it to the cost_of_living.csv

In [None]:
import requests
import sys
from bs4 import BeautifulSoup
import csv

cost_living_csv = 'cost_of_living1002.csv'
header_made = 0

#Filter through the html to get the table data, then add it to the csv
def add_city_data(response,city_name,state):
    global header_made
    html = response.content
    soup = BeautifulSoup(html,'html.parser')
    table = soup.find('table', class_='data_wide_table')
    if not table:
        sys.stderr.write(f'Cannot retreive data for {city_name}\n') 
        return None #end if table does not exist

    row_data = [city_name,state]
    header = ['City','State']

    for row in table.find_all('tr'):
        cols = row.find_all('td')
        data = [td.get_text(strip=True) for td in cols]
        if len(data) > 2:
            item = data[0].strip()
            avg_price = data[1].replace('\xa0$','').strip()
            price_range = data[2].strip()
            prices = (avg_price,price_range)
            row_data.append(prices)
            header.append(item)

    with open(cost_living_csv, 'a', newline = '') as file:
        writer = csv.writer(file)
        if not header_made:
            writer.writerow(header)
            header_made = 1
        writer.writerow(row_data)
        file.flush()
    
    return True

#Create URL from the city/state and retreive website response, call add_city_data function with response as an input
def fetch_data(city,state):
    city_name = city.replace(' ','-')
    url = f'https://www.numbeo.com/cost-of-living/in/{city_name}'
    backup_url = f'https://www.numbeo.com/cost-of-living/in/{city}-{state}-United-States'
    urls = [url,backup_url]

    for url in urls:
        try:
            response = requests.get(url)
            if response.status_code == 200:
                result = add_city_data(response,city_name,state)
                if result: break
        except:
                sys.stderr.write(f'Cannot retreive data for {city_name}\n')

for city, state in cities:
    fetch_data(city,state)

In [None]:
import requests
import sys
from bs4 import BeautifulSoup
import csv

quality_life_csv = 'quality_of_life.csv'
header_made = 0
# cities = cities[101:]

#Filter through the html to get the table data, then add it to the csv
def add_quality_life_data(response,city_name,state):
    global header_made
    html = response.content
    soup = BeautifulSoup(html,'html.parser')
    tables = soup.find_all('table')
    table = tables[2]
    if not table: 
        sys.stderr.write(f'Cannot retreive data for {city_name}\n')
        return False#end if table does not exist

    row_data = [city_name,state]
    header = ['City','State']

    for row in table.find_all('tr'):
        cols = row.find_all('td')
        data = [td.get_text(strip=True) for td in cols]
        if len(data) > 2:
            item = data[0].strip()
            quality = data[1].strip()
            rating = data[2].strip()
            qualities = (quality,rating)
            row_data.append(qualities)
            header.append(item)
        

    with open(quality_life_csv, 'a', newline = '') as file:
        writer = csv.writer(file)
        if not header_made:
            writer.writerow(header)
            header_made = 1
        if len(row_data) > 2:    
            writer.writerow(row_data)
            file.flush()
    
    return True

#Create URL from the city/state and retreive website response, call add_quality_life_data function with response as an input
def fetch_quality_life_data(city,state):
    city_name = city.replace('-',' ')
    url = f'https://www.numbeo.com/quality-of-life/in/{city}'
    backup_url = f'https://www.numbeo.com/quality-of-life/in/{city}-{state}-United-States'
    urls = [url,backup_url]

    for url in urls:
        try:
            response = requests.get(url)
            if response.status_code == 200:
                result = add_quality_life_data(response,city_name,state)
                if result: 
                    break
        except:
            sys.stderr.write(f'Cannot retreive data for {city_name}\n')

for city, state in cities:
    fetch_quality_life_data(city,state)

In [17]:
import json
import requests
import sys

api_key = 'abbbbae8905543da8a40822dc27e7b88'
endpoint = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

def get_BLS_data(seriesId, **kwargs):
    #Pass BLS timeseries data to return data in JSON format, arguments can be added to specify search
    if len(seriesId) < 1 or len(seriesId) > 25:
        raise ValueError('SeriesId must be at least 1 and less than 25')
    
    headers = {'Content-Type': 'application/json'}
    payload = {
        'seriesid': [seriesId],
        'registrationKey': api_key
    }
    
    #Update payload with keyword arguments and convert to JSON
    payload.update(kwargs)
    payload = json.dumps(payload)

    #Post request to BLS API
    response = requests.post(endpoint, data = payload, headers = headers)
    response.raise_for_status()

    #Return JSON response
    result = json.loads(response.text)
    if result['status'] != 'REQUEST_SUCCEEDED':
        raise Exception(result['message'])
    if result['status'] == 'REQUEST_SUCCEEDED':
        return result
    else:
        return None


In [None]:
import csv 

#Initialize lists, iterators, static values
series_ids = []
input_data = []
n_api_calls = 0
failures = 0
measure_code = '03' #Unemployment Rate
bls_data_file = 'bls_data.csv'


#api key and endpoint for U.S. Bureau of Labor Statistics
api_key = 'abbbbae8905543da8a40822dc27e7b88'
endpoint = 'https://api.bls.gov/publicAPI/v2/timeseries/data/'

def add_BLS_data(average_salary,tuple_data):
    print(tuple_data)
    seriesId,city_name,area_code,state_name,abbrev = tuple_data

    with open(bls_data_file,'a',newline='') as file:
        writer = csv.writer(file)
        row = [average_salary,city_name,area_code,state_name,abbrev]
        writer.writerow(row)
        file.flush()
        print(f'{row} added')

for area_code,city_state in area_code_dict.items():
    city_name = city_state.split(',')[0]
    state_abbrev = city_state.split(',')[1].split('-')
    state_codes = []

    for abbrev in state_abbrev:
        abbrev = abbrev.strip()
        state_name = state_abbrevs_dict.get(abbrev[:2])
        state_code = state_code_dict.get(state_name)
        
        #Build the Series ID for the given zip code 
        seriesId = f'LAUCN{area_code}{measure_code}'
        city_info = (seriesId,city_name,area_code,state_name,abbrev)

        try:
            data = get_BLS_data(seriesId, startyear = 2024, endyear = 2024)['Results']['series'][0]['data']
            average_salary = data[0]['value']
            if average_salary:
                add_BLS_data(average_salary,city_info)
        except:
            failures += 1
            print(f'{seriesId} fail')
        n_api_calls += 1

# json_data = get_BLS_data(seriesId, startyear = 2023, endyear = 2024)
print(f'n: {n_api_calls}\n failures: {failures}')

TypeError: 'dict_items' object is not subscriptable