In [1]:
import pandas as pd

In [2]:
# Directory that holds the column-filtered LCA disclosure CSV files.
file_path = './filtered_columns_data/'

# Fiscal years 2016-2018 are each read from a single file.
# Note: no FY2019 file is read in this cell.
raw_data2016 = pd.read_csv(file_path + 'LCA_FY2016.csv')
raw_data2017 = pd.read_csv(file_path + 'LCA_FY2017.csv')
raw_data2018 = pd.read_csv(file_path + 'LCA_FY2018.csv')

# Fiscal years 2020 and 2021 are split into four quarterly files each.
# The quarters are read in order and stacked row-wise with a fresh 0..n-1 index.
raw_data2020 = pd.concat(
    [
        pd.read_csv(file_path + quarterly_csv)
        for quarterly_csv in (
            'LCA_FY2020_Q1.csv',
            'LCA_FY2020_Q2.csv',
            'LCA_FY2020_Q3.csv',
            'LCA_FY2020_Q4.csv',
        )
    ],
    ignore_index=True,
)

raw_data2021 = pd.concat(
    [
        pd.read_csv(file_path + quarterly_csv)
        for quarterly_csv in (
            'LCA_FY2021_Q1.csv',
            'LCA_FY2021_Q2.csv',
            'LCA_FY2021_Q3.csv',
            'LCA_FY2021_Q4.csv',
        )
    ],
    ignore_index=True,
)

  sort=sort,


In [3]:
# Keep only the employer location columns from each fiscal year's frame.
# .copy() gives every slice its own data, independent of the raw frame.
cities_2016, cities_2017, cities_2018, cities_2020, cities_2021 = (
    yearly_frame[['EMPLOYER_CITY', 'EMPLOYER_STATE']].copy()
    for yearly_frame in (raw_data2016, raw_data2017, raw_data2018, raw_data2020, raw_data2021)
)

# Stack the yearly slices row-wise into a single frame.
# ignore_index is not passed, so each slice keeps its own index labels here.
cities = pd.concat(
    [cities_2016, cities_2017, cities_2018, cities_2020, cities_2021]
)

In [4]:
# Clean the combined city/state table in one chained pass:
#   1. drop rows where the state or the city is missing
#   2. upper-case the city name and strip leading/trailing whitespace
#   3. drop rows that are exact duplicates after normalisation
#   4. order rows by state code and renumber the index from 0
# Only EMPLOYER_CITY is normalised; EMPLOYER_STATE is used as it was read.
cities = (
    cities
    .dropna(subset=["EMPLOYER_STATE", "EMPLOYER_CITY"])
    .assign(
        EMPLOYER_CITY=lambda frame: frame["EMPLOYER_CITY"].str.upper().str.strip()
    )
    .drop_duplicates()
    .sort_values('EMPLOYER_STATE')
    .reset_index(drop=True)
)

In [26]:
# State / territory codes that occur in the cleaned data, in order of first
# appearance (the frame was sorted by state code in the cleaning cell).
# The output recorded below lists 58 codes. Four of them are not part of the
# US_states list defined in the next cell:
# AS: American Samoa
# FM: Federated States of Micronesia
# MP: Northern Mariana Islands
# PW: Palau

data_states = pd.unique(cities['EMPLOYER_STATE'])
data_states

array(['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
       'FM', 'GA', 'GU', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA',
       'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MP', 'MS', 'MT', 'NC', 'ND',
       'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'PR',
       'PW', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VI', 'VT', 'WA',
       'WI', 'WV', 'WY'], dtype=object)

In [27]:
# Reference list of US state / territory postal codes, 55 entries in total:
# the 50 states plus DC, GU, PR, VI and CZ.
# NOTE(review): CZ is presumably the former Panama Canal Zone code and does not
# occur in the data shown above -- confirm it is still wanted.
US_states = """
    AL AK AZ AR CA CZ CO CT DE DC
    FL GA GU HI ID IL IN IA KS KY
    LA ME MD MA MI MN MS MO MT NE
    NV NH NJ NM NY NC ND OH OK OR
    PA PR RI SC SD TN TX UT VT VI
    VA WA WV WI WY
""".split()

In [22]:
def cities_per_state(state_name, cities_df=None):
    """Print the sorted, title-cased city names recorded for one state.

    Parameters
    ----------
    state_name : str
        Value matched against the ``EMPLOYER_STATE`` column.
    cities_df : pandas.DataFrame, optional
        Frame with ``EMPLOYER_STATE`` and ``EMPLOYER_CITY`` columns. When
        omitted, the notebook-level ``cities`` frame is used, so existing
        one-argument calls behave as before.

    Returns
    -------
    None
        The result is printed as ``name: <STATE>, cities: [...]`` followed by
        a line containing a single space. A state with no rows prints an
        empty list.
    """
    if cities_df is None:
        # Fall back to the frame built earlier in the notebook.
        cities_df = cities

    # City column of the rows that belong to the requested state.
    state_cities = cities_df.loc[
        cities_df['EMPLOYER_STATE'].isin([state_name]), 'EMPLOYER_CITY'
    ]

    # Capitalize only the first letter of each word, then sort ONCE.
    # (Sorting inside the loop re-sorted the whole list for every row.)
    cities_list = sorted(city.title() for city in state_cities)

    print(f"name: <{state_name}>, cities: {cities_list}")
    print(" ")

In [25]:
# Print the sorted city list for every state / territory code found in the data.
# Note: the loop variable `state` stays defined in the notebook namespace
# after this cell has run.
for state in data_states:
    cities_per_state(state)

name: <AK>, cities: ['Anchorage', 'Barrow', 'Bethel', 'Bettles Field', 'Chevak', 'Cordova', 'Delta Junction', 'Fairbanks', 'Haines', 'Homer', 'Hot Springs', 'Houston', 'Juneau', 'Kenai', 'Ketchikan', 'Kodiak', 'Kotzebue', 'Nome', 'Palmer', 'Sitka', 'Unalakleet', 'Unlaska', 'Wrangell']
 
name: <AL>, cities: ['Adamsville', 'Alabaster', 'Albertville', 'Alexander City', 'Aliceville', 'Andalusia', 'Anniston', 'Arab', 'Ashford', 'Ashland', 'Athens', 'Atmore', 'Auburn', 'Auburn University', 'Bay Minette', 'Bayou La Batre', 'Bessemer', 'Biringham', 'Birmingahm', 'Birmingham', 'Bridgeport', 'Brookwood', 'Burmingham', 'Calvert', 'Center Point', 'Centre', 'Centreville', 'Chelsea', 'Clanton', 'Cottondale', 'Cullman', 'Cusseta', 'Dadeville', 'Daphne', 'Decatur', 'Demopolis', 'Dothan', 'Eastaboga', 'Elba', 'Elberta', 'Elkmont', 'Ensley', 'Enterprise', 'Eufaula', 'Fairfield', 'Fairhope', 'Florence', 'Foley', 'Fort Deposit', 'Fort Payne', 'Gadsden', 'Gadsen', 'Greenville', 'Gulf Shores', 'Hamilton', '

name: <IL>, cities: ['316 West University Drive', '500 W Central Suite 205', '60606', 'Abbott Park', 'Addison', 'Albion', 'Algonguin', 'Algonquin', 'Alsip', 'Altamont', 'Alton', 'Antioch', 'Arcola', 'Arligton Heights', 'Arlington  Heights', 'Arlington Heights', 'Arlington Hts', 'Arthur', 'Assumption', 'Atasca', 'Auburn', 'Aurora', 'Austin', 'Bannockburn', 'Barrington', 'Barrington Hills', 'Barrington,', 'Bartlett', 'Batavia', 'Bedford Park', 'Belleville', 'Bellevue', 'Bellwood', 'Belvidere', 'Bensenville', 'Bensenville,', 'Berkeley', 'Berwyn', 'Bettendorf', 'Bloomigton', 'Bloomingdale', 'Bloomington', 'Blue Island', 'Boca Raton', 'Boilingbrook', 'Boling Brook', 'Bolingbrook', 'Bourbonnais', 'Bradley', 'Bridgeview', 'Brimfield', 'Broadview', 'Brookfield', 'Buffalo Grave', 'Buffalo Grove', 'Bufffalo Grove', 'Burbank', 'Burlington', 'Burnham', 'Burr Ridge', 'Cairo', 'Calumet City', 'Calumet Park', 'Carbondale', 'Carlinville', 'Carlyle', 'Carmel', 'Carol Stream', 'Carpentersville', 'Carrol

name: <NE>, cities: ['1020 East 1St Street', 'Ainsworth', 'Albion', 'Alliance', 'Auburn', 'Aurora', 'Bassett', 'Beatrice', 'Bellevue', 'Benkelman', 'Bennet', 'Bennington', 'Blair', 'Boys Town', 'Bridgeport', 'Broken Bow', 'Columbus', 'Crete', 'Daykin', 'Deshler', 'Douglas', 'Edison', 'Elkhorn', 'Fairbury', 'Falls City', 'Fremont', 'Friend', 'Gordon', 'Grand Island', 'Gretna', 'Hastings', 'Henderson', 'Holdrege', 'Imperial', 'Iselin', 'Jersey City', 'Kearney', 'Kimball', 'La Vista', 'Lavista', 'Lexington', 'Lincoln', 'Lyons', 'Mccook', 'Mead', 'Minden', 'Nebraska', 'Nebraska City', 'Neligh', 'New York', 'Norfolk', 'North Platte', "O'Neill", 'Ogallala', 'Omaha', 'Ord', 'Osmond', 'Palmyra', 'Papillion', 'Pender', 'Pierce', 'Plattsmouth', 'Pleasanton', 'Portland', 'Red Clound', 'Saint Paul', 'San Carlos', 'Schuyler', 'Scottsbluff', 'Seward', 'Shelton', 'Sidney', 'Sparks', 'Stapleton', 'Tekamah', 'Valentine', 'Waterloo', 'Waverly', 'Wayne', 'West Point', 'Winnebago', 'York']
 
name: <NH>, c

name: <TN>, cities: ['Alcoa', 'Alexandria', 'Antioch', 'Arlington', 'Ashland City', 'Athens', 'Atoka', 'Aurora', 'Austin', 'Bartlett', 'Beaumont', 'Big Rock', 'Brentwood', 'Bristol', 'Brownsville', 'Carthage', 'Cary', 'Charleston', 'Chattanooga', 'Clarksville', 'Cleveland', 'Clinton', 'College Grove', 'College Station', 'Collegedale', 'Colliersville', 'Colliervilile', 'Collierville', 'Columbia', 'Cookeville', 'Cordova', 'Covington', 'Crestview Hills', 'Crossville', 'Dandridge', 'Dayton', 'Dickson', 'Dresden', 'Dyersburg', 'Eads', 'East Ridge', 'Farmington Hills', 'Fayetteville', 'Forest', 'Franklin', 'Franlkin', 'Friendsville', 'Gallatin', 'Gatlinburg', 'Germantown', 'Goodlettsville', 'Gordonsville', 'Gray', 'Greeneville', 'Greenville', 'Harrogate', 'Heiskell', 'Henderson', 'Hendersonville', 'Henry', 'Hermitage', 'Hixson', 'Houston', 'Humboldt', 'Irving', 'Jacksboro', 'Jackson', 'Jasper', 'Jefferson City', 'Johnson City', 'Jonesborough', 'Kimball', 'Kingsport', 'Kingston', 'Kingston Sp