In [1]:
# pip install geopy 
# pip install Nominatim

In [1]:
import pandas as pd
import time
import json
from geopy.adapters import AioHTTPAdapter
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

### Reading and cleaning Indeed Jobs

In [2]:
path_to_jobs = ('../resources/indeed_jobs.csv')

In [3]:
# Read raw data
jobs = pd.read_csv(path_to_jobs)
jobs.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation-or-logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


In [4]:
# Remove 'Remote' from cities column
clean_df = jobs.drop(jobs.loc[jobs['city'] == 'Remote'].index)

In [5]:
# Drop not needed categories (whole rows)
clean_df.drop(clean_df.loc[clean_df['category'] == " national origin"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == " AFL-CIO 3550.</p>"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "Government-or-military"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "['ann arbor']"].index, inplace=True)

In [6]:
# Drop States which are '+'pageId:'+e+' in the records (whole rows, these rows have messed up cities also)
# Need data with Cities and States because this data will go to the map visual
clean_df.drop(clean_df.loc[clean_df['state'].str.len() != 2 ].index, inplace=True)

In [7]:
# Renaming categories for cleaner look (they will show up in map visual)
clean_df['category'].replace({'Accounting-or-finance': 'Accounting & Finance'}, inplace=True)
clean_df['category'].replace({'Arts-or-entertainment-or-publishing': 'Entertainment & Publishing'}, inplace=True)
clean_df['category'].replace({'Banking-or-loans': 'Banking & Loans'}, inplace=True)
clean_df['category'].replace({'Computer-or-internet': 'Computer & Internet'}, inplace=True)
clean_df['category'].replace({'Construction-or-facilities': 'Construction & Facilities'}, inplace=True)
clean_df['category'].replace({'Customer-Service': 'Customer Service'}, inplace=True)
clean_df['category'].replace({'Education-or-training': 'Education & Training'}, inplace=True)
clean_df['category'].replace({'Engineering-or-architecture': 'Engineering & Architecture'}, inplace=True)
clean_df['category'].replace({'Hospitality-or-travel': 'Travel & Hospitality'}, inplace=True)
clean_df['category'].replace({'Human-Resources': 'Human Resources'}, inplace=True)
clean_df['category'].replace({'Law-Enforcement-or-security': 'Law Enforcement & Security'}, inplace=True)
clean_df['category'].replace({'Manufacturing-or-mechanical': 'Manufacturing & Mechanical'}, inplace=True)
clean_df['category'].replace({'Marketing-or-advertising-or-pr': 'Marketing & Advertisement'}, inplace=True)
clean_df['category'].replace({'Non-profit/volunteering': 'Non-Profit & Volunteering'}, inplace=True)
clean_df['category'].replace({'Pharmaceutical/bio-tech': 'Pharmaceutical & Bio-Tech'}, inplace=True)
clean_df['category'].replace({'Real-Estate': 'Real Estate'}, inplace=True)
clean_df['category'].replace({'Restaurant-or-food-Service': 'Restaurant & Food Service'}, inplace=True)
clean_df['category'].replace({'Transportation-or-logistics': 'Transportation & Logistics'}, inplace=True)
clean_df['category'].replace({'Upper-Management-or-consulting': 'Consulting & Upper Management'}, inplace=True)

In [8]:
# Renaming the three job titles to correct names (removing strange characters)
clean_df['job_title'].replace({'CartÃ© Hotel San Diego, a Curio Collection by Hilton': 'Curio Collection by Hilton'}, inplace=True)
clean_df['job_title'].replace({"Harrahâ€™s Cherokee Casino Resort": "Harrah's Cherokee Casino Resort"}, inplace=True)
clean_df['job_title'].replace({'Agua Caliente Casino â€¢ Resort â€¢ Spa': 'Agua Caliente Casino Resort Spa'}, inplace=True)

In [9]:
# Renaming some cities to correct names (some aren't full, some have strange characters)
clean_df['city'].replace({"Aber Prov Grd": "Aberdeen Proving Ground"}, inplace=True)
clean_df['city'].replace({"St. Mary\x27s City": "Saint Marys City"}, inplace=True)
clean_df['city'].replace({"Bangor Trident Base": "Bangor"}, inplace=True)
clean_df['city'].replace({"Normandy Farms Estates": "Blue Bell"}, inplace=True)
clean_df['city'].replace({"Sienna Plant": "Sienna Plantation"}, inplace=True)
clean_df['city'].replace({"JBPHH": "Joint Base Pearl Harbor-Hickam"}, inplace=True)
clean_df['city'].replace({"Coeur d\x27Alene": "Coeur d'Alene"}, inplace=True)
clean_df['city'].replace({"Dallas\u002FFort Worth International Airport": "DFW Airport"}, inplace=True)
clean_df['city'].replace({"Homestead AFB": "Homestead"}, inplace=True)
clean_df['city'].replace({"Wpafb": "Wright-Patterson AFB"}, inplace=True)

In [10]:
# Group by category and see if I have only the categories I need
# AND if city, state & job_title records have equal count 
clean_df.groupby('category').count()

Unnamed: 0_level_0,uniq_id,job_title,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Accounting & Finance,1425,1425,1425,1425,1425,1425,1425,1425,328,327,333
Administrative,3419,3419,3419,3419,3419,3419,3419,3419,802,799,797
Banking & Loans,513,513,513,513,513,513,513,513,77,77,76
Computer & Internet,1884,1884,1884,1884,1884,1884,1884,1884,199,198,197
Construction & Facilities,1760,1760,1760,1760,1760,1760,1760,1760,556,548,558
Consulting & Upper Management,464,464,464,464,464,464,464,464,90,90,91
Customer Service,1252,1252,1252,1252,1252,1252,1252,1252,277,271,275
Education & Training,982,982,982,982,982,982,982,982,329,325,327
Engineering & Architecture,491,491,491,491,491,491,491,491,97,95,98
Entertainment & Publishing,299,299,299,299,299,299,299,299,56,55,55


In [11]:
clean_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


### Getting Latitude & Longitude

In [23]:
# Geolocator to get coordinates
geolocator = Nominatim(user_agent="JobMapping")
location = "Georgetown, DE, US"
coord = geolocator.geocode(location, timeout=None)
coord

Location(Georgetown, Williamson County, Texas, United States, (30.6665525, -97.6779637977368, 0.0))

In [12]:
# Create new DF which will have lat and long
locations_df = pd.DataFrame(columns = ['location', 'city', 'state', 'latitude', 'longitude'])

coordinates = []

# Geolocator to get coordinates
geolocator = Nominatim(user_agent="JobMapping")

# Creating 'coordinates' list ['city, state']
for index in clean_df.index:
    location = clean_df['city'][index] + ", " + clean_df['state'][index]
    coordinates.append(location)
    
coordinates_unique = list(set(coordinates))
count = 0

# Getting lat and lon for 'coordinates_unique' 
# AND store lat into 'latitude' list & lon into 'longitude' list
for loc in coordinates_unique:
        location = loc.split(', ')[0] + ", " + loc.split(', ')[1] + ", " + "US"
        print(str(count)+':\t' +location, end='')
        count += 1
        try:
            coord = geolocator.geocode(location, timeout=None)
            if(coord != None):
                locations_df.loc[len(locations_df.index)] = [loc, loc.split(', ')[0], loc.split(', ')[1],coord.latitude,coord.longitude]
                print('\t Good!')
            else:
                print('\t Empty Record!')
        except GeocoderTimedOut as e:
            print("Error: geocode failed on input %s with message %s"%(location, e.message))
            pass
        time.sleep(0.5)

0:	Clinton, MS, US	 Good!
1:	Temple City, CA, US	 Good!
2:	Covington, GA, US	 Good!
3:	Farmers Branch, TX, US	 Good!
4:	Rockville, MD, US	 Good!
5:	Fletcher, NC, US	 Good!
6:	Steamboat Springs, CO, US	 Good!
7:	Fort Shafter, HI, US	 Good!
8:	Batavia, OH, US	 Good!
9:	Creedmoor, NC, US	 Good!
10:	South Bay, FL, US	 Good!
11:	Worcester, MA, US	 Good!
12:	Cherry Point, NC, US	 Good!
13:	Richlands, NC, US	 Good!
14:	Plainview, NE, US	 Good!
15:	Independence, CA, US	 Good!
16:	Gardner, KS, US	 Good!
17:	Providence, RI, US	 Good!
18:	Mukilteo, WA, US	 Good!
19:	Springdale, AR, US	 Good!
20:	Sheffield Village, OH, US	 Good!
21:	Edison, NJ, US	 Good!
22:	Bloomington, MN, US	 Good!
23:	New Castle, PA, US	 Good!
24:	Cicero, IL, US	 Good!
25:	Hoosick Falls, NY, US	 Good!
26:	Miramar Beach, FL, US	 Good!
27:	Elgin, IA, US	 Good!
28:	Antioch, IL, US	 Good!
29:	Bolingbrook, IL, US	 Good!
30:	Clayton, MO, US	 Good!
31:	Oxnard, CA, US	 Good!
32:	Baptist, LA, US	 Good!
33:	Aurora, OH, US	 Good!
34:	Ken

274:	Grass Valley, CA, US	 Good!
275:	Keysville, VA, US	 Good!
276:	Allison Park, PA, US	 Good!
277:	Charlton, MA, US	 Good!
278:	Corpus Christi, TX, US	 Good!
279:	Antioch, TN, US	 Good!
280:	Auburn Hills, MI, US	 Good!
281:	Pleasant Grove, UT, US	 Good!
282:	Alcoa, TN, US	 Good!
283:	Lubbock, TX, US	 Good!
284:	Sparta, NC, US	 Good!
285:	Hamilton, MA, US	 Good!
286:	Scotch Plains, NJ, US	 Good!
287:	Delray Beach, FL, US	 Good!
288:	Morehead City, NC, US	 Good!
289:	Bennett, CO, US	 Good!
290:	Rock Island, IL, US	 Good!
291:	Cameron, LA, US	 Good!
292:	Robinson, PA, US	 Good!
293:	Lakota, ND, US	 Good!
294:	Campbellsville, KY, US	 Good!
295:	Woburn, MA, US	 Good!
296:	Flanders, NJ, US	 Good!
297:	Saint Mary, MO, US	 Good!
298:	Taylorsville, NC, US	 Good!
299:	Paterson, NJ, US	 Good!
300:	Mossville, IL, US	 Good!
301:	Turkey, NC, US	 Good!
302:	Memphis, TN, US	 Good!
303:	Elkton, VA, US	 Good!
304:	Trenton, SC, US	 Good!
305:	Nolensville, TN, US	 Good!
306:	Township of Warren, NJ, US	 

545:	Le Roy, NY, US	 Good!
546:	Saint Cloud, MN, US	 Good!
547:	De Pere, WI, US	 Good!
548:	Pineville, LA, US	 Good!
549:	Burlington, IA, US	 Good!
550:	Boone, NC, US	 Good!
551:	Nixa, MO, US	 Good!
552:	Agoura Hills, CA, US	 Good!
553:	Arverne, NY, US	 Good!
554:	Lorain, OH, US	 Good!
555:	Barnstead, NH, US	 Good!
556:	Ada, MI, US	 Good!
557:	Mansfield, OH, US	 Good!
558:	Wheaton, IL, US	 Good!
559:	Horry County, SC, US	 Good!
560:	Woodward, OK, US	 Good!
561:	Lyndhurst, NJ, US	 Good!
562:	Phoenix, NY, US	 Good!
563:	Garden City, ID, US	 Good!
564:	Niceville, FL, US	 Good!
565:	Burnham, PA, US	 Good!
566:	Dalton, GA, US	 Good!
567:	Palmyra, VA, US	 Good!
568:	Township of Spring, PA, US	 Good!
569:	Bismarck, ND, US	 Good!
570:	Provo, UT, US	 Good!
571:	Highland Heights, OH, US	 Good!
572:	Lisle, IL, US	 Good!
573:	Durant, OK, US	 Good!
574:	Germantown, TN, US	 Good!
575:	Murphys, CA, US	 Good!
576:	Clute, TX, US	 Good!
577:	Gladstone, OR, US	 Good!
578:	Middleton, WI, US	 Good!
579:	Fi

816:	Fort Myers, FL, US	 Good!
817:	Berlin, WI, US	 Good!
818:	Sandy Hook, CT, US	 Good!
819:	Cornelius, NC, US	 Good!
820:	Great Falls, MT, US	 Good!
821:	North Myrtle Beach, SC, US	 Good!
822:	Goldsboro, PA, US	 Good!
823:	Morgan County, IL, US	 Good!
824:	Lakeway, TX, US	 Good!
825:	Savannah, GA, US	 Good!
826:	Victor, NY, US	 Good!
827:	Nellis AFB, NV, US	 Good!
828:	Braddock, PA, US	 Good!
829:	Glendora, CA, US	 Good!
830:	Belton, MO, US	 Good!
831:	Waverly, NC, US	 Good!
832:	Hammond, IN, US	 Good!
833:	Willits, CA, US	 Good!
834:	Dayville, CT, US	 Good!
835:	Shelbyville, KY, US	 Good!
836:	Humboldt, NE, US	 Good!
837:	Fallbrook, CA, US	 Good!
838:	Dallas County, TX, US	 Good!
839:	Moberly, MO, US	 Good!
840:	Clermont, FL, US	 Good!
841:	Geneseo, NY, US	 Good!
842:	Waterloo, NY, US	 Good!
843:	Spring Valley, NY, US	 Good!
844:	Ely, MN, US	 Good!
845:	Lenexa, KS, US	 Good!
846:	Moraine, OH, US	 Good!
847:	Amarillo, TX, US	 Good!
848:	Tomball, TX, US	 Good!
849:	Grundy, VA, US	 Goo

1086:	Portland, MI, US	 Good!
1087:	Foster City, CA, US	 Good!
1088:	Lawrence, KS, US	 Good!
1089:	Citrus Heights, CA, US	 Good!
1090:	Westchester, IL, US	 Good!
1091:	Woodbury, NY, US	 Good!
1092:	Shakopee, MN, US	 Good!
1093:	Mount Airy, MD, US	 Good!
1094:	Bedford Heights, OH, US	 Good!
1095:	Township of Franklin, NJ, US	 Good!
1096:	Meriden, CT, US	 Good!
1097:	Perry, IA, US	 Good!
1098:	Massillon, OH, US	 Good!
1099:	East Rochester, NY, US	 Good!
1100:	Atlantic, NC, US	 Good!
1101:	Lewiston, ME, US	 Good!
1102:	Humble, TX, US	 Good!
1103:	La Quinta, CA, US	 Good!
1104:	North Tonawanda, NY, US	 Good!
1105:	Duluth, GA, US	 Good!
1106:	West Chicago, IL, US	 Good!
1107:	Henderson, KY, US	 Good!
1108:	Hot Springs, AR, US	 Good!
1109:	Shaker Heights, OH, US	 Good!
1110:	Sidney, OH, US	 Good!
1111:	Laguna Beach, CA, US	 Good!
1112:	Keene, NH, US	 Good!
1113:	Darlington, MD, US	 Good!
1114:	Rochester, MN, US	 Good!
1115:	Guaynabo, PR, US	 Good!
1116:	New Buffalo, MI, US	 Good!
1117:	Medfo

1350:	Johnstown, PA, US	 Good!
1351:	Waltham, MA, US	 Good!
1352:	Liberty, MO, US	 Good!
1353:	Columbia, MD, US	 Good!
1354:	Irwindale, CA, US	 Good!
1355:	Forsyth, MT, US	 Good!
1356:	Eighty Four, PA, US	 Good!
1357:	Littleton, CO, US	 Good!
1358:	Covington, LA, US	 Good!
1359:	Montclair, NJ, US	 Good!
1360:	Locust Grove, GA, US	 Good!
1361:	East Brunswick, NJ, US	 Good!
1362:	Jacksonville, NC, US	 Good!
1363:	Forest Hills, NY, US	 Good!
1364:	Damascus, VA, US	 Good!
1365:	DeKalb, IL, US	 Good!
1366:	Neosho, MO, US	 Good!
1367:	Russell, MA, US	 Good!
1368:	Midlothian, VA, US	 Good!
1369:	Stanley, NC, US	 Good!
1370:	Santa Clara Valley, CA, US	 Good!
1371:	Avon, MN, US	 Good!
1372:	Batesville, IN, US	 Good!
1373:	McGuire AFB, NJ, US	 Good!
1374:	Holland, OH, US	 Good!
1375:	Titusville, FL, US	 Good!
1376:	Smithfield, VA, US	 Good!
1377:	Port Angeles, WA, US	 Good!
1378:	Lancaster, CA, US	 Good!
1379:	Culpeper, VA, US	 Good!
1380:	Miami, FL, US	 Good!
1381:	Baxter Springs, KS, US	 Good!

1612:	Frederick, MD, US	 Good!
1613:	Brookhaven, PA, US	 Good!
1614:	West Point, NE, US	 Good!
1615:	Monroe, CT, US	 Good!
1616:	Plymouth, MN, US	 Good!
1617:	Americus, GA, US	 Good!
1618:	Goldsboro, NC, US	 Good!
1619:	Isle of Palms, SC, US	 Good!
1620:	North Oxford, MA, US	 Good!
1621:	Hampton, VA, US	 Good!
1622:	Pacific, MO, US	 Good!
1623:	Wausau, WI, US	 Good!
1624:	East Aurora, NY, US	 Good!
1625:	West Bloomfield Township, MI, US	 Good!
1626:	Clarinda, IA, US	 Good!
1627:	Carlstadt, NJ, US	 Good!
1628:	Stevensville, MD, US	 Good!
1629:	Preston, MN, US	 Good!
1630:	Pharr, TX, US	 Good!
1631:	Newport News, VA, US	 Good!
1632:	Santa Rosa Beach, FL, US	 Good!
1633:	Bell Gardens, CA, US	 Good!
1634:	Marshall, MO, US	 Good!
1635:	Bergen, NY, US	 Good!
1636:	Homer, MI, US	 Good!
1637:	Red Dog Mine, AK, US	 Good!
1638:	Scott AFB, IL, US	 Good!
1639:	Newark, OH, US	 Good!
1640:	Crofton, MD, US	 Good!
1641:	Fostoria, OH, US	 Good!
1642:	Alachua, FL, US	 Good!
1643:	Harrisburg, IL, US	 Goo

1876:	Sonora, CA, US	 Good!
1877:	Corte Madera, CA, US	 Good!
1878:	Cortez, CO, US	 Good!
1879:	Jourdanton, TX, US	 Good!
1880:	Grain Valley, MO, US	 Good!
1881:	Rapid City, SD, US	 Good!
1882:	Desert Hot Springs, CA, US	 Good!
1883:	Forestville, MD, US	 Good!
1884:	Pearisburg, VA, US	 Good!
1885:	Blowing Rock, NC, US	 Good!
1886:	Avenel, NJ, US	 Good!
1887:	Palm City, FL, US	 Good!
1888:	Miami Beach, FL, US	 Good!
1889:	West Haven, CT, US	 Good!
1890:	Ponchatoula, LA, US	 Good!
1891:	Covington, KY, US	 Good!
1892:	Springfield, KY, US	 Good!
1893:	Liberty Lake, WA, US	 Good!
1894:	Holly Springs, GA, US	 Good!
1895:	Elizabethtown, KY, US	 Good!
1896:	Bradford, VT, US	 Good!
1897:	Dunn, NC, US	 Good!
1898:	Twin Falls, ID, US	 Good!
1899:	Clinton, MA, US	 Good!
1900:	Grand Island, NY, US	 Good!
1901:	Middleborough, MA, US	 Good!
1902:	Elmira, NY, US	 Good!
1903:	South Lyon, MI, US	 Good!
1904:	Manahawkin, NJ, US	 Good!
1905:	Philadelphia, PA, US	 Good!
1906:	Sharon Springs, KS, US	 Good!


2139:	Hiram, ME, US	 Good!
2140:	Latrobe, PA, US	 Good!
2141:	Sandpoint, ID, US	 Good!
2142:	Placentia, CA, US	 Good!
2143:	Grove City, OH, US	 Good!
2144:	Hurlburt Field, FL, US	 Good!
2145:	Peapack, NJ, US	 Good!
2146:	Beverly Hills, CA, US	 Good!
2147:	Newcastle, WA, US	 Good!
2148:	Mercer, PA, US	 Good!
2149:	Granby, CT, US	 Good!
2150:	Westerville, OH, US	 Good!
2151:	Apopka, FL, US	 Good!
2152:	Maryland, NY, US	 Good!
2153:	Tampa, FL, US	 Good!
2154:	Walpole, MA, US	 Good!
2155:	Monroe, OH, US	 Good!
2156:	Conway, NH, US	 Good!
2157:	Puyallup, WA, US	 Good!
2158:	Weymouth, MA, US	 Good!
2159:	Decatur, AL, US	 Good!
2160:	Prospect Park, PA, US	 Good!
2161:	Waynesburg, PA, US	 Good!
2162:	Clarksburg, MD, US	 Good!
2163:	Kenilworth, NJ, US	 Good!
2164:	Claremont, NH, US	 Good!
2165:	Clemson, SC, US	 Good!
2166:	Catonsville, MD, US	 Good!
2167:	Black Mountain, NC, US	 Good!
2168:	Barrow, AK, US	 Good!
2169:	Lighthouse Point, FL, US	 Good!
2170:	Lexington, VA, US	 Good!
2171:	Holbrook

2399:	Jamaica Plain, MA, US	 Good!
2400:	Richmond Heights, MO, US	 Good!
2401:	Stone Mountain, GA, US	 Good!
2402:	Frederick County, MD, US	 Good!
2403:	Mission Hills, CA, US	 Good!
2404:	Campbellton, FL, US	 Good!
2405:	Salida, CA, US	 Good!
2406:	Fort Carson, CO, US	 Good!
2407:	Eatontown, NJ, US	 Good!
2408:	Mountain Home AFB, ID, US	 Good!
2409:	Bangor, ME, US	 Good!
2410:	Durham, NH, US	 Good!
2411:	East Liverpool, OH, US	 Good!
2412:	Fort Campbell, TN, US	 Good!
2413:	Baldwin City, KS, US	 Good!
2414:	Allendale, MI, US	 Good!
2415:	North Hatfield, MA, US	 Good!
2416:	Buckeye, AZ, US	 Good!
2417:	Missoula, MT, US	 Good!
2418:	Prineville, OR, US	 Good!
2419:	Elkridge, MD, US	 Good!
2420:	Calipatria, CA, US	 Good!
2421:	Port Lavaca, TX, US	 Good!
2422:	Westport, CT, US	 Good!
2423:	Worthington, OH, US	 Good!
2424:	Fort Gordon, GA, US	 Good!
2425:	Willow Springs, IL, US	 Good!
2426:	Troy, AL, US	 Good!
2427:	Thomasville, NC, US	 Good!
2428:	Centralia, WA, US	 Good!
2429:	Rankin Count

2658:	Barrington, IL, US	 Good!
2659:	Milford, MI, US	 Good!
2660:	Layton, UT, US	 Good!
2661:	Mountain Brook, AL, US	 Good!
2662:	Wexford, PA, US	 Good!
2663:	Henrietta, NY, US	 Good!
2664:	Gregory, TX, US	 Good!
2665:	Ridgefield Park, NJ, US	 Good!
2666:	Bryan, TX, US	 Good!
2667:	Whitley City, KY, US	 Good!
2668:	Binghamton, NY, US	 Good!
2669:	Union City, GA, US	 Good!
2670:	Alamo, CA, US	 Good!
2671:	Dallas\u002FFort Worth International Airport, TX, US	 Empty Record!
2672:	Plant City, FL, US	 Good!
2673:	Carbondale, IL, US	 Good!
2674:	Alhambra, CA, US	 Good!
2675:	Lincolnshire, IL, US	 Good!
2676:	Concord, NH, US	 Good!
2677:	Grand Rapids, MI, US	 Good!
2678:	Ada, OK, US	 Good!
2679:	Hutchinson, KS, US	 Good!
2680:	Sylacauga, AL, US	 Good!
2681:	Warren, MI, US	 Good!
2682:	Catskill, NY, US	 Good!
2683:	Anniston, AL, US	 Good!
2684:	Wayne, MI, US	 Good!
2685:	Norcross, GA, US	 Good!
2686:	Hugo, MN, US	 Good!
2687:	Beatrice, NE, US	 Good!
2688:	Walled Lake, MI, US	 Good!
2689:	Anna

2920:	Saint Pete Beach, FL, US	 Good!
2921:	Highland Heights, KY, US	 Good!
2922:	Pender, NE, US	 Good!
2923:	Bangor, WA, US	 Good!
2924:	Anna, IL, US	 Good!
2925:	Westtown, PA, US	 Good!
2926:	Walla Walla, WA, US	 Good!
2927:	Chaska, MN, US	 Good!
2928:	North Adams, MA, US	 Good!
2929:	Lodi, NJ, US	 Good!
2930:	Mobile County, AL, US	 Good!
2931:	Riverside, MO, US	 Good!
2932:	Brockton, MA, US	 Good!
2933:	Iselin, NJ, US	 Good!
2934:	Oviedo, FL, US	 Good!
2935:	Leominster, MA, US	 Good!
2936:	Town of Waterbury, CT, US	 Good!
2937:	Las Vegas, NM, US	 Good!
2938:	South Windsor, CT, US	 Good!
2939:	Parker, AZ, US	 Good!
2940:	Mount Joy, PA, US	 Good!
2941:	Jenkintown, PA, US	 Good!
2942:	Munising, MI, US	 Good!
2943:	Albuquerque, NM, US	 Good!
2944:	New Glarus, WI, US	 Good!
2945:	Middle Grove, NY, US	 Good!
2946:	North Platte, NE, US	 Good!
2947:	Lorton, VA, US	 Good!
2948:	Huntington Station, NY, US	 Good!
2949:	Petaluma, CA, US	 Good!
2950:	Harrison, AR, US	 Good!
2951:	Frankenmuth, MI

3183:	Rockport, TX, US	 Good!
3184:	Dahlgren, VA, US	 Good!
3185:	Pennsboro, WV, US	 Good!
3186:	Lombard, IL, US	 Good!
3187:	Azusa, CA, US	 Good!
3188:	Chillicothe, OH, US	 Good!
3189:	Nahunta, GA, US	 Good!
3190:	Pascagoula, MS, US	 Good!
3191:	Ada, OH, US	 Good!
3192:	Manasquan, NJ, US	 Good!
3193:	Colonial Heights, VA, US	 Good!
3194:	Knoxville, IA, US	 Good!
3195:	Bethesda, MD, US	 Good!
3196:	Hershey, PA, US	 Good!
3197:	Glenview, IL, US	 Good!
3198:	Gatesville, TX, US	 Good!
3199:	Carthage, NY, US	 Good!
3200:	Poughkeepsie, NY, US	 Good!
3201:	Oxford, MS, US	 Good!
3202:	Monroe County, GA, US	 Good!
3203:	Auburndale, FL, US	 Good!
3204:	Nashotah, WI, US	 Good!
3205:	Herlong, CA, US	 Good!
3206:	New Franken, WI, US	 Good!
3207:	Arcadia, CA, US	 Good!
3208:	Gibsonton, FL, US	 Good!
3209:	Mount Pleasant, SC, US	 Good!
3210:	Canutillo, TX, US	 Good!
3211:	Oakmont, PA, US	 Good!
3212:	Theodore, AL, US	 Good!
3213:	Mediapolis, IA, US	 Good!
3214:	Smithfield, RI, US	 Good!
3215:	Farmin

3443:	Parsons, KS, US	 Good!
3444:	Braselton, GA, US	 Good!
3445:	Township of Randolph, NJ, US	 Good!
3446:	Signal Hill, CA, US	 Good!
3447:	Newton, IA, US	 Good!
3448:	Pennington, NJ, US	 Good!
3449:	Marshall, MI, US	 Good!
3450:	Reed City, MI, US	 Good!
3451:	Solitude, UT, US	 Good!
3452:	Astatula, FL, US	 Good!
3453:	Oregon, OH, US	 Good!
3454:	Central West End, MO, US	 Good!
3455:	Lafayette, NJ, US	 Good!
3456:	Saint Augustine, FL, US	 Good!
3457:	La Vista, NE, US	 Good!
3458:	Cicero, NY, US	 Good!
3459:	Gloucester, MA, US	 Good!
3460:	Columbia, TN, US	 Good!
3461:	Homewood, IL, US	 Good!
3462:	Grant Park, IL, US	 Good!
3463:	Alliance, OH, US	 Good!
3464:	Inland Empire, CA, US	 Good!
3465:	Harmans, MD, US	 Good!
3466:	Burnsville, MN, US	 Good!
3467:	Firestone, CO, US	 Good!
3468:	Clarksville, TN, US	 Good!
3469:	Healdsburg, CA, US	 Good!
3470:	Newburgh, NY, US	 Good!
3471:	Harlingen, TX, US	 Good!
3472:	Millington, TN, US	 Good!
3473:	Walnut Creek, CA, US	 Good!
3474:	Chickasha, OK

3705:	Anacortes, WA, US	 Good!
3706:	Cortland, NY, US	 Good!
3707:	Corydon, IN, US	 Good!
3708:	East Syracuse, NY, US	 Good!
3709:	Peshtigo, WI, US	 Good!
3710:	Ida Grove, IA, US	 Good!
3711:	Urbana, IL, US	 Good!
3712:	Montrose, CO, US	 Good!
3713:	Washington, PA, US	 Good!
3714:	Defiance, OH, US	 Good!
3715:	Saugerties, NY, US	 Good!
3716:	South Lake Tahoe, CA, US	 Good!
3717:	Syracuse, IN, US	 Good!
3718:	Barboursville, WV, US	 Good!
3719:	Wilton Manors, FL, US	 Good!
3720:	International Falls, MN, US	 Good!
3721:	Perkins, OK, US	 Good!
3722:	Los Angeles, CA, US	 Good!
3723:	Kingstowne, VA, US	 Good!
3724:	San Francisco Bay Area, CA, US	 Good!
3725:	Lunenburg, MA, US	 Good!
3726:	Anderson, CA, US	 Good!
3727:	Edgewater, FL, US	 Good!
3728:	Ponte Vedra, FL, US	 Good!
3729:	Chico, CA, US	 Good!
3730:	Branchburg, NJ, US	 Good!
3731:	Madera, CA, US	 Good!
3732:	Mesquite, NV, US	 Good!
3733:	Middle Point, OH, US	 Good!
3734:	Weston, FL, US	 Good!
3735:	Saline, MI, US	 Good!
3736:	Englewo

3967:	Oregon, WI, US	 Good!
3968:	Cordova, TN, US	 Good!
3969:	Lowell, MA, US	 Good!
3970:	Swartz Creek, MI, US	 Good!
3971:	Gresham, OR, US	 Good!
3972:	Fairhope, AL, US	 Good!
3973:	Truth or Consequences, NM, US	 Good!
3974:	Cape Canaveral, FL, US	 Good!
3975:	Ballinger, TX, US	 Good!
3976:	Powhatan, VA, US	 Good!
3977:	Kenosha, WI, US	 Good!
3978:	Dedham, MA, US	 Good!
3979:	Kirtland AFB, NM, US	 Good!
3980:	Falmouth, MA, US	 Good!
3981:	Millersville, MD, US	 Good!
3982:	New Lexington, OH, US	 Good!
3983:	Owasso, OK, US	 Good!
3984:	Avon Park, FL, US	 Good!
3985:	Lincolnwood, IL, US	 Good!
3986:	Redford, MI, US	 Good!
3987:	Dolton, IL, US	 Good!
3988:	Raeford, NC, US	 Good!
3989:	Middlesboro, KY, US	 Good!
3990:	Highland Park, NJ, US	 Good!
3991:	Homosassa, FL, US	 Good!
3992:	Eufaula, AL, US	 Good!
3993:	Green Valley, AZ, US	 Good!
3994:	Brandon, FL, US	 Good!
3995:	Batesville, MS, US	 Good!
3996:	Morristown, NJ, US	 Good!
3997:	Malden, MO, US	 Good!
3998:	Easthampton, MA, US	 Good

4232:	Palm Harbor, FL, US	 Good!
4233:	Kaysville, UT, US	 Good!
4234:	Culver City, CA, US	 Good!
4235:	Killingworth, CT, US	 Good!
4236:	Roanoke, TX, US	 Good!
4237:	Loveland, OH, US	 Good!
4238:	Prince Frederick, MD, US	 Good!
4239:	Romeoville, IL, US	 Good!
4240:	Camp Verde, AZ, US	 Good!
4241:	Southampton, NJ, US	 Good!
4242:	Canby, OR, US	 Good!
4243:	Bedford, MA, US	 Good!
4244:	Downey, CA, US	 Good!
4245:	Ridgewood, NJ, US	 Good!
4246:	Elk Grove, CA, US	 Good!
4247:	Onamia, MN, US	 Good!
4248:	Chesterfield, VA, US	 Good!
4249:	Hollister, ID, US	 Good!
4250:	Cuba, MO, US	 Good!
4251:	Pinckney, MI, US	 Good!
4252:	Wytheville, VA, US	 Good!
4253:	Mount Clemens, MI, US	 Good!
4254:	Payette, ID, US	 Good!
4255:	Hollis, NH, US	 Good!
4256:	Goshen, CA, US	 Good!
4257:	Wayne, PA, US	 Good!
4258:	Rye, NY, US	 Good!
4259:	Temecula, CA, US	 Good!
4260:	Beach, ND, US	 Good!
4261:	Washington Terrace, UT, US	 Good!
4262:	Buchanan, GA, US	 Good!
4263:	Ontario, OR, US	 Good!
4264:	Cinnaminson, N

4494:	Elgin, IL, US	 Good!
4495:	North Reading, MA, US	 Good!
4496:	Allendale, NJ, US	 Good!
4497:	Dayton, TX, US	 Good!
4498:	Rocky Hill, CT, US	 Good!
4499:	Palm Bay, FL, US	 Good!
4500:	Cut Bank, MT, US	 Good!
4501:	Andover, MN, US	 Good!
4502:	Rolla, MO, US	 Good!
4503:	Nacogdoches, TX, US	 Good!
4504:	Brooklyn, MI, US	 Good!
4505:	Malibu, CA, US	 Good!
4506:	Rocky Mount, NC, US	 Good!
4507:	Merrifield, VA, US	 Good!
4508:	Kimberly, OR, US	 Good!
4509:	Quincy, WA, US	 Good!
4510:	Greenville, TX, US	 Good!
4511:	Denver, PA, US	 Good!
4512:	Bartlett, TX, US	 Good!
4513:	Pasco, WA, US	 Good!
4514:	McElhattan, PA, US	 Good!
4515:	Independence, OR, US	 Good!
4516:	Fairless Hills, PA, US	 Good!
4517:	Durham, NC, US	 Good!
4518:	Frederic, WI, US	 Good!
4519:	Gridley, CA, US	 Good!
4520:	Grafton, WI, US	 Good!
4521:	Minden, NV, US	 Good!
4522:	Harrodsburg, KY, US	 Good!
4523:	Andrews, TX, US	 Good!
4524:	Soledad, CA, US	 Good!
4525:	Ripley, TN, US	 Good!
4526:	Sherwood, OR, US	 Good!
4527:

4757:	Orion, MI, US	 Good!
4758:	Park City, UT, US	 Good!
4759:	Tinian, MP, US	 Good!
4760:	Newton, KS, US	 Good!
4761:	Mount Carmel, PA, US	 Good!
4762:	Kamuela, HI, US	 Good!
4763:	Hebron, OH, US	 Good!
4764:	Morganville, NJ, US	 Good!
4765:	List, CA, US	 Good!
4766:	Plymouth, MI, US	 Good!
4767:	Racine, WI, US	 Good!
4768:	Bedford, OH, US	 Good!
4769:	Hockessin, DE, US	 Good!
4770:	Manhasset, NY, US	 Good!
4771:	Vacaville, CA, US	 Good!
4772:	Aberdeen, MD, US	 Good!
4773:	Birdsboro, PA, US	 Good!
4774:	Clayton, NC, US	 Good!
4775:	Princeton, KY, US	 Good!
4776:	Queens, NY, US	 Good!
4777:	Stowe, VT, US	 Good!
4778:	Elizabeth, NJ, US	 Good!
4779:	Derby, KS, US	 Good!
4780:	York, ME, US	 Good!
4781:	Lake Saint Louis, MO, US	 Good!
4782:	Oakdale, MN, US	 Good!
4783:	Mounds View, MN, US	 Good!
4784:	Summerlin, NV, US	 Good!
4785:	Pawnee County, KS, US	 Good!
4786:	Lake Charles, LA, US	 Good!
4787:	Hazard, KY, US	 Good!
4788:	Crystal City, VA, US	 Good!
4789:	Ocean City, MD, US	 Good!
47

5022:	Pasadena, MD, US	 Good!
5023:	Viborg, SD, US	 Good!
5024:	Marysville, WA, US	 Good!
5025:	Cadillac, MI, US	 Good!
5026:	Mahnomen, MN, US	 Good!
5027:	Tukwila, WA, US	 Good!
5028:	North Hills, CA, US	 Good!
5029:	Kissimmee, FL, US	 Good!
5030:	Patchogue, NY, US	 Good!
5031:	North Andover, MA, US	 Good!
5032:	Sterling, CO, US	 Good!
5033:	Key Biscayne, FL, US	 Good!
5034:	Doraville, GA, US	 Good!
5035:	Uniontown, PA, US	 Good!
5036:	Enumclaw, WA, US	 Good!
5037:	West Allis, WI, US	 Good!
5038:	Wolcott, CT, US	 Good!
5039:	Chambersburg, PA, US	 Good!
5040:	La Vernia, TX, US	 Good!
5041:	Dandridge, TN, US	 Good!
5042:	Kingsville, TX, US	 Good!
5043:	Port Jefferson, NY, US	 Good!
5044:	Harlan, IA, US	 Good!
5045:	Seattle, WA, US	 Good!
5046:	Jackson, WY, US	 Good!
5047:	Bedford, PA, US	 Good!
5048:	Redington Shores, FL, US	 Good!
5049:	Jamison, PA, US	 Good!
5050:	Butler, PA, US	 Good!
5051:	Town of Guilford, CT, US	 Good!
5052:	Brentwood, TN, US	 Good!
5053:	Burke, VA, US	 Good!
5054

5284:	Haddam, CT, US	 Good!
5285:	Schofield Barracks, HI, US	 Good!
5286:	Miami, OK, US	 Good!
5287:	Radcliff, KY, US	 Good!
5288:	Hatfield, PA, US	 Good!
5289:	Columbia, MO, US	 Good!
5290:	Charlotte, NC, US	 Good!
5291:	Westmont, IL, US	 Good!
5292:	Elkland, PA, US	 Good!
5293:	Four Oaks, NC, US	 Good!
5294:	Stonegate, CO, US	 Good!
5295:	Jacksboro, TX, US	 Good!
5296:	Creve Coeur, MO, US	 Good!
5297:	Shonto, AZ, US	 Good!
5298:	Buffalo, NY, US	 Good!
5299:	Lakeville, MA, US	 Good!
5300:	Orwigsburg, PA, US	 Good!
5301:	Delavan, WI, US	 Good!
5302:	Webster, TX, US	 Good!
5303:	Orem, UT, US	 Good!
5304:	San Luis Obispo, CA, US	 Good!
5305:	Leipsic, OH, US	 Good!
5306:	Kirkwood, MO, US	 Good!
5307:	Linden, NJ, US	 Good!
5308:	Oxford, AL, US	 Good!
5309:	Kenner, LA, US	 Good!
5310:	Blackwell, OK, US	 Good!
5311:	Azle, TX, US	 Good!
5312:	Hollywood, FL, US	 Good!
5313:	Hudson, WI, US	 Good!
5314:	Tullahoma, TN, US	 Good!
5315:	Camp Lejeune, NC, US	 Good!
5316:	Malden, MA, US	 Good!
5317:	

In [13]:
locations_df.head()

Unnamed: 0,location,city,state,latitude,longitude
0,"Clinton, MS",Clinton,MS,32.341534,-90.321759
1,"Temple City, CA",Temple City,CA,34.108299,-118.057757
2,"Covington, GA",Covington,GA,39.083622,-84.508371
3,"Farmers Branch, TX",Farmers Branch,TX,32.926514,-96.896115
4,"Rockville, MD",Rockville,MD,39.081798,-77.151684


In [14]:
# Merging clean_df and locations_df 
merged_df = pd.merge(clean_df, locations_df, on=['city', 'state'], how='left')
merged_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit,location,latitude,longitude
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,,"Ontario, CA",34.065846,-117.64843
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,,"Raleigh, NC",35.780398,-78.639099
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,,"Holmes, PA",39.904279,-75.30852
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly,"Wilmington, NC",34.225728,-77.944711
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,,"Auburn, AL",32.609857,-85.480783


In [15]:
# Getting only the data I need for the map visual
transformed_df = merged_df[['job_title', 'company_name', 'category', 'location', 'latitude', 'longitude']]
transformed_df.head()

Unnamed: 0,job_title,company_name,category,location,latitude,longitude
0,Material Handler,Axium Plastics,Transportation & Logistics,"Ontario, CA",34.065846,-117.64843
1,Full Time HHA/CNA/PCA Position,Right at Home of the Triangle,Healthcare,"Raleigh, NC",35.780398,-78.639099
2,Senior Retail Supervisor,Goodwill Industries Of Delaware & D,Retail,"Holmes, PA",39.904279,-75.30852
3,Medical Biller / Collector (Wilmington),Carolina Dunes Behavioral Health,Administrative,"Wilmington, NC",34.225728,-77.944711
4,Outside Sales Professional,Terminix,Sales,"Auburn, AL",32.609857,-85.480783


In [16]:
indeed_jobs_jsonfile = transformed_df.to_json(orient="index")

In [17]:
# Writing the output to json file 
with open('../resources/indeed_jobs_w.json', 'w') as file:
    file.write(indeed_jobs_jsonfile)