In [1]:
# pip install geopy 
# pip install Nominatim

In [1]:
import pandas as pd
import time
import json
from geopy.adapters import AioHTTPAdapter
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

### Reading and cleaning Indeed Jobs

In [2]:
path_to_jobs = ('../resources/indeed_jobs.csv')

In [3]:
# Read raw data
jobs = pd.read_csv(path_to_jobs)
jobs.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation-or-logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


In [4]:
# Remove 'Remote' from cities column
clean_df = jobs.drop(jobs.loc[jobs['city'] == 'Remote'].index)

In [5]:
# Drop not needed categories (whole rows)
clean_df.drop(clean_df.loc[clean_df['category'] == " national origin"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == " AFL-CIO 3550.</p>"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "Government-or-military"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "['ann arbor']"].index, inplace=True)

In [6]:
# Drop States which are '+'pageId:'+e+' in the records (whole rows, these rows have messed up cities also)
# Need data with Cities and States because this data will go to the map visual
clean_df.drop(clean_df.loc[clean_df['state'].str.len() != 2 ].index, inplace=True)

In [7]:
# Renaming categories for cleaner look (they will show up in map visual)
clean_df['category'].replace({'Accounting-or-finance': 'Accounting & Finance'}, inplace=True)
clean_df['category'].replace({'Arts-or-entertainment-or-publishing': 'Entertainment & Publishing'}, inplace=True)
clean_df['category'].replace({'Banking-or-loans': 'Banking & Loans'}, inplace=True)
clean_df['category'].replace({'Computer-or-internet': 'Computer & Internet'}, inplace=True)
clean_df['category'].replace({'Construction-or-facilities': 'Construction & Facilities'}, inplace=True)
clean_df['category'].replace({'Customer-Service': 'Customer Service'}, inplace=True)
clean_df['category'].replace({'Education-or-training': 'Education & Training'}, inplace=True)
clean_df['category'].replace({'Engineering-or-architecture': 'Engineering & Architecture'}, inplace=True)
clean_df['category'].replace({'Hospitality-or-travel': 'Travel & Hospitality'}, inplace=True)
clean_df['category'].replace({'Human-Resources': 'Human Resources'}, inplace=True)
clean_df['category'].replace({'Law-Enforcement-or-security': 'Law Enforcement & Security'}, inplace=True)
clean_df['category'].replace({'Manufacturing-or-mechanical': 'Manufacturing & Mechanical'}, inplace=True)
clean_df['category'].replace({'Marketing-or-advertising-or-pr': 'Marketing & Advertisement'}, inplace=True)
clean_df['category'].replace({'Non-profit/volunteering': 'Non-Profit & Volunteering'}, inplace=True)
clean_df['category'].replace({'Pharmaceutical/bio-tech': 'Pharmaceutical & Bio-Tech'}, inplace=True)
clean_df['category'].replace({'Real-Estate': 'Real Estate'}, inplace=True)
clean_df['category'].replace({'Restaurant-or-food-Service': 'Restaurant & Food Service'}, inplace=True)
clean_df['category'].replace({'Transportation-or-logistics': 'Transportation & Logistics'}, inplace=True)
clean_df['category'].replace({'Upper-Management-or-consulting': 'Consulting & Upper Management'}, inplace=True)

In [8]:
# Renaming the three job titles to correct names (removing strange characters)
clean_df['job_title'].replace({'CartÃ© Hotel San Diego, a Curio Collection by Hilton': 'Curio Collection by Hilton'}, inplace=True)
clean_df['job_title'].replace({"Harrahâ€™s Cherokee Casino Resort": "Harrah's Cherokee Casino Resort"}, inplace=True)
clean_df['job_title'].replace({'Agua Caliente Casino â€¢ Resort â€¢ Spa': 'Agua Caliente Casino Resort Spa'}, inplace=True)

In [9]:
# Renaming some cities to correct names (some aren't full, some have strange characters)
clean_df['city'].replace({"Aber Prov Grd": "Aberdeen Proving Ground"}, inplace=True)
clean_df['city'].replace({"St. Mary\x27s City": "Saint Marys City"}, inplace=True)
clean_df['city'].replace({"Bangor Trident Base": "Bangor"}, inplace=True)
clean_df['city'].replace({"Normandy Farms Estates": "Blue Bell"}, inplace=True)
clean_df['city'].replace({"Sienna Plant": "Sienna Plantation"}, inplace=True)
clean_df['city'].replace({"JBPHH": "Joint Base Pearl Harbor-Hickam"}, inplace=True)
clean_df['city'].replace({"Coeur d\x27Alene": "Coeur d'Alene"}, inplace=True)
clean_df['city'].replace({"Dallas\u002FFort Worth International Airport": "DFW Airport"}, inplace=True)
clean_df['city'].replace({"Homestead AFB": "Homestead"}, inplace=True)
clean_df['city'].replace({"Wpafb": "Wright-Patterson AFB"}, inplace=True)

In [10]:
# Group by category and see if I have only the categories I need
# AND if city, state & job_title records have equal count 
clean_df.groupby('category').count()

Unnamed: 0_level_0,uniq_id,job_title,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Accounting & Finance,1425,1425,1425,1425,1425,1425,1425,1425,328,327,333
Administrative,3419,3419,3419,3419,3419,3419,3419,3419,802,799,797
Banking & Loans,513,513,513,513,513,513,513,513,77,77,76
Computer & Internet,1884,1884,1884,1884,1884,1884,1884,1884,199,198,197
Construction & Facilities,1760,1760,1760,1760,1760,1760,1760,1760,556,548,558
Consulting & Upper Management,464,464,464,464,464,464,464,464,90,90,91
Customer Service,1252,1252,1252,1252,1252,1252,1252,1252,277,271,275
Education & Training,982,982,982,982,982,982,982,982,329,325,327
Engineering & Architecture,491,491,491,491,491,491,491,491,97,95,98
Entertainment & Publishing,299,299,299,299,299,299,299,299,56,55,55


In [11]:
clean_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


### Getting Latitude & Longitude

In [12]:
# Geolocator to get coordinates
geolocator = Nominatim(user_agent="JobMapping")
location = "Georgetown, DE, US"
coord = geolocator.geocode(location, timeout=None)
coord

Location(Georgetown, Williamson County, Texas, United States, (30.6665525, -97.6779637977368, 0.0))

In [13]:
# Create new DF which will have lat and long
locations_df = pd.DataFrame(columns = ['location', 'city', 'state', 'latitude', 'longitude'])

coordinates = []

# Geolocator to get coordinates
geolocator = Nominatim(user_agent="JobMapping")

# Creating 'coordinates' list ['city, state']
for index in clean_df.index:
    location = clean_df['city'][index] + ", " + clean_df['state'][index]
    coordinates.append(location)
    
coordinates_unique = list(set(coordinates))
count = 0

# Getting lat and lon for 'coordinates_unique' 
# AND store lat into 'latitude' list & lon into 'longitude' list
for loc in coordinates_unique:
        location = loc.split(', ')[0] + ", " + loc.split(', ')[1] + ", " + "US"
        print(str(count)+':\t' +location, end='')
        count += 1
        try:
            coord = geolocator.geocode(location, timeout=None)
            if(coord != None):
                locations_df.loc[len(locations_df.index)] = [loc, loc.split(', ')[0], loc.split(', ')[1],coord.latitude,coord.longitude]
                print('\t Good!')
            else:
                print('\t Empty Record!')
        except GeocoderTimedOut as e:
            print("Error: geocode failed on input %s with message %s"%(location, e.message))
            pass
        time.sleep(0.5)

0:	Emmitsburg, MD, US	 Good!
1:	Valencia, PA, US	 Good!
2:	Franklin Lakes, NJ, US	 Good!
3:	River Falls, WI, US	 Good!
4:	Baird, TX, US	 Good!
5:	Putnam, CT, US	 Good!
6:	Cape Coral, FL, US	 Good!
7:	Brewster, MA, US	 Good!
8:	Clarkston, WA, US	 Good!
9:	East Hanover, NJ, US	 Good!
10:	Poland, ME, US	 Good!
11:	Magnolia, AR, US	 Good!
12:	Atlantis, FL, US	 Good!
13:	San Angelo, TX, US	 Good!
14:	Pine Ridge, SD, US	 Good!
15:	Butte, MT, US	 Good!
16:	Superior, WI, US	 Good!
17:	Lakewood, CO, US	 Good!
18:	Newark, DE, US	 Good!
19:	West Lafayette, IN, US	 Good!
20:	Four Oaks, NC, US	 Good!
21:	Merrimack, NH, US	 Good!
22:	Yorba Linda, CA, US	 Good!
23:	Truth or Consequences, NM, US	 Good!
24:	Terrell, TX, US	 Good!
25:	Haile, LA, US	 Good!
26:	Roland Park, MD, US	 Good!
27:	Amherst, NY, US	 Good!
28:	Collierville, TN, US	 Good!
29:	Grundy, VA, US	 Good!
30:	Crown Point, IN, US	 Good!
31:	DeLand, FL, US	 Good!
32:	Waterford, WI, US	 Good!
33:	Kansas City, KS, US	 Good!
34:	Fort Riley, KS,

275:	Blackwell, OK, US	 Good!
276:	Storrs, CT, US	 Good!
277:	Totowa, NJ, US	 Good!
278:	Lees Summit, MO, US	 Good!
279:	Alexandria, VA, US	 Good!
280:	Palm Springs, CA, US	 Good!
281:	Pontotoc, MS, US	 Good!
282:	Charlottesville, VA, US	 Good!
283:	Pleasanton, CA, US	 Good!
284:	Beaverton, MI, US	 Good!
285:	Moses Lake, WA, US	 Good!
286:	Eugene, OR, US	 Good!
287:	Braselton, GA, US	 Good!
288:	North Hills, CA, US	 Good!
289:	Mounds View, MN, US	 Good!
290:	Artesia, NM, US	 Good!
291:	Ossining, NY, US	 Good!
292:	Port Arthur, TX, US	 Good!
293:	Hartford, KY, US	 Good!
294:	Providence, RI, US	 Good!
295:	Shelby, NC, US	 Good!
296:	Freeland, MI, US	 Good!
297:	Budd Lake, NJ, US	 Good!
298:	Marietta, SC, US	 Good!
299:	Glenpool, OK, US	 Good!
300:	Texas, IN, US	 Good!
301:	Bishopville, SC, US	 Good!
302:	Reedsville, PA, US	 Good!
303:	Havertown, PA, US	 Good!
304:	Canton, IL, US	 Good!
305:	Santa Fe Springs, CA, US	 Good!
306:	Richmond Heights, OH, US	 Good!
307:	Calumet City, IL, US	 Go

545:	Capitol Heights, MD, US	 Good!
546:	Middlesboro, KY, US	 Good!
547:	Mission, KS, US	 Good!
548:	North East, MD, US	 Good!
549:	Palm Bay, FL, US	 Good!
550:	Kirtland, NM, US	 Good!
551:	Edison, NJ, US	 Good!
552:	Antigo, WI, US	 Good!
553:	Trumbull, CT, US	 Good!
554:	Gambrills, MD, US	 Good!
555:	Hampton, VA, US	 Good!
556:	Sausalito, CA, US	 Good!
557:	Truckee, CA, US	 Good!
558:	Hancock, WI, US	 Good!
559:	Chatsworth, CA, US	 Good!
560:	Vail, AZ, US	 Good!
561:	Sterling, VA, US	 Good!
562:	French Camp, CA, US	 Good!
563:	Hyde Park, MA, US	 Good!
564:	Hutchinson, KS, US	 Good!
565:	Lake Charles, LA, US	 Good!
566:	Maumee, OH, US	 Good!
567:	Pine Grove, PA, US	 Good!
568:	Avon Park, FL, US	 Good!
569:	Graphic, AR, US	 Good!
570:	Isanti, MN, US	 Good!
571:	Bethlehem, PA, US	 Good!
572:	Clinton, PA, US	 Good!
573:	Shannon, MS, US	 Good!
574:	Greenlawn, NY, US	 Good!
575:	Rockford, IL, US	 Good!
576:	Independence, KY, US	 Good!
577:	Tannersville, PA, US	 Good!
578:	Dallas\u002FFort W

816:	Ozark, MO, US	 Good!
817:	Center Line, MI, US	 Good!
818:	Commerce, TX, US	 Good!
819:	Bloomfield Hills, MI, US	 Good!
820:	Saint Cloud, FL, US	 Good!
821:	Abbott Park, IL, US	 Good!
822:	Wailuku, HI, US	 Good!
823:	Yorktown Heights, NY, US	 Good!
824:	Springfield, MA, US	 Good!
825:	Vernon, TX, US	 Good!
826:	Eden Prairie, MN, US	 Good!
827:	Rapid City, SD, US	 Good!
828:	Goleta, CA, US	 Good!
829:	Dayton, TN, US	 Good!
830:	Dublin, VA, US	 Good!
831:	New Franken, WI, US	 Good!
832:	Lacon, IL, US	 Good!
833:	Bunnell, FL, US	 Good!
834:	West Jordan, UT, US	 Good!
835:	Georgetown, OH, US	 Good!
836:	Aberdeen, NC, US	 Good!
837:	Gadsden, AL, US	 Good!
838:	Township of Lawrence, IN, US	 Good!
839:	Shorewood, WI, US	 Good!
840:	Cypress, TX, US	 Good!
841:	Greenfield, IN, US	 Good!
842:	Katy, TX, US	 Good!
843:	Town of Cortlandt, NY, US	 Good!
844:	Melville, NY, US	 Good!
845:	Bohemia, NY, US	 Good!
846:	Southborough, MA, US	 Good!
847:	Pearl River, NY, US	 Good!
848:	Westlake, TX, US	

1083:	Mauston, WI, US	 Good!
1084:	Naples, FL, US	 Good!
1085:	Brandon, VT, US	 Good!
1086:	Wichita Falls, TX, US	 Good!
1087:	Pinehurst, NC, US	 Good!
1088:	Shonto, AZ, US	 Good!
1089:	Williamsburg, VA, US	 Good!
1090:	Georgetown, DE, US	 Good!
1091:	Henrico, VA, US	 Good!
1092:	Grafton, WV, US	 Good!
1093:	Stanford, CA, US	 Good!
1094:	Jim Falls, WI, US	 Good!
1095:	Nellis AFB, NV, US	 Good!
1096:	Patchogue, NY, US	 Good!
1097:	West Sacramento, CA, US	 Good!
1098:	Hilton Head Island, SC, US	 Good!
1099:	Lenoir City, TN, US	 Good!
1100:	Waldport, OR, US	 Good!
1101:	Las Cruces, NM, US	 Good!
1102:	Reserve, LA, US	 Good!
1103:	Bristol, IN, US	 Good!
1104:	Miami County, KS, US	 Good!
1105:	Hawley, PA, US	 Good!
1106:	Snohomish, WA, US	 Good!
1107:	Springboro, OH, US	 Good!
1108:	East Moline, IL, US	 Good!
1109:	Tooele, UT, US	 Good!
1110:	Redding, CA, US	 Good!
1111:	Pocatello, ID, US	 Good!
1112:	Jacksonville, FL, US	 Good!
1113:	Murphys, CA, US	 Good!
1114:	North Billerica, MA, US	 Go

1344:	Concord, CA, US	 Good!
1345:	Searcy, AR, US	 Good!
1346:	Lake George, NY, US	 Good!
1347:	Oak Park Heights, MN, US	 Good!
1348:	Columbiana, OH, US	 Good!
1349:	Manalapan, NJ, US	 Good!
1350:	Versailles, OH, US	 Good!
1351:	Norwell, MA, US	 Good!
1352:	Belton, MO, US	 Good!
1353:	Rocky Hill, CT, US	 Good!
1354:	West Grove, PA, US	 Good!
1355:	Chardon, OH, US	 Good!
1356:	Sylacauga, AL, US	 Good!
1357:	East Bernard, TX, US	 Good!
1358:	Williamston, NC, US	 Good!
1359:	Cardiff-by-the-Sea, CA, US	 Good!
1360:	Pacific, MO, US	 Good!
1361:	Gladstone, OR, US	 Good!
1362:	Minco, OK, US	 Good!
1363:	National, MD, US	 Good!
1364:	Picatinny Arsenal, NJ, US	 Good!
1365:	Orangevale, CA, US	 Good!
1366:	Pelican Rapids, MN, US	 Good!
1367:	North Hampton, NH, US	 Good!
1368:	Roswell, GA, US	 Good!
1369:	Grannis, AR, US	 Good!
1370:	Long Valley, NJ, US	 Good!
1371:	Hollywood, FL, US	 Good!
1372:	Ridgeland, SC, US	 Good!
1373:	Tahlequah, OK, US	 Good!
1374:	Lake County, CA, US	 Good!
1375:	Grand F

1603:	Township of Galloway, NJ, US	 Good!
1604:	Highland Heights, KY, US	 Good!
1605:	Folcroft, PA, US	 Good!
1606:	Crawfordsville, IN, US	 Good!
1607:	Norfolk, VA, US	 Good!
1608:	Brighton, MA, US	 Good!
1609:	Blountville, TN, US	 Good!
1610:	Papillion, NE, US	 Good!
1611:	San Bernardino, CA, US	 Good!
1612:	Peoria, AZ, US	 Good!
1613:	Andover, MA, US	 Good!
1614:	Mount Angel, OR, US	 Good!
1615:	Belfast, ME, US	 Good!
1616:	Jefferson County, KY, US	 Good!
1617:	Whiteville, NC, US	 Good!
1618:	Nottingham, MD, US	 Good!
1619:	Lorton, VA, US	 Good!
1620:	Dresser, WI, US	 Good!
1621:	Kearny, NJ, US	 Good!
1622:	Zanesville, OH, US	 Good!
1623:	Marion, IN, US	 Good!
1624:	Baldwin Park, CA, US	 Good!
1625:	Waterford, MI, US	 Good!
1626:	Spirit Lake, IA, US	 Good!
1627:	Carmel, IN, US	 Good!
1628:	Beaver Falls, PA, US	 Good!
1629:	Westminster, MA, US	 Good!
1630:	McGuire AFB, NJ, US	 Good!
1631:	Barberton, OH, US	 Good!
1632:	Jamaica Plain, MA, US	 Good!
1633:	Gainesville, FL, US	 Good!
1634

1862:	Detroit Lakes, MN, US	 Good!
1863:	Buffalo, MN, US	 Good!
1864:	Cottonwood, AZ, US	 Good!
1865:	Franklin Park, PA, US	 Good!
1866:	Lake Park, GA, US	 Good!
1867:	Lynnfield, MA, US	 Good!
1868:	Campbellsville, KY, US	 Good!
1869:	Freeport, NY, US	 Good!
1870:	Santa Rosa Beach, FL, US	 Good!
1871:	Cape May Court House, NJ, US	 Good!
1872:	Pulaski, VA, US	 Good!
1873:	Barronett, WI, US	 Good!
1874:	Temple, TX, US	 Good!
1875:	Burton, MI, US	 Good!
1876:	Sylvania, OH, US	 Good!
1877:	Monticello, MN, US	 Good!
1878:	Pawnee City, NE, US	 Good!
1879:	Marietta, OK, US	 Good!
1880:	Pinckney, MI, US	 Good!
1881:	Woodbury, NY, US	 Good!
1882:	Broken Arrow, OK, US	 Good!
1883:	Emporia, VA, US	 Good!
1884:	Knightdale, NC, US	 Good!
1885:	Sandy, UT, US	 Good!
1886:	Mebane, NC, US	 Good!
1887:	Tuscaloosa County, AL, US	 Good!
1888:	Runnemede, NJ, US	 Good!
1889:	Canton, GA, US	 Good!
1890:	Tenafly, NJ, US	 Good!
1891:	Stroh, IN, US	 Good!
1892:	Croton-on-Hudson, NY, US	 Good!
1893:	Exeter, PA, 

2123:	McKeesport, PA, US	 Good!
2124:	Evansville, IN, US	 Good!
2125:	Telluride, CO, US	 Good!
2126:	Annapolis, MD, US	 Good!
2127:	North Las Vegas, NV, US	 Good!
2128:	Paragould, AR, US	 Good!
2129:	Clermont, FL, US	 Good!
2130:	Hemet, CA, US	 Good!
2131:	Lindale, TX, US	 Good!
2132:	Big Rapids, MI, US	 Good!
2133:	Hamilton, AL, US	 Good!
2134:	Bardstown, KY, US	 Good!
2135:	Somerset, WI, US	 Good!
2136:	Buffalo Grove, IL, US	 Good!
2137:	Cocoa Beach, FL, US	 Good!
2138:	Billings, MT, US	 Good!
2139:	Coventry, RI, US	 Good!
2140:	Aurora, OH, US	 Good!
2141:	Illiopolis, IL, US	 Good!
2142:	Bristol, CT, US	 Good!
2143:	Scottsdale, AZ, US	 Good!
2144:	Lytle, TX, US	 Good!
2145:	La Mirada, CA, US	 Good!
2146:	Milan, IL, US	 Good!
2147:	Wyomissing, PA, US	 Good!
2148:	Oakland, CA, US	 Good!
2149:	Lathrop, CA, US	 Good!
2150:	Killeen, TX, US	 Good!
2151:	Andover, KS, US	 Good!
2152:	Glen Cove, NY, US	 Good!
2153:	New Haven, CT, US	 Good!
2154:	Santa Maria, CA, US	 Good!
2155:	Wayland, MI, U

2389:	Defuniak Springs, FL, US	 Good!
2390:	Valley Stream, NY, US	 Good!
2391:	Malden, MO, US	 Good!
2392:	Athens, OH, US	 Good!
2393:	Cincinnati, OH, US	 Good!
2394:	Texarkana, AR, US	 Good!
2395:	Hot Springs, AR, US	 Good!
2396:	Englewood, CO, US	 Good!
2397:	Brielle, NJ, US	 Good!
2398:	Mason, TN, US	 Good!
2399:	Horsham, PA, US	 Good!
2400:	Newell, WV, US	 Good!
2401:	Kane County, IL, US	 Good!
2402:	Lincoln, NE, US	 Good!
2403:	Tustin, CA, US	 Good!
2404:	Berea, OH, US	 Good!
2405:	Mc Kees Rocks, PA, US	 Good!
2406:	Wintersville, OH, US	 Good!
2407:	Natchitoches, LA, US	 Good!
2408:	Hialeah Estates, FL, US	 Good!
2409:	Bessemer, AL, US	 Good!
2410:	N Huntingdon, PA, US	 Good!
2411:	Auberry, CA, US	 Good!
2412:	Waycross, GA, US	 Good!
2413:	Marana, AZ, US	 Good!
2414:	Saint Paul, MN, US	 Good!
2415:	Marion, TX, US	 Good!
2416:	Lynwood, CA, US	 Good!
2417:	Succasunna, NJ, US	 Good!
2418:	Randolph, MA, US	 Good!
2419:	Edgewood, KY, US	 Good!
2420:	Boothwyn, PA, US	 Good!
2421:	Anthon

2654:	New Fairfield, CT, US	 Good!
2655:	Walpole, MA, US	 Good!
2656:	Palestine, TX, US	 Good!
2657:	Berlin, VT, US	 Good!
2658:	Chicago Heights, IL, US	 Good!
2659:	Fayetteville, NY, US	 Good!
2660:	South Holland, IL, US	 Good!
2661:	Kinston, NC, US	 Good!
2662:	Oahu Island, HI, US	 Good!
2663:	Pilot Mountain, NC, US	 Good!
2664:	College Point, NY, US	 Good!
2665:	Champaign, IL, US	 Good!
2666:	Clinton, CT, US	 Good!
2667:	Munising, MI, US	 Good!
2668:	Menomonie, WI, US	 Good!
2669:	Laytonsville, MD, US	 Good!
2670:	Long Branch, NJ, US	 Good!
2671:	Rosenberg, TX, US	 Good!
2672:	Westerville, OH, US	 Good!
2673:	Bala-Cynwyd, PA, US	 Good!
2674:	Gary, IN, US	 Good!
2675:	Owosso, MI, US	 Good!
2676:	Ashland, CA, US	 Good!
2677:	Hurlburt Field, FL, US	 Good!
2678:	Crystal City, MO, US	 Good!
2679:	Liberty, MO, US	 Good!
2680:	Sundance, WY, US	 Good!
2681:	Arlington, VA, US	 Good!
2682:	Wrightsville, PA, US	 Good!
2683:	Kailua, HI, US	 Good!
2684:	Jensen Beach, FL, US	 Good!
2685:	Andrews 

2914:	Burke County, NC, US	 Good!
2915:	Fayville, MA, US	 Good!
2916:	Flower Mound, TX, US	 Good!
2917:	Atlanta, MI, US	 Good!
2918:	Cambria, CA, US	 Good!
2919:	Beach City, OH, US	 Good!
2920:	Grand Haven, MI, US	 Good!
2921:	Millcreek, UT, US	 Good!
2922:	Sandusky, MI, US	 Good!
2923:	Lexington, KY, US	 Good!
2924:	Amery, WI, US	 Good!
2925:	South Lake Tahoe, CA, US	 Good!
2926:	Andrews, TX, US	 Good!
2927:	Decatur, TX, US	 Good!
2928:	Durant, OK, US	 Good!
2929:	Boulder, CO, US	 Good!
2930:	Oak Brook, IL, US	 Good!
2931:	Kempton, PA, US	 Good!
2932:	Sebastian, FL, US	 Good!
2933:	Macclenny, FL, US	 Good!
2934:	Norfolk, MA, US	 Good!
2935:	Swartz Creek, MI, US	 Good!
2936:	Upland, CA, US	 Good!
2937:	Emmetsburg, IA, US	 Good!
2938:	Somerville, MA, US	 Good!
2939:	Wilson County, NC, US	 Good!
2940:	Arden Hills, MN, US	 Good!
2941:	Melrose Park, IL, US	 Good!
2942:	Binghamton, NY, US	 Good!
2943:	Monticello, IA, US	 Good!
2944:	Chowchilla, CA, US	 Good!
2945:	Des Moines, IA, US	 Good!


3177:	Moorestown, NJ, US	 Good!
3178:	Lansing, IL, US	 Good!
3179:	Decorah, IA, US	 Good!
3180:	Perry, NY, US	 Good!
3181:	Niles, IL, US	 Good!
3182:	Miami Lakes, FL, US	 Good!
3183:	Bonita, CA, US	 Good!
3184:	Joliet, IL, US	 Good!
3185:	Athol, MA, US	 Good!
3186:	Fort Benning, GA, US	 Good!
3187:	North Kansas City, MO, US	 Good!
3188:	Lansdale, PA, US	 Good!
3189:	Dickson, TN, US	 Good!
3190:	Santa Cruz, CA, US	 Good!
3191:	North Manchester, IN, US	 Good!
3192:	Mount Airy, MD, US	 Good!
3193:	Arnold, CA, US	 Good!
3194:	Lockbourne, OH, US	 Good!
3195:	Junction City, KS, US	 Good!
3196:	Bristol, PA, US	 Good!
3197:	Keller, TX, US	 Good!
3198:	Pawtucket, RI, US	 Good!
3199:	Tabernacle, NJ, US	 Good!
3200:	Collinsville, IL, US	 Good!
3201:	Arcata, CA, US	 Good!
3202:	Fresh Meadows, NY, US	 Good!
3203:	Cockeysville, MD, US	 Good!
3204:	Edmonds, WA, US	 Good!
3205:	Washington, NC, US	 Good!
3206:	Orrville, OH, US	 Good!
3207:	El Centro, CA, US	 Good!
3208:	Gothenburg, NE, US	 Good!
3209:	

3441:	North Adams, MA, US	 Good!
3442:	Dodgeville, WI, US	 Good!
3443:	Macdill AFB, FL, US	 Good!
3444:	Matthews, NC, US	 Good!
3445:	Lafayette, CO, US	 Good!
3446:	New Prague, MN, US	 Good!
3447:	Lancaster, SC, US	 Good!
3448:	Gordonville, PA, US	 Good!
3449:	Newtown Square, PA, US	 Good!
3450:	Melvindale, MI, US	 Good!
3451:	Anchorage, AK, US	 Good!
3452:	Lutherville-Timonium, MD, US	 Good!
3453:	Jackson, MI, US	 Good!
3454:	New Albany, IN, US	 Good!
3455:	Gardner, MA, US	 Good!
3456:	Addison, IL, US	 Good!
3457:	Saco, ME, US	 Good!
3458:	Portage, WI, US	 Good!
3459:	Monroe, NY, US	 Good!
3460:	Lubbock, TX, US	 Good!
3461:	Laredo, TX, US	 Good!
3462:	Wolfeboro, NH, US	 Good!
3463:	Goshen, NY, US	 Good!
3464:	Presque Isle, ME, US	 Good!
3465:	Bennington, VT, US	 Good!
3466:	Chatham, MA, US	 Good!
3467:	Shenandoah, PA, US	 Good!
3468:	Petersburg, VA, US	 Good!
3469:	Jordan, MN, US	 Good!
3470:	Maui, HI, US	 Good!
3471:	El Campo, TX, US	 Good!
3472:	Cerritos, CA, US	 Good!
3473:	Newton,

3707:	Baytown, TX, US	 Good!
3708:	Aptos, CA, US	 Good!
3709:	Cheektowaga, NY, US	 Good!
3710:	New Richmond, WI, US	 Good!
3711:	Corte Madera, CA, US	 Good!
3712:	Calera, AL, US	 Good!
3713:	Sheffield, IA, US	 Good!
3714:	Big Spring, TX, US	 Good!
3715:	Yreka, CA, US	 Good!
3716:	Ithaca, NY, US	 Good!
3717:	Bethesda, MD, US	 Good!
3718:	Fort Rucker, AL, US	 Good!
3719:	Westminster, CA, US	 Good!
3720:	Uniontown, OH, US	 Good!
3721:	Township of Woodbridge, NJ, US	 Good!
3722:	Stafford Springs, CT, US	 Good!
3723:	Watertown, NY, US	 Good!
3724:	Halethorpe, MD, US	 Good!
3725:	Central, MO, US	 Good!
3726:	Impact, TX, US	 Good!
3727:	Beech Island, SC, US	 Good!
3728:	Wheeling, IL, US	 Good!
3729:	Baxter Springs, KS, US	 Good!
3730:	Alys Beach, FL, US	 Good!
3731:	Fairbanks, AK, US	 Good!
3732:	Seaside, CA, US	 Good!
3733:	Hobart, WI, US	 Good!
3734:	Auburn Hills, MI, US	 Good!
3735:	Bronx, NY, US	 Good!
3736:	Springfield, KY, US	 Good!
3737:	Beltsville, MD, US	 Good!
3738:	Roslyn Heights, 

3969:	Cherry Hill, NJ, US	 Good!
3970:	Ontario, NY, US	 Good!
3971:	Livonia, MI, US	 Good!
3972:	Chehalis, WA, US	 Good!
3973:	Saint Simons Island, GA, US	 Good!
3974:	Plainfield, IN, US	 Good!
3975:	Northport, AL, US	 Good!
3976:	Reynoldsburg, OH, US	 Good!
3977:	Plankinton, SD, US	 Good!
3978:	Bentonville, AR, US	 Good!
3979:	Hannibal, MO, US	 Good!
3980:	Fullerton, CA, US	 Good!
3981:	Saint Andrews, SC, US	 Good!
3982:	Rootstown, OH, US	 Good!
3983:	Raymore, MO, US	 Good!
3984:	Quincy, IL, US	 Good!
3985:	Feeding Hills, MA, US	 Good!
3986:	Youngstown, OH, US	 Good!
3987:	Mesquite, NV, US	 Good!
3988:	Gillette, WY, US	 Good!
3989:	Glen Rock, NJ, US	 Good!
3990:	Lehighton, PA, US	 Good!
3991:	Hood River, OR, US	 Good!
3992:	Island Lake, IL, US	 Good!
3993:	Bridgewater, MA, US	 Good!
3994:	Decatur, GA, US	 Good!
3995:	Kirksville, MO, US	 Good!
3996:	Forsyth, MT, US	 Good!
3997:	Nicholasville, KY, US	 Good!
3998:	Shrewsbury, MA, US	 Good!
3999:	Queen Creek, AZ, US	 Good!
4000:	Sunrise, 

4230:	Key West, FL, US	 Good!
4231:	Tewksbury, MA, US	 Good!
4232:	Laurel, MT, US	 Good!
4233:	Louisville, CO, US	 Good!
4234:	Lagrange, IN, US	 Good!
4235:	Carver, MN, US	 Good!
4236:	Latrobe, PA, US	 Good!
4237:	Kasson, MN, US	 Good!
4238:	Fresno County, CA, US	 Good!
4239:	Shawnee, OK, US	 Good!
4240:	Hanover, MD, US	 Good!
4241:	Klamath Falls, OR, US	 Good!
4242:	Grand Forks, ND, US	 Good!
4243:	Spearfish, SD, US	 Good!
4244:	Westtown, PA, US	 Good!
4245:	Dundee, NY, US	 Good!
4246:	Holliston, MA, US	 Good!
4247:	Charlestown, IN, US	 Good!
4248:	Holly Springs, NC, US	 Good!
4249:	Long Island, NY, US	 Good!
4250:	Sterling, NE, US	 Good!
4251:	Lolo, MT, US	 Good!
4252:	Green Bay, WI, US	 Good!
4253:	Madisonville, LA, US	 Good!
4254:	Jefferson, WI, US	 Good!
4255:	Edgerton, OH, US	 Good!
4256:	Pensacola, FL, US	 Good!
4257:	New Castle, PA, US	 Good!
4258:	Longview, WA, US	 Good!
4259:	Chipley, FL, US	 Good!
4260:	Wendell, NC, US	 Good!
4261:	Northbrook, IL, US	 Good!
4262:	Keyport, WA

4495:	Township of Wall, NJ, US	 Good!
4496:	Aberdeen, MD, US	 Good!
4497:	Kenner, LA, US	 Good!
4498:	Louisville, KY, US	 Good!
4499:	Attleboro, MA, US	 Good!
4500:	Costa Mesa, CA, US	 Good!
4501:	Barnstable County, MA, US	 Good!
4502:	Bridgeport, CT, US	 Good!
4503:	Perry, GA, US	 Good!
4504:	Kerrville, TX, US	 Good!
4505:	Englewood Cliffs, NJ, US	 Good!
4506:	Brownsburg, IN, US	 Good!
4507:	Hiram, GA, US	 Good!
4508:	Bartlett, TN, US	 Good!
4509:	Ottawa, KS, US	 Good!
4510:	Hempstead, NY, US	 Good!
4511:	San Diego, CA, US	 Good!
4512:	Brewster, NY, US	 Good!
4513:	Greenville, NC, US	 Good!
4514:	Richmond, CA, US	 Good!
4515:	Bergen, NY, US	 Good!
4516:	Parkersburg, WV, US	 Good!
4517:	Findlay, OH, US	 Good!
4518:	Merritt Island, FL, US	 Good!
4519:	Stillwater, MN, US	 Good!
4520:	Warrenville, IL, US	 Good!
4521:	Springfield, CO, US	 Good!
4522:	Plainview, NE, US	 Good!
4523:	Rye, NY, US	 Good!
4524:	Billerica, MA, US	 Good!
4525:	Wyalusing, PA, US	 Good!
4526:	Industrial, WV, US	 Goo

4758:	Beaufort, SC, US	 Good!
4759:	Oxford, NC, US	 Good!
4760:	Hartford, CT, US	 Good!
4761:	Salt Lake City, UT, US	 Good!
4762:	Township of Lakewood, NJ, US	 Good!
4763:	Slatington, PA, US	 Good!
4764:	New Braunfels, TX, US	 Good!
4765:	Piketon, OH, US	 Good!
4766:	Beatrice, NE, US	 Good!
4767:	Collinsville, VA, US	 Good!
4768:	York County, SC, US	 Good!
4769:	Boca Raton, FL, US	 Good!
4770:	Rancho Cordova, CA, US	 Good!
4771:	Toms River, NJ, US	 Good!
4772:	Medford, OK, US	 Good!
4773:	Raritan, NJ, US	 Good!
4774:	Pinson, AL, US	 Good!
4775:	Atoka, OK, US	 Good!
4776:	Baltimore, MD, US	 Good!
4777:	Daingerfield, TX, US	 Good!
4778:	The Colony, TX, US	 Good!
4779:	Elmwood Park, NJ, US	 Good!
4780:	Allendale, NJ, US	 Good!
4781:	Abilene, TX, US	 Good!
4782:	Harmans, MD, US	 Good!
4783:	Ocean City, NJ, US	 Good!
4784:	Arverne, NY, US	 Good!
4785:	Bremen, GA, US	 Good!
4786:	Omaha, NE, US	 Good!
4787:	Oxford, MI, US	 Good!
4788:	Somerset, MA, US	 Good!
4789:	Brooklyn, OH, US	 Good!
4790

5022:	Perris, CA, US	 Good!
5023:	Tuntutuliak, AK, US	 Good!
5024:	Yucca Valley, CA, US	 Good!
5025:	Oxford, FL, US	 Good!
5026:	Napa, CA, US	 Good!
5027:	Upton, MA, US	 Good!
5028:	Medford, MA, US	 Good!
5029:	Hamel, MN, US	 Good!
5030:	Chino, CA, US	 Good!
5031:	Oswego, IL, US	 Good!
5032:	New Stanton, PA, US	 Good!
5033:	Ocean Springs, MS, US	 Good!
5034:	Avon, MA, US	 Good!
5035:	Lowell, MA, US	 Good!
5036:	Mount Prospect, IL, US	 Good!
5037:	Richmond, KY, US	 Good!
5038:	Williston, ND, US	 Good!
5039:	Firestone, CO, US	 Good!
5040:	West Fargo, ND, US	 Good!
5041:	Bluffton, IN, US	 Good!
5042:	San Tan Valley, AZ, US	 Good!
5043:	Watertown, SD, US	 Good!
5044:	Cameron, LA, US	 Good!
5045:	Rosharon, TX, US	 Good!
5046:	Brooklyn Park, MN, US	 Good!
5047:	Reading, MA, US	 Good!
5048:	Hanover, NH, US	 Good!
5049:	Vincennes, IN, US	 Good!
5050:	Coplay, PA, US	 Good!
5051:	Littleton, NC, US	 Good!
5052:	Monroe, LA, US	 Good!
5053:	Pembroke Pines, FL, US	 Good!
5054:	Laramie, WY, US	 Good!

5284:	Mount Kisco, NY, US	 Good!
5285:	Mobridge, SD, US	 Good!
5286:	Galesburg, IL, US	 Good!
5287:	Ypsilanti, MI, US	 Good!
5288:	Wahpeton, ND, US	 Good!
5289:	Harrison, AR, US	 Good!
5290:	Fort Washington, PA, US	 Good!
5291:	Auburn, AL, US	 Good!
5292:	Monongahela, PA, US	 Good!
5293:	Springfield, OH, US	 Good!
5294:	Lexington, MA, US	 Good!
5295:	Macon, GA, US	 Good!
5296:	McLean, VA, US	 Good!
5297:	Bend, OR, US	 Good!
5298:	Hopkins, MN, US	 Good!
5299:	Jefferson, GA, US	 Good!
5300:	Munster, IN, US	 Good!
5301:	Boone, NC, US	 Good!
5302:	Ida Grove, IA, US	 Good!
5303:	Aurora, IL, US	 Good!
5304:	Hampton Township, PA, US	 Good!
5305:	Sykesville, MD, US	 Good!
5306:	North Myrtle Beach, SC, US	 Good!
5307:	Middleton, WI, US	 Good!
5308:	Brooksville, MS, US	 Good!
5309:	Tamiment, PA, US	 Good!
5310:	Gladstone, MI, US	 Good!
5311:	Conover, NC, US	 Good!
5312:	Great Lakes, IL, US	 Good!
5313:	Simsbury, CT, US	 Good!
5314:	Broward County, FL, US	 Good!
5315:	Allen, TX, US	 Good!
5316:	D

In [19]:
locations_df.head()

Unnamed: 0,location,city,state,latitude,longitude
0,"Emmitsburg, MD",Emmitsburg,MD,39.704542,-77.326931
1,"Valencia, PA",Valencia,PA,40.674863,-79.988494
2,"Franklin Lakes, NJ",Franklin Lakes,NJ,41.008423,-74.212234
3,"River Falls, WI",River Falls,WI,44.859511,-92.62659
4,"Baird, TX",Baird,TX,32.394856,-99.394051


In [20]:
# Merging clean_df and locations_df 
merged_df = pd.merge(clean_df, locations_df, on=['city', 'state'], how='left')
merged_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit,location,latitude,longitude
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,,"Ontario, CA",34.065846,-117.64843
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,,"Raleigh, NC",35.780398,-78.639099
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,,"Holmes, PA",39.904279,-75.30852
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly,"Wilmington, NC",34.225728,-77.944711
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,,"Auburn, AL",32.609857,-85.480783


In [21]:
# Getting only the data I need for the map visual
transformed_df = merged_df[['job_title', 'company_name', 'category', 'location', 'latitude', 'longitude']]
transformed_df.head()

Unnamed: 0,job_title,company_name,category,location,latitude,longitude
0,Material Handler,Axium Plastics,Transportation & Logistics,"Ontario, CA",34.065846,-117.64843
1,Full Time HHA/CNA/PCA Position,Right at Home of the Triangle,Healthcare,"Raleigh, NC",35.780398,-78.639099
2,Senior Retail Supervisor,Goodwill Industries Of Delaware & D,Retail,"Holmes, PA",39.904279,-75.30852
3,Medical Biller / Collector (Wilmington),Carolina Dunes Behavioral Health,Administrative,"Wilmington, NC",34.225728,-77.944711
4,Outside Sales Professional,Terminix,Sales,"Auburn, AL",32.609857,-85.480783


In [22]:
indeed_jobs_jsonfile = transformed_df.to_json(orient="index")

In [23]:
# Writing the output to json file 
with open('../resources/indeed_jobs_w.json', 'w') as file:
    file.write(indeed_jobs_jsonfile)