In [1]:
# pip install geopy 
# pip install Nominatim

In [1]:
import pandas as pd
import time
import json
from geopy.adapters import AioHTTPAdapter
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

### Reading and cleaning Indeed Jobs

In [4]:
path_to_jobs = ('../resources/indeed_jobs.csv')

In [5]:
# Read raw data
jobs = pd.read_csv(path_to_jobs)
jobs.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation-or-logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


In [24]:
# Remove 'Remote' from cities column
clean_df = jobs.drop(jobs.loc[jobs['city'] == 'Remote'].index)

In [25]:
# Drop not needed categories (whole rows)
clean_df.drop(clean_df.loc[clean_df['category'] == " national origin"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == " AFL-CIO 3550.</p>"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "Government-or-military"].index, inplace=True)
clean_df.drop(clean_df.loc[clean_df['category'] == "['ann arbor']"].index, inplace=True)

In [26]:
# Drop States which are '+'pageId:'+e+' in the records (whole rows, these rows have messed up cities also)
# Need data with Cities and States because this data will go to the map visual
clean_df.drop(clean_df.loc[clean_df['state'].str.len() != 2 ].index, inplace=True)

In [27]:
# Renaming categories for cleaner look (they will show up in map visual)
clean_df['category'].replace({'Accounting-or-finance': 'Accounting & Finance'}, inplace=True)
clean_df['category'].replace({'Arts-or-entertainment-or-publishing': 'Entertainment & Publishing'}, inplace=True)
clean_df['category'].replace({'Banking-or-loans': 'Banking & Loans'}, inplace=True)
clean_df['category'].replace({'Computer-or-internet': 'Computer & Internet'}, inplace=True)
clean_df['category'].replace({'Construction-or-facilities': 'Construction & Facilities'}, inplace=True)
clean_df['category'].replace({'Customer-Service': 'Customer Service'}, inplace=True)
clean_df['category'].replace({'Education-or-training': 'Education & Training'}, inplace=True)
clean_df['category'].replace({'Engineering-or-architecture': 'Engineering & Architecture'}, inplace=True)
clean_df['category'].replace({'Hospitality-or-travel': 'Travel & Hospitality'}, inplace=True)
clean_df['category'].replace({'Human-Resources': 'Human Resources'}, inplace=True)
clean_df['category'].replace({'Law-Enforcement-or-security': 'Law Enforcement & Security'}, inplace=True)
clean_df['category'].replace({'Manufacturing-or-mechanical': 'Manufacturing & Mechanical'}, inplace=True)
clean_df['category'].replace({'Marketing-or-advertising-or-pr': 'Marketing & Advertisement'}, inplace=True)
clean_df['category'].replace({'Non-profit/volunteering': 'Non-Profit & Volunteering'}, inplace=True)
clean_df['category'].replace({'Pharmaceutical/bio-tech': 'Pharmaceutical & Bio-Tech'}, inplace=True)
clean_df['category'].replace({'Real-Estate': 'Real Estate'}, inplace=True)
clean_df['category'].replace({'Restaurant-or-food-Service': 'Restaurant & Food Service'}, inplace=True)
clean_df['category'].replace({'Transportation-or-logistics': 'Transportation & Logistics'}, inplace=True)
clean_df['category'].replace({'Upper-Management-or-consulting': 'Consulting & Upper Management'}, inplace=True)

In [28]:
# Renaming the three job titles to correct names (removing strange characters)
clean_df['job_title'].replace({'CartÃ© Hotel San Diego, a Curio Collection by Hilton': 'Curio Collection by Hilton'}, inplace=True)
clean_df['job_title'].replace({"Harrahâ€™s Cherokee Casino Resort": "Harrah's Cherokee Casino Resort"}, inplace=True)
clean_df['job_title'].replace({'Agua Caliente Casino â€¢ Resort â€¢ Spa': 'Agua Caliente Casino Resort Spa'}, inplace=True)

In [29]:
# Renaming some cities to correct names (some aren't full, some have strange characters)
clean_df['city'].replace({"Aber Prov Grd": "Aberdeen Proving Ground"}, inplace=True)
clean_df['city'].replace({"St. Mary\x27s City": "Saint Marys City"}, inplace=True)
clean_df['city'].replace({"Bangor Trident Base": "Bangor"}, inplace=True)
clean_df['city'].replace({"Normandy Farms Estates": "Blue Bell"}, inplace=True)
clean_df['city'].replace({"Sienna Plant": "Sienna Plantation"}, inplace=True)
clean_df['city'].replace({"JBPHH": "Joint Base Pearl Harbor-Hickam"}, inplace=True)
clean_df['city'].replace({"Coeur d\x27Alene": "Coeur d'Alene"}, inplace=True)
clean_df['city'].replace({"Dallas\u002FFort Worth International Airport": "DFW Airport"}, inplace=True)
clean_df['city'].replace({"Homestead AFB": "Homestead"}, inplace=True)
clean_df['city'].replace({"Wpafb": "Wright-Patterson AFB"}, inplace=True)

In [30]:
# Group by category and see if I have only the categories I need
# AND if city, state & job_title records have equal count 
clean_df.groupby('category').count()

Unnamed: 0_level_0,uniq_id,job_title,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Accounting & Finance,1425,1425,1425,1425,1425,1425,1425,1425,328,327,333
Administrative,3419,3419,3419,3419,3419,3419,3419,3419,802,799,797
Banking & Loans,513,513,513,513,513,513,513,513,77,77,76
Computer & Internet,1884,1884,1884,1884,1884,1884,1884,1884,199,198,197
Construction & Facilities,1760,1760,1760,1760,1760,1760,1760,1760,556,548,558
Consulting & Upper Management,464,464,464,464,464,464,464,464,90,90,91
Customer Service,1252,1252,1252,1252,1252,1252,1252,1252,277,271,275
Education & Training,982,982,982,982,982,982,982,982,329,325,327
Engineering & Architecture,491,491,491,491,491,491,491,491,97,95,98
Entertainment & Publishing,299,299,299,299,299,299,299,299,56,55,55


In [31]:
clean_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,


### Getting Latitude & Longitude

In [32]:
# Create new DF which will have lat and long
locations_df = pd.DataFrame(columns = ['location', 'city', 'state', 'latitude', 'longitude'])

coordinates = []

# Geolocator to get coordinates
geolocator = Nominatim(user_agent="JobMapping")

# Creating 'coordinates' list ['city, state']
for index in clean_df.index:
    location = clean_df['city'][index] + ", " + clean_df['state'][index]
    coordinates.append(location)
    
coordinates_unique = list(set(coordinates))
count = 0

# Getting lat and lon for 'coordinates_unique' 
# AND store lat into 'latitude' list & lon into 'longitude' list
for loc in coordinates_unique:
        location = loc.split(', ')[0] + ", " + loc.split(', ')[1]
        print(str(count)+':\t' +location, end='')
        count += 1
        try:
            coord = geolocator.geocode(location, timeout=None)
            if(coord != None):
                locations_df.loc[len(locations_df.index)] = [loc, loc.split(', ')[0], loc.split(', ')[1],coord.latitude,coord.longitude]
                print('\t Good!')
            else:
                print('\t Empty Record!')
        except GeocoderTimedOut as e:
            print("Error: geocode failed on input %s with message %s"%(location, e.message))
            pass
        time.sleep(0.5)

0:	Scottdale, PA	 Good!
1:	Limerick, PA	 Good!
2:	Bedford Heights, OH	 Good!
3:	Asheboro, NC	 Good!
4:	Lyndhurst, OH	 Good!
5:	Manchester, PA	 Good!
6:	Virginia Beach, VA	 Good!
7:	Passaic, NJ	 Good!
8:	Atlantic City, NJ	 Good!
9:	Big Bear City, CA	 Good!
10:	Nederland, TX	 Good!
11:	Pinecrest, FL	 Good!
12:	Orangevale, CA	 Good!
13:	Willits, CA	 Good!
14:	Sandwich, MA	 Good!
15:	Gordon, GA	 Good!
16:	Milbank, SD	 Good!
17:	Gonzales, LA	 Good!
18:	Oswego, IL	 Good!
19:	Conway, NH	 Good!
20:	Parrottsville, TN	 Good!
21:	Flagstaff, AZ	 Good!
22:	Waterford, WI	 Good!
23:	Hartfield, VA	 Good!
24:	Auberry, CA	 Good!
25:	Black Mountain, NC	 Good!
26:	Cookeville, TN	 Good!
27:	Dolton, IL	 Good!
28:	Bella Vista, AR	 Good!
29:	Janesville, WI	 Good!
30:	Atlantic, NC	 Good!
31:	Ivins, UT	 Good!
32:	Denison, IA	 Good!
33:	Farmington Hills, MI	 Good!
34:	Chipley, FL	 Good!
35:	Cheney, WA	 Good!
36:	Washington Court House, OH	 Good!
37:	Bethel Park, PA	 Good!
38:	La Mirada, CA	 Good!
39:	Tamiment, P

313:	Shippensburg, PA	 Good!
314:	Londonderry, NH	 Good!
315:	Milan, IL	 Good!
316:	Robinson, PA	 Good!
317:	Fitzgerald, GA	 Good!
318:	Arnold, CA	 Good!
319:	Kanab, UT	 Good!
320:	Maricopa County, AZ	 Good!
321:	Luke AFB, AZ	 Good!
322:	Paragould, AR	 Good!
323:	Oxford, OH	 Good!
324:	Belvidere, IL	 Good!
325:	Clifton, NJ	 Good!
326:	Easley, SC	 Good!
327:	Woods Cross, UT	 Good!
328:	Clear Brook, VA	 Good!
329:	Jensen Beach, FL	 Good!
330:	Riverton, UT	 Good!
331:	Coburg, OR	 Good!
332:	Chino Hills, CA	 Good!
333:	Cuba, MO	 Good!
334:	Belle Vernon, PA	 Good!
335:	Houma, LA	 Good!
336:	Downingtown, PA	 Good!
337:	McCormick County, SC	 Good!
338:	Roselle, IL	 Good!
339:	Havertown, PA	 Good!
340:	Rancho Dominguez, CA	 Good!
341:	Lakewood, CA	 Good!
342:	Ada, MI	 Good!
343:	Riverwoods, IL	 Good!
344:	Palm City, FL	 Good!
345:	Castle Rock, CO	 Good!
346:	Yardley, PA	 Good!
347:	Parkville, MO	 Good!
348:	Cambridge, MN	 Good!
349:	South Hamilton, MA	 Good!
350:	Hendersonville, NC	 Good!
351:

619:	Clinton, PA	 Good!
620:	Collegedale, TN	 Good!
621:	Lawrence, MA	 Good!
622:	Rangeley, ME	 Good!
623:	Marion, IA	 Good!
624:	Whittier, CA	 Good!
625:	Marquette, MI	 Good!
626:	Wetumpka, AL	 Good!
627:	Saint Matthews, SC	 Good!
628:	Schofield Barracks, HI	 Good!
629:	Pelican Rapids, MN	 Good!
630:	Pinellas Park, FL	 Good!
631:	Harrison, AR	 Good!
632:	Roseland, NJ	 Good!
633:	Sea Island, GA	 Good!
634:	Simi Valley, CA	 Good!
635:	Batavia, NY	 Good!
636:	Roy, UT	 Good!
637:	Mineral Wells, TX	 Good!
638:	Cornelius, NC	 Good!
639:	Stone Mountain, GA	 Good!
640:	Essex, VT	 Good!
641:	Woodland, WA	 Good!
642:	Warrensville Heights, OH	 Good!
643:	Richmond, IN	 Good!
644:	College Park, MD	 Good!
645:	Chandler, AZ	 Good!
646:	Austin, MN	 Good!
647:	Mattawan, MI	 Good!
648:	Fairless Hills, PA	 Good!
649:	Medford, WI	 Good!
650:	Rocky Mount, NC	 Good!
651:	Westwood, MA	 Good!
652:	Minnesota, GA	 Good!
653:	Hudson, FL	 Good!
654:	Bannockburn, IL	 Good!
655:	Levittown, NY	 Good!
656:	Lafayette

929:	La Grande, OR	 Good!
930:	Beach City, OH	 Good!
931:	Chetek, WI	 Good!
932:	Drexel Hill, PA	 Good!
933:	Heidenheimer, TX	 Good!
934:	North Liberty, IA	 Good!
935:	Orange, CT	 Good!
936:	Indiana, PA	 Good!
937:	Glen Rose, TX	 Good!
938:	Brookfield, MO	 Good!
939:	Rutherfordton, NC	 Good!
940:	Pleasant Hill, CA	 Good!
941:	Buckhannon, WV	 Good!
942:	Wayland, MA	 Good!
943:	Alma, TX	 Good!
944:	Barrow, AK	 Good!
945:	Seattle, WA	 Good!
946:	Stratford, CA	 Good!
947:	Fletcher, NC	 Good!
948:	Westport, MA	 Good!
949:	Cartersville, GA	 Good!
950:	Wyalusing, PA	 Good!
951:	North Augusta, SC	 Good!
952:	Torrance, CA	 Good!
953:	Putnam, CT	 Good!
954:	Tarpon Springs, FL	 Good!
955:	Johnstown, OH	 Good!
956:	Curran, MI	 Good!
957:	Succasunna, NJ	 Good!
958:	Bastrop, TX	 Good!
959:	Mesa, AZ	 Good!
960:	New Port Richey, FL	 Good!
961:	Fort Benning, GA	 Good!
962:	Newton, KS	 Good!
963:	Davie, FL	 Good!
964:	Shafter, CA	 Good!
965:	Floresville, TX	 Good!
966:	Mystic, CT	 Good!
967:	Waverly, NC

1233:	Columbia, TN	 Good!
1234:	Brewster, NY	 Good!
1235:	Gilberts, IL	 Good!
1236:	Adel, GA	 Good!
1237:	Fairbanks, AK	 Good!
1238:	Quechee, VT	 Good!
1239:	Bessemer, AL	 Good!
1240:	Dickinson, TX	 Good!
1241:	Temecula Valley, CA	 Good!
1242:	Moraine, OH	 Good!
1243:	Russiaville, IN	 Good!
1244:	Alexander, AR	 Good!
1245:	Wakefield, MA	 Good!
1246:	Saint Albans, VT	 Good!
1247:	Dublin, GA	 Good!
1248:	Monticello, FL	 Good!
1249:	Ferndale, WA	 Good!
1250:	Sebring, FL	 Good!
1251:	Newsoms, VA	 Good!
1252:	Bristol, PA	 Good!
1253:	West Memphis, AR	 Good!
1254:	Yuma, AZ	 Good!
1255:	Lansing, IL	 Good!
1256:	Ayden, NC	 Good!
1257:	Creston, IA	 Good!
1258:	Los Alamos, NM	 Good!
1259:	Corona, CA	 Good!
1260:	Bloomfield, NJ	 Good!
1261:	Lake Hamilton, AR	 Good!
1262:	Scarborough, ME	 Good!
1263:	Granbury, TX	 Good!
1264:	Yukon, OK	 Good!
1265:	Dallas, GA	 Good!
1266:	Hooksett, NH	 Good!
1267:	Freeland, MI	 Good!
1268:	Greenport, NY	 Good!
1269:	Versailles, OH	 Good!
1270:	Granger, IN	 Good!
1

1539:	Clyde, NC	 Good!
1540:	Biloxi, MS	 Good!
1541:	Pleasant Ridge, MI	 Good!
1542:	Slingerlands, NY	 Good!
1543:	Broken Arrow, OK	 Good!
1544:	Knoxville, IA	 Good!
1545:	Loomis, CA	 Good!
1546:	Cedar Point, NC	 Good!
1547:	Lee, MA	 Good!
1548:	Bandon, OR	 Good!
1549:	March AFB, CA	 Good!
1550:	Selinsgrove, PA	 Good!
1551:	Millersville, MD	 Good!
1552:	Carrollton, GA	 Good!
1553:	La Joya, TX	 Good!
1554:	High Point, NC	 Good!
1555:	Charlotte, NC	 Good!
1556:	Cascade Valley, WA	 Good!
1557:	Davidson, NC	 Good!
1558:	Avon Lake, OH	 Good!
1559:	Williamsville, NY	 Good!
1560:	Pampa, TX	 Good!
1561:	Buffalo Grove, IL	 Good!
1562:	Wheeling, IL	 Good!
1563:	Sumner, WA	 Good!
1564:	Ogden, UT	 Good!
1565:	Nahunta, GA	 Good!
1566:	West Nyack, NY	 Good!
1567:	California, MD	 Good!
1568:	Williamsport, PA	 Good!
1569:	Haile, LA	 Good!
1570:	Altamonte Springs, FL	 Good!
1571:	Amarillo, TX	 Good!
1572:	Stafford, KS	 Good!
1573:	Clackamas, OR	 Good!
1574:	Fairburn, GA	 Good!
1575:	Kent, OH	 Good!
157

1838:	Durham, NH	 Good!
1839:	Lakeland, FL	 Good!
1840:	Memphis, TN	 Good!
1841:	Cocoa, FL	 Good!
1842:	Mounds View, MN	 Good!
1843:	Portage, PA	 Good!
1844:	Denton, NC	 Good!
1845:	Township of Brick, NJ	 Good!
1846:	Newry, ME	 Good!
1847:	Church Rock, NM	 Good!
1848:	Bedford Park, IL	 Good!
1849:	Texas, IN	 Good!
1850:	Claycomo, MO	 Good!
1851:	Fort Sam Houston, TX	 Good!
1852:	Calabasas Hills, CA	 Good!
1853:	Jacksboro, TX	 Good!
1854:	Southlake, TX	 Good!
1855:	Belfast, ME	 Good!
1856:	Exton, PA	 Good!
1857:	Laplace, LA	 Good!
1858:	Washington Terrace, UT	 Good!
1859:	Cohasset, MA	 Good!
1860:	Union, MO	 Good!
1861:	Palmdale, CA	 Good!
1862:	Woonsocket, RI	 Good!
1863:	Medley, FL	 Good!
1864:	Moonachie, NJ	 Good!
1865:	Gasport, NY	 Good!
1866:	Hobe Sound, FL	 Good!
1867:	Pinedale, WY	 Good!
1868:	Egg Harbor Township, NJ	 Good!
1869:	North Las Vegas, NV	 Good!
1870:	Berlin, MD	 Good!
1871:	Richmond, CA	 Good!
1872:	McCormick, SC	 Good!
1873:	Miami Gardens, FL	 Good!
1874:	Dorado Muni

2141:	Ashland, WI	 Good!
2142:	Mentone, CA	 Good!
2143:	O'Fallon, MO	 Good!
2144:	Little Canada, MN	 Good!
2145:	Blue Bell, PA	 Good!
2146:	Walker, MI	 Good!
2147:	Burbank, CA	 Good!
2148:	Lebanon, PA	 Good!
2149:	Eagle River, WI	 Good!
2150:	Sidney, OH	 Good!
2151:	Canadian, OK	 Good!
2152:	Ottawa, KS	 Good!
2153:	Middle Point, OH	 Good!
2154:	Broward County, FL	 Good!
2155:	Holly Springs, NC	 Good!
2156:	Haddam, CT	 Good!
2157:	North East, MD	 Good!
2158:	Immokalee, FL	 Good!
2159:	Des Moines, WA	 Good!
2160:	West Hartford, CT	 Good!
2161:	Malta, OH	 Good!
2162:	Christiansburg, VA	 Good!
2163:	Los Angeles County, CA	 Good!
2164:	Nitro, WV	 Good!
2165:	Cleves, OH	 Good!
2166:	Naperville, IL	 Good!
2167:	Sandy Springs, GA	 Good!
2168:	Worcester, MA	 Good!
2169:	Lithonia, GA	 Good!
2170:	Westerville, OH	 Good!
2171:	Pearl Harbor, HI	 Good!
2172:	Kennedy Space Center, FL	 Good!
2173:	Coarsegold, CA	 Good!
2174:	Richmond, UT	 Good!
2175:	Lomira, WI	 Good!
2176:	Ashland, CA	 Good!
2177:	He

2444:	Addison, IL	 Good!
2445:	Bartlett, TN	 Good!
2446:	Columbiana, OH	 Good!
2447:	Wilbraham, MA	 Good!
2448:	La Verne, CA	 Good!
2449:	Hockessin, DE	 Good!
2450:	Channahon, IL	 Good!
2451:	Salida, CA	 Good!
2452:	West Lafayette, IN	 Good!
2453:	Charleston, WV	 Good!
2454:	Babylon, NY	 Good!
2455:	East Palo Alto, CA	 Good!
2456:	Great Falls, MT	 Good!
2457:	Kingdom City, MO	 Good!
2458:	Lansing, MI	 Good!
2459:	Rockport, ME	 Good!
2460:	Capitol Heights, MD	 Good!
2461:	Centreville, VA	 Good!
2462:	Espanola, NM	 Good!
2463:	Lake Worth, FL	 Good!
2464:	Urbana, OH	 Good!
2465:	Cabarrus County, NC	 Good!
2466:	Connell, WA	 Good!
2467:	Brielle, NJ	 Good!
2468:	Defuniak Springs, FL	 Good!
2469:	Kinston, NC	 Good!
2470:	Saint Joseph, MI	 Good!
2471:	Langdon, ND	 Good!
2472:	Johnston, RI	 Good!
2473:	Blue Ash, OH	 Good!
2474:	Arnold, MO	 Good!
2475:	Zephyrhills, FL	 Good!
2476:	Franklin, TN	 Good!
2477:	West Lebanon, NH	 Good!
2478:	Whitmore Lake, MI	 Good!
2479:	Yorktown, VA	 Good!
2480:	Wh

2745:	Waupaca, WI	 Good!
2746:	Azle, TX	 Good!
2747:	Edwardsville, IL	 Good!
2748:	Media, PA	 Good!
2749:	Empire, AL	 Good!
2750:	San Juan Capistrano, CA	 Good!
2751:	Radford, VA	 Good!
2752:	Fridley, MN	 Good!
2753:	Ponchatoula, LA	 Good!
2754:	Gothenburg, NE	 Good!
2755:	Lathrop, CA	 Good!
2756:	Nisswa, MN	 Good!
2757:	Kirksville, MO	 Good!
2758:	West Palm Beach, FL	 Good!
2759:	White Lake, MI	 Good!
2760:	Albrightsville, PA	 Good!
2761:	Warner Robins, GA	 Good!
2762:	Kearny, NJ	 Good!
2763:	Castro Valley, CA	 Good!
2764:	Valatie, NY	 Good!
2765:	Wentzville, MO	 Good!
2766:	Lake Ozark, MO	 Good!
2767:	New Providence, NJ	 Good!
2768:	King, NC	 Good!
2769:	Calhoun, GA	 Good!
2770:	Shoreline, WA	 Good!
2771:	Lancaster, SC	 Good!
2772:	Orangeburg, NY	 Good!
2773:	N Huntingdon, PA	 Good!
2774:	Tustin, CA	 Good!
2775:	Columbia, MS	 Good!
2776:	West Chicago, IL	 Good!
2777:	Woodbury, NY	 Good!
2778:	Ely, MN	 Good!
2779:	Chattanooga, TN	 Good!
2780:	Beach, ND	 Good!
2781:	Stillwater, MN	 Goo

3047:	Oakdale, CA	 Good!
3048:	Glen Raven, NC	 Good!
3049:	New Kensington, PA	 Good!
3050:	West Jordan, UT	 Good!
3051:	Glen Mills, PA	 Good!
3052:	Currituck, NC	 Good!
3053:	Lacey, WA	 Good!
3054:	Henning, TN	 Good!
3055:	Ada, OK	 Good!
3056:	Brunswick, OH	 Good!
3057:	Mansfield Center, CT	 Good!
3058:	Lolo, MT	 Good!
3059:	Sunnyvale, CA	 Good!
3060:	Boonville, IN	 Good!
3061:	Chesapeake Beach, MD	 Good!
3062:	Pilot Mountain, NC	 Good!
3063:	Binghamton, NY	 Good!
3064:	Tallahassee, FL	 Good!
3065:	Jesup, GA	 Good!
3066:	Schenectady, NY	 Good!
3067:	Batesburg, SC	 Good!
3068:	Baxter Springs, KS	 Good!
3069:	North Billerica, MA	 Good!
3070:	Gardnerville, NV	 Good!
3071:	Graceville, FL	 Good!
3072:	Casselberry, FL	 Good!
3073:	Elizabeth, NJ	 Good!
3074:	Kirkwood, MO	 Good!
3075:	Burnham, PA	 Good!
3076:	Smyrna, DE	 Good!
3077:	Nyack, NY	 Good!
3078:	Shreveport, LA	 Good!
3079:	Harrisburg, PA	 Good!
3080:	Mount Pleasant, MI	 Good!
3081:	Dover AFB, DE	 Good!
3082:	Arcadia, CA	 Good!
3083:	

3348:	Whitsett, NC	 Good!
3349:	Port Richey, FL	 Good!
3350:	Richlands, VA	 Good!
3351:	Pearland, TX	 Good!
3352:	Jamul, CA	 Good!
3353:	Knoxville, TN	 Good!
3354:	Barronett, WI	 Good!
3355:	Mesquite, TX	 Good!
3356:	Jacksonville, AR	 Good!
3357:	Aberdeen, MD	 Good!
3358:	Willow Springs, MO	 Good!
3359:	Mansfield, CT	 Good!
3360:	Midlothian, VA	 Good!
3361:	Northampton, MA	 Good!
3362:	Hamptonburgh, NY	 Good!
3363:	Country Club Hills, IL	 Good!
3364:	Town of Waterbury, CT	 Good!
3365:	Lagrange, IN	 Good!
3366:	San Benito, TX	 Good!
3367:	Livingston, NJ	 Good!
3368:	Poway, CA	 Good!
3369:	New Laguna, NM	 Good!
3370:	Covington, TN	 Good!
3371:	Cherokee, NC	 Good!
3372:	Natchitoches, LA	 Good!
3373:	Carthage, MO	 Good!
3374:	Richmond, TX	 Good!
3375:	Arlington Heights, IL	 Good!
3376:	Daleville, AL	 Good!
3377:	Crete, IL	 Good!
3378:	Creve Coeur, MO	 Good!
3379:	Grovetown, GA	 Good!
3380:	Berryville, AR	 Good!
3381:	Ramsey, NJ	 Good!
3382:	East Greenwich, RI	 Good!
3383:	Highland Park, NJ

3648:	Granite City, IL	 Good!
3649:	Dresser, WI	 Good!
3650:	Millbrae, CA	 Good!
3651:	Philadelphia, TN	 Good!
3652:	Marion, VA	 Good!
3653:	Walcott, IA	 Good!
3654:	Hanover, PA	 Good!
3655:	Lexington, KY	 Good!
3656:	Ruckersville, VA	 Good!
3657:	Kimberly, OR	 Good!
3658:	Peoria, AZ	 Good!
3659:	Phillips, WI	 Good!
3660:	Keystone, CO	 Good!
3661:	Essex, MD	 Good!
3662:	Reisterstown, MD	 Good!
3663:	Jasper, IN	 Good!
3664:	Royal Oak, MI	 Good!
3665:	Waukon, IA	 Good!
3666:	Fond du Lac, WI	 Good!
3667:	Township of Galloway, NJ	 Good!
3668:	Highland Park, MI	 Good!
3669:	Township of Lakewood, NJ	 Good!
3670:	North Grosvenordale, CT	 Good!
3671:	Princeton, NJ	 Good!
3672:	Oakland, MD	 Good!
3673:	Lindsay, OK	 Good!
3674:	Grosse Pointe, MI	 Good!
3675:	Midlothian, TX	 Good!
3676:	Arvada, CO	 Good!
3677:	Long Valley, NJ	 Good!
3678:	Ellijay, GA	 Good!
3679:	Port Charlotte, FL	 Good!
3680:	New Smyrna Beach, FL	 Good!
3681:	Gulf Breeze, FL	 Good!
3682:	Lake Winnebago, MO	 Good!
3683:	Cheverly

3949:	Breinigsville, PA	 Good!
3950:	Yonkers, NY	 Good!
3951:	Golden, CO	 Good!
3952:	Fairfield, NJ	 Good!
3953:	Petersham, MA	 Good!
3954:	Salem, VA	 Good!
3955:	Beaufort, SC	 Good!
3956:	Murrieta, CA	 Good!
3957:	Goshen, KY	 Good!
3958:	Silsbee, TX	 Good!
3959:	Wheatland, WY	 Good!
3960:	Bangor, ME	 Good!
3961:	Sweet Home, OR	 Good!
3962:	Newbury Park, CA	 Good!
3963:	Freeland, WA	 Good!
3964:	Berlin, NJ	 Good!
3965:	Jamestown, ND	 Good!
3966:	White Settlement, TX	 Good!
3967:	Bowling Green, KY	 Good!
3968:	Paterson, NJ	 Good!
3969:	Ford City, PA	 Good!
3970:	Black Hawk, CO	 Good!
3971:	Arroyo Grande, CA	 Good!
3972:	Gilroy, CA	 Good!
3973:	Poteau, OK	 Good!
3974:	Marana, AZ	 Good!
3975:	Amboy, IL	 Good!
3976:	Sun Lakes, AZ	 Good!
3977:	N Richland Hills, TX	 Good!
3978:	Bow, NH	 Good!
3979:	Clarksburg, WV	 Good!
3980:	Long Island City, NY	 Good!
3981:	East Peoria, IL	 Good!
3982:	Windsor, CO	 Good!
3983:	Morrisville, VT	 Good!
3984:	Manor, TX	 Good!
3985:	Pontotoc, MS	 Good!
3986:	Pa

4253:	Saint Andrews, SC	 Good!
4254:	Robbinsville, NJ	 Good!
4255:	Victorville, CA	 Good!
4256:	Glenarden, MD	 Good!
4257:	Manville, NJ	 Good!
4258:	Lincolnton, NC	 Good!
4259:	Alpine, TX	 Good!
4260:	Winamac, IN	 Good!
4261:	Oxford, CT	 Good!
4262:	Portsmouth, RI	 Good!
4263:	Anderson, CA	 Good!
4264:	Scappoose, OR	 Good!
4265:	Rockford, IL	 Good!
4266:	Lock Haven, PA	 Good!
4267:	Duncan, SC	 Good!
4268:	Sandusky, MI	 Good!
4269:	Tuscumbia, AL	 Good!
4270:	Gadsden, AL	 Good!
4271:	Baird, TX	 Good!
4272:	Trevose, PA	 Good!
4273:	Robins AFB, GA	 Good!
4274:	Orwigsburg, PA	 Good!
4275:	Oro Valley, AZ	 Good!
4276:	Charter Township of Georgetown, MI	 Good!
4277:	North Highlands, CA	 Good!
4278:	Foster City, CA	 Good!
4279:	Lady Lake, FL	 Good!
4280:	Nipomo, CA	 Good!
4281:	Elmwood Park, NJ	 Good!
4282:	Wolcott, CT	 Good!
4283:	Goshen, IN	 Good!
4284:	Brentwood, CA	 Good!
4285:	Saint Johns, FL	 Good!
4286:	Kermit, TX	 Good!
4287:	Cranbury, NJ	 Good!
4288:	Oelwein, IA	 Good!
4289:	Sullivan, 

4556:	Airway Heights, WA	 Good!
4557:	Charleston County, SC	 Good!
4558:	Helotes, TX	 Good!
4559:	South El Monte, CA	 Good!
4560:	Grant Park, IL	 Good!
4561:	Great Barrington, MA	 Good!
4562:	Vancouver, WA	 Good!
4563:	Blowing Rock, NC	 Good!
4564:	Xenia, OH	 Good!
4565:	Lewiston, NC	 Good!
4566:	Danville, VA	 Good!
4567:	Ringgold, GA	 Good!
4568:	Manhasset, NY	 Good!
4569:	Las Vegas, NV	 Good!
4570:	Pueblo, CO	 Good!
4571:	Front Royal, VA	 Good!
4572:	Meadville, PA	 Good!
4573:	Rural Hall, NC	 Good!
4574:	Annapolis, MD	 Good!
4575:	Mission Hills, CA	 Good!
4576:	Middle River, MD	 Good!
4577:	Salem, IN	 Good!
4578:	Nashville, TN	 Good!
4579:	Darlington, PA	 Good!
4580:	Feasterville-Trevose, PA	 Good!
4581:	West Harrison, IN	 Good!
4582:	Lithia Springs, GA	 Good!
4583:	Harrisburg, OR	 Good!
4584:	Little Rock, AR	 Good!
4585:	Garden City, MO	 Good!
4586:	Tooele, UT	 Good!
4587:	North Chicago, IL	 Good!
4588:	Costa Mesa, CA	 Good!
4589:	Piscataway, NJ	 Good!
4590:	Mishawaka, IN	 Good!
459

4857:	Livonia, NY	 Good!
4858:	Coon Rapids, MN	 Good!
4859:	Saline, MI	 Good!
4860:	Rome, NY	 Good!
4861:	Tishomingo, OK	 Good!
4862:	Coronado, CA	 Good!
4863:	Gallipolis, OH	 Good!
4864:	Snowbird, UT	 Good!
4865:	Ellenton, FL	 Good!
4866:	Louisville, CO	 Good!
4867:	Houston, TX	 Good!
4868:	Kenmore, NY	 Good!
4869:	Gloversville, NY	 Good!
4870:	Kaneohe, HI	 Good!
4871:	Andalusia, AL	 Good!
4872:	Pine Bluff, AR	 Good!
4873:	Haslet, TX	 Good!
4874:	Lafayette Hill, PA	 Good!
4875:	Elgin, IL	 Good!
4876:	Pollock Pines, CA	 Good!
4877:	Malden, MO	 Good!
4878:	Lenoir City, TN	 Good!
4879:	Haubstadt, IN	 Good!
4880:	New Milford, PA	 Good!
4881:	Brattleboro, VT	 Good!
4882:	Geneseo, NY	 Good!
4883:	Grand Prairie, TX	 Good!
4884:	Hampton Township, PA	 Good!
4885:	Angleton, TX	 Good!
4886:	Cleveland, OH	 Good!
4887:	Hanscom AFB, MA	 Good!
4888:	Highlands Ranch, CO	 Good!
4889:	University Place, WA	 Good!
4890:	Geneva, OH	 Good!
4891:	Cottage Grove, MN	 Good!
4892:	Fairfield, OH	 Good!
4893:	Alh

5160:	O'Brien County, IA	 Good!
5161:	Richlands, NC	 Good!
5162:	Oak Park, IL	 Good!
5163:	Wayne, PA	 Good!
5164:	Edison, NJ	 Good!
5165:	Lake Oswego, OR	 Good!
5166:	Panama City, FL	 Good!
5167:	Salisbury, MA	 Good!
5168:	Long Neck, DE	 Good!
5169:	Laguna Beach, CA	 Good!
5170:	Webster, TX	 Good!
5171:	McLean, VA	 Good!
5172:	Shorewood, WI	 Good!
5173:	Waxahachie, TX	 Good!
5174:	Ham Lake, MN	 Good!
5175:	Banner Elk, NC	 Good!
5176:	Barnstable County, MA	 Good!
5177:	Laurel, MT	 Good!
5178:	McConnell AFB, KS	 Good!
5179:	Orange City, FL	 Good!
5180:	Long Island, NY	 Good!
5181:	Charlestown, MA	 Good!
5182:	Bismarck, AR	 Good!
5183:	Henderson, KY	 Good!
5184:	Maple Heights, OH	 Good!
5185:	East Hanover, NJ	 Good!
5186:	Los Angeles, CA	 Good!
5187:	Oklahoma City, OK	 Good!
5188:	Franklin, PA	 Good!
5189:	Saint Louis, MI	 Good!
5190:	Wellsville, UT	 Good!
5191:	Hinckley, MN	 Good!
5192:	Grafton, ND	 Good!
5193:	Azusa, CA	 Good!
5194:	Monroe, WI	 Good!
5195:	Mahwah, NJ	 Good!
5196:	Hutchi

In [33]:
locations_df.head()

Unnamed: 0,location,city,state,latitude,longitude
0,"Scottdale, PA",Scottdale,PA,40.100351,-79.586984
1,"Limerick, PA",Limerick,PA,40.230934,-75.522125
2,"Bedford Heights, OH",Bedford Heights,OH,41.414383,-81.506115
3,"Asheboro, NC",Asheboro,NC,35.707915,-79.813645
4,"Lyndhurst, OH",Lyndhurst,OH,41.520052,-81.488732


In [34]:
# Merging clean_df and locations_df 
merged_df = pd.merge(clean_df, locations_df, on=['city', 'state'], how='left')
merged_df.head()

Unnamed: 0,uniq_id,job_title,category,company_name,city,state,country,post_date,job_type,inferred_salary_from,inferred_salary_to,inferred_salary_time_unit,location,latitude,longitude
0,19fdcca8fe64fd62515e8e07d98290e4,Material Handler,Transportation & Logistics,Axium Plastics,Ontario,CA,US,11/10/2020,Full-time,,,,"Ontario, CA",50.000678,-86.000977
1,c9b16a4a7b1fe7bbf1cbf25dcc2f4031,Full Time HHA/CNA/PCA Position,Healthcare,Right at Home of the Triangle,Raleigh,NC,US,11/11/2020,Full-time,,,,"Raleigh, NC",35.780398,-78.639099
2,082627b3ebd5ececaeef450318769d72,Senior Retail Supervisor,Retail,Goodwill Industries Of Delaware & D,Holmes,PA,US,10/17/2020,Full-time,,,,"Holmes, PA",39.904279,-75.30852
3,89141ac20e755763933dffa93ef6abc5,Medical Biller / Collector (Wilmington),Administrative,Carolina Dunes Behavioral Health,Wilmington,NC,US,11/30/2020,Full-time,16.0,18.0,hourly,"Wilmington, NC",34.225728,-77.944711
4,62206e5ae69b88bcb0c4f22f3da39dae,Outside Sales Professional,Sales,Terminix,Auburn,AL,US,11/4/2020,Full-time,,,,"Auburn, AL",32.609857,-85.480783


In [35]:
# Getting only the data I need for the map visual
transformed_df = merged_df[['job_title', 'company_name', 'category', 'location', 'latitude', 'longitude']]
transformed_df.head()

Unnamed: 0,job_title,company_name,category,location,latitude,longitude
0,Material Handler,Axium Plastics,Transportation & Logistics,"Ontario, CA",50.000678,-86.000977
1,Full Time HHA/CNA/PCA Position,Right at Home of the Triangle,Healthcare,"Raleigh, NC",35.780398,-78.639099
2,Senior Retail Supervisor,Goodwill Industries Of Delaware & D,Retail,"Holmes, PA",39.904279,-75.30852
3,Medical Biller / Collector (Wilmington),Carolina Dunes Behavioral Health,Administrative,"Wilmington, NC",34.225728,-77.944711
4,Outside Sales Professional,Terminix,Sales,"Auburn, AL",32.609857,-85.480783


In [38]:
indeed_jobs_jsonfile = transformed_df.to_json(orient="index")

In [39]:
# Writing the output to json file 
with open('../resources/indeed_jobs.json', 'w') as file:
    file.write(indeed_jobs_jsonfile)