In [1]:
import requests 
import pandas as pd
import matplotlib.pyplot as plt
from config_ka import g_key
import gmaps
import gmaps.datasets


In [2]:
# Reading in Main Dataset pulled by Muse API
file = '../Resources/job_company_merged_data.csv'
df = pd.read_csv(file)
df.head(5)

# Reading in City Coordinates Dataset
coord_file = '../Resources/city_coordinates.csv'
coord_df = pd.read_csv(coord_file)
# Replace Washington DC with "Washington" to match city name in jobs dataset
coord_df.replace(to_replace="Washington DC", value="Washington", inplace=True)

# Reading in salary Dataset
salary_file = '../Resources/Cleaned_Salary_Data.csv'
salary_df = pd.read_csv(salary_file)
# Replace Washington DC with "Washington" to match city name in jobs dataset
salary_df.replace(to_replace="Washington DC", value="Washington", inplace=True)


In [3]:
# adding latitude and longitude coordinates to main df
merged_df = pd.merge(df, coord_df, on="city")
merged_df["Job Title"].unique()

array(['Data Analyst', 'Other', 'Data Scientist'], dtype=object)

In [4]:
# exporting updated master dataset that now contains lat/long coordinates
merged_df.to_csv("../Output/job_company_coords_merged_data.csv", index=False, header=True)

In [5]:
# Creating DF for Data Scientist Jobs
DataScientist_df = merged_df.loc[(merged_df["Job Title"] == "Data Scientist")]
DataScientist_df.shape

(238, 30)

In [6]:
# MERGING IN DATA SCIENTIST SALARY DATA TO BE MERGED WITH MAIN DATASET
ds_salary_df = salary_df.loc[(salary_df["Dimension"] == "Data Scientist")]
ds_salary_df = ds_salary_df.loc[(ds_salary_df["Month"] == "2020-05")]
ds_salary_df.replace(to_replace="NYC", value="New York", inplace=True)
ds_salary_df.head(3)


Unnamed: 0,Metro,Dimension Type,Month,Dimension,Measure,Value,YoY
180,Atlanta,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 111,346",6.1%
182,Boston,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 125,362",6.8%
184,Chicago,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 112,504",7.0%


In [7]:
# Making new data frame to be used in the marker labels
job_count = DataScientist_df.groupby('city')["job name"].count()
job_count_df = pd.DataFrame({"job count" : job_count})


In [8]:
locations_df = merged_df[["Lat", "Lng", "city"]]
locations_df.drop_duplicates('city', inplace = True)
locations_df.sort_values("city", ascending=True, inplace = True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [9]:
# Merging job counts with Lat/Lng Coordinates
job_locations_df = pd.merge(left=job_count_df, right=locations_df, left_on='city', right_on='city')

# Merging in salary data
marker_df = pd.merge(left=job_locations_df, right=ds_salary_df, left_on='city', right_on='Metro')

# Appending in Cost of Living Index by State
marker_df["Cost of Living Index"] = ["89.2", "131.6", "94.5", "91.5", "151.7", "139.1", "101.7","151.7", "110.7", "158.4"]
marker_df["Opportunity Ranking"] = ["6", "7", "5", "3.5", "10", "1", "9","2", "3.5", "8"]

# exporting data science marker csv 
marker_df.to_csv("../Output/data_science_marker.csv", index=False, header=True)

marker_df.head(3)

Unnamed: 0,city,job count,Lat,Lng,Metro,Dimension Type,Month,Dimension,Measure,Value,YoY,Cost of Living Index,Opportunity Ranking
0,Atlanta,3,33.749,-84.388,Atlanta,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 111,346",6.1%,89.2,6
1,Boston,20,42.3601,-71.0589,Boston,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 125,362",6.8%,131.6,7
2,Chicago,16,41.8781,-87.6298,Chicago,Job Title,2020-05,Data Scientist,Median Base Pay,"$ 112,504",7.0%,94.5,5


In [10]:
# SUMMARY MAP FOR DATA SCIENCE JOBS
gmaps.configure(g_key)
fig = gmaps.figure(map_type='SATELLITE')

# Using the template add the job marks to the heatmap
job_locations = marker_df[["Lat", "Lng"]]

# Using the template add the hotel marks to the heatmap
info_box_template = """
<dl>
<dt>Ranking</dt><dd>{Opportunity Ranking}</dd>
<dt>City</dt><dd>{city}</dd>
<dt>Job Count</dt><dd>{job count}</dd>
<dt>Mean Salary</dt><dd>{Value}</dd>
</dl>
"""
job_info = [info_box_template.format(**row) for index, row in marker_df.iterrows()]
markers = gmaps.marker_layer(job_locations, info_box_content=job_info)

plt.savefig('../Output/Data Scientist Summary Map.png')


# Display figure
fig.add_layer(markers)
fig


Figure(layout=FigureLayout(height='420px'))

<Figure size 432x288 with 0 Axes>

In [11]:
# DATA ANALYST SUMMARY MAP
# creating df for only Data Analyst positions
DataAnalyst_df = merged_df.loc[(merged_df["Job Title"] == "Data Analyst")]

#ADDING IN SALARY DATA TO BE MERGED WITH MAIN DATASET
da_salary_df = salary_df.loc[(salary_df["Dimension"] == "Data Analyst")]
da_salary_df = da_salary_df.loc[(da_salary_df["Month"] == "2020-05")]
da_salary_df.replace(to_replace="NYC", value="New York", inplace=True)

#Making new data frame to be used in the marker labels
da_job_count = DataAnalyst_df.groupby('city')["job name"].count()
da_job_count_df = pd.DataFrame({"job count" : da_job_count})

# Merging job counts with Lat/Lng Coordinates
da_job_count_df = pd.merge(left=da_job_count_df, right=locations_df, left_on='city', right_on='city')

# Merging in salary data
da_marker_df = pd.merge(left=da_job_count_df, right=da_salary_df, left_on='city', right_on='Metro')
# da_marker_df["Cost of Living Index"] = ["89.2", "131.6", "94.5", "91.5", "151.7", "139.1", "101.7","151.7", "110.7", "158.4"]
da_marker_df["Opportunity Ranking"] = ["4.5", "7", "3", "2", "10", "1", "8.5","4.5", "6", "8.5"]

# SUMMARY MAP FOR BUSINESS ANALYST JOBS
fig = gmaps.figure(map_type='SATELLITE')

# Using the template add the job marks to the heatmap
job_locations = da_marker_df[["Lat", "Lng"]]

# Using the template add the hotel marks to the heatmap
info_box_template = """
<dl>
<dt>Ranking</dt><dd>{Opportunity Ranking}</dd>
<dt>City</dt><dd>{city}</dd>
<dt>Job Count</dt><dd>{job count}</dd>
<dt>Mean Salary</dt><dd>{Value}</dd>
</dl>
"""
da_job_info = [info_box_template.format(**row) for index, row in da_marker_df.iterrows()]
da_markers = gmaps.marker_layer(job_locations, info_box_content=da_job_info)

plt.savefig('../Output/Data Analyst Summary Map.png')

# exporting data analyst marker csv 
da_marker_df.to_csv("../Output/data_analyst_marker.csv", index=False, header=True)

# Display figure
fig.add_layer(da_markers)
fig


Figure(layout=FigureLayout(height='420px'))

<Figure size 432x288 with 0 Axes>