In [None]:
from IPython.display import display

import json
import os

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

# Absolute form of the empty path, i.e. the current working directory.
# NOTE(review): presumably the notebook is launched from the NLP directory - confirm.
nlp_dir = os.path.abspath('')

# Exported figures go to <parent of nlp_dir>/paper/assets; create the folder
# up front so the write_image calls further down have a target directory.
parent_dir = os.path.dirname(nlp_dir)
assets_dir = os.path.join(parent_dir, 'paper', 'assets')
os.makedirs(assets_dir, exist_ok=True)

In [None]:
# Load the result records from <parent of nlp_dir>/results/all.json.
# The encoding is explicit so non-ASCII text decodes the same on every platform
# (JSON interchange is UTF-8; the platform default encoding may not be).
results_path = os.path.join(os.path.dirname(nlp_dir), 'results', 'all.json')
with open(results_path, encoding='utf-8') as file:
  results = json.load(file)
df = pd.DataFrame(results)

# Normalize to monthly salary: any value of 15000 or more is treated as a
# yearly figure and divided by 12, smaller values are kept unchanged.
# Series.map replaces DataFrame.applymap, which is deprecated since pandas 2.1.
df['salary'] = df['salary'].map(lambda salaries: [v if v < 15000 else v / 12 for v in salaries])

# Lift the two fields of the nested `location` record into their own columns,
# then drop the now redundant original.
df['cities'] = df['location'].map(lambda v: v['cities'])
df['states'] = df['location'].map(lambda v: v['states'])
df = df.drop(columns=['location'])

In [None]:
def occurrences(df, col, name):
  """Count how often each value occurs in a list-valued column.

  Args:
    df: DataFrame holding the column.
    col: Name of a column whose cells are lists of values. Empty lists and
      missing cells (None/NaN) contribute nothing to the counts.
    name: Header to use for the value column of the result.

  Returns:
    DataFrame with the columns `name` and 'Count', one row per distinct
    value, ordered from the most to the least frequent value.
  """
  # explode() emits one row per list element and turns empty lists and
  # missing cells into NaN; iterating the cells by hand would instead raise
  # a TypeError on the first None/NaN cell.
  flattened = df[col].explode().dropna()
  # explode() always yields object dtype; infer_objects() restores a numeric
  # dtype when every remaining value is numeric.
  results = flattened.infer_objects().value_counts(sort=True)
  return pd.DataFrame({name: results.index, 'Count': results.values})

In [None]:
# Number of rows in the loaded data set.
len(df)

In [None]:
# Frequency table of the values in the list-valued `certifications` column.
certifications_result = occurrences(df, col='certifications', name='Certification')
certifications_result

In [None]:
# One bar per certification, ordered ascending by count.
certifications_by_count = certifications_result.sort_values(by='Count')
fig = px.bar(
  certifications_by_count,
  x='Certification',
  y='Count',
  color='Certification',
  text='Count',
  title='Certifications - Total',
)
# Draw the count labels outside the bars.
fig.update_traces(textposition='outside')

# Export a PDF copy into the assets directory, then show the figure.
certifications_chart_path = os.path.join(assets_dir, 'certifications-bar-chart.pdf')
fig.write_image(certifications_chart_path)
fig.show()

In [None]:
# Per-row mean and maximum of the (monthly) salary list; rows with an empty
# salary list get a missing value instead.
# Series.map replaces DataFrame.applymap, which is deprecated since pandas 2.1.
df['avg_salary'] = df['salary'].map(lambda v: sum(v) / len(v) if v else None)
df['max_salary'] = df['salary'].map(lambda v: max(v) if v else None)

# A bare expression in the middle of a cell is evaluated and thrown away, so
# the top-50 table has to go through display() to actually be rendered.
display(df.sort_values(by='max_salary', ascending=False).head(50))

# Mean of `avg_salary` per state, ascending. explode() repeats a row once per
# entry of its `states` list, so a multi-state row counts towards each state.
average_salary = (
  df.explode('states')[['states', 'avg_salary']]
  .groupby('states')
  .mean()
  .reset_index()
  .sort_values(by='avg_salary')
)
average_salary

In [None]:
# Bar labels: the average salary rounded to a whole number.
salary_labels = average_salary['avg_salary'].round()
fig = px.bar(
  average_salary,
  x='states',
  y='avg_salary',
  color='states',
  text=salary_labels,
  title='Average Salary',
)
# Draw the labels outside the bars.
fig.update_traces(textposition='outside')

# Export a PDF copy into the assets directory, then show the figure.
average_salary_chart_path = os.path.join(assets_dir, 'average-salary-bar-chart.pdf')
fig.write_image(average_salary_chart_path)
fig.show()

In [None]:
# Frequency table of the values in the list-valued `employment_type` column.
employment_type_result = occurrences(df, col='employment_type', name='Employment Type')
employment_type_result

In [None]:
# One bar per employment type, ordered ascending by count.
employment_types_by_count = employment_type_result.sort_values(by='Count')
fig = px.bar(
  employment_types_by_count,
  x='Employment Type',
  y='Count',
  color='Employment Type',
  text='Count',
  title='Employment Type',
)
# Draw the count labels outside the bars.
fig.update_traces(textposition='outside')

# Export a PDF copy into the assets directory, then show the figure.
employment_type_chart_path = os.path.join(assets_dir, 'employment-type-bar-chart.pdf')
fig.write_image(employment_type_chart_path)
fig.show()

In [None]:
# The 50 most frequent `experience` values (occurrences() orders by count, descending).
occurrences(df, col='experience', name='Experience').head(50)

In [None]:
# Frequency table of every value in the list-valued `cities` column.
occurrences(df, col='cities', name='City')

In [None]:
# How many times each state occurs in the data.
state_count = occurrences(df, 'states', 'State')

# The population file is a JSON object whose keys become the 'State' column
# and whose values become the 'Population' column. The encoding is explicit
# so that non-ASCII state names decode (and therefore join) identically on
# every platform.
with open(os.path.join(nlp_dir, 'state_population.json'), encoding='utf-8') as file:
  state_population_raw = json.load(file)

state_population = pd.DataFrame(list(state_population_raw.items()), columns=['State', 'Population'])

# Default inner join: a state missing from either side is dropped from the result.
state_result = pd.merge(state_count, state_population, on='State')
state_result['Count per Capita'] = state_result['Count'] / state_result['Population']

state_result

In [None]:
# One bar per state, ordered ascending by absolute count.
states_by_count = state_result.sort_values(by='Count')
fig = px.bar(
  states_by_count,
  x='State',
  y='Count',
  color='State',
  text='Count',
  title='State Distribution - Total',
)
# Draw the count labels outside the bars.
fig.update_traces(textposition='outside')

# Export a PDF copy into the assets directory, then show the figure.
location_chart_path = os.path.join(assets_dir, 'location-bar-chart.pdf')
fig.write_image(location_chart_path)
fig.show()

In [None]:
# Same states, now ordered ascending by count relative to population.
data = state_result.sort_values(by='Count per Capita')

# Bar labels: scaling by 10,000,000, rounding and dividing by 10 gives the
# count per million of population with one decimal place.
per_million_labels = (data['Count per Capita'] * 10000000).round() / 10

fig = px.bar(
  data,
  x='State',
  y='Count per Capita',
  color='State',
  text=per_million_labels,
  title='State Distribution - Normalized to Population',
)
# Draw the labels outside the bars.
fig.update_traces(textposition='outside')

# Export a PDF copy into the assets directory, then show the figure.
per_capita_chart_path = os.path.join(assets_dir, 'location-per-capita-bar-chart.pdf')
fig.write_image(per_capita_chart_path)
fig.show()

In [None]:
# Rows with an empty `degrees` entry are counted under a single 'unknown' bucket.
education_results = [degrees if degrees else ['unknown'] for degrees in df['degrees']]

# Flatten the per-row lists and count each degree, most frequent first.
flat_degrees = [degree for degrees in education_results for degree in degrees]
education_counts = pd.Series(flat_degrees).value_counts(sort=True)

education_data = pd.DataFrame({
  'education_type': education_counts.index,
  'count': education_counts.values,
})
education_data

In [None]:
# Title shared by the education bar chart and the education pie chart.
education_graph_title = 'Level of Education needed for InfoSec Jobs'

In [None]:
# Display names that replace the raw column names on the axes and legend.
education_axis_labels = {'count': 'Count', 'education_type': 'Type of Education'}
fig = px.bar(
  education_data,
  x='education_type',
  y='count',
  color='education_type',
  title=education_graph_title,
  labels=education_axis_labels,
)

# Export a PDF copy into the assets directory, then show the figure.
education_bar_path = os.path.join(assets_dir, 'education-bar-chart.pdf')
fig.write_image(education_bar_path)
fig.show()

In [None]:
# Pie chart of the same degree counts. Sector labels are taken from `names`;
# `labels` is a different option (a dict of display-name overrides for column
# names), so passing the column name there leaves the sectors unnamed.
fig = px.pie(
  education_data,
  names='education_type',
  values='count',
  color='education_type',
  title=education_graph_title,
)

# Export a PDF copy into the assets directory, then show the figure.
fig.write_image(os.path.join(assets_dir, 'education-pie-chart.pdf'))
fig.show()