In [None]:
from io import BytesIO
import zipfile

import ipywidgets
import pandas as pd
import requests

import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

In [None]:
def load_data(year, timeout=60):
    """Download and parse the BLS OES national table for a single year.

    The zip archive for ``year`` is fetched from bls.gov, the first member
    whose name ends in ``_dl.xls`` or ``_dl.xlsx`` is read with pandas, the
    column names are lower-cased and a ``year`` column is added.

    Parameters
    ----------
    year : int
        Calendar year, e.g. ``2015``. Only its last two digits go into the URL.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The parsed sheet with lower-cased column names plus a ``year`` column.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    RuntimeError
        If the archive contains no ``*_dl.xls`` / ``*_dl.xlsx`` member.
    """
    # Zero-pad the two-digit suffix so that e.g. 2005 -> '05' rather than '5'.
    url = 'https://www.bls.gov/oes/special.requests/oesm{:02d}nat.zip'.format(year % 100)
    resp = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of feeding an error page to zipfile.
    resp.raise_for_status()
    with zipfile.ZipFile(BytesIO(resp.content)) as zf:
        for fi in zf.filelist:
            # Stripping trailing 'x' lets one check accept both .xls and .xlsx.
            if fi.filename.rstrip('x').endswith('_dl.xls'):
                # read_excel expects a path or file-like object, not raw bytes.
                df = pd.read_excel(BytesIO(zf.read(fi)))
                df.columns = df.columns.str.lower()
                df['year'] = year
                return df
    raise RuntimeError('Could not find correct Excel file within Zip archive.')

In [None]:
def load_years(years):
    """Load the table for each requested year.

    Parameters
    ----------
    years : iterable
        Years to fetch; each one is passed to ``load_data`` unchanged.

    Returns
    -------
    list
        The ``load_data`` result for every year, in the order given.
    """
    return [load_data(yr) for yr in years]

In [None]:
# Stack the yearly tables for 2011-2020 (range() excludes 2021) into one long frame.
# ignore_index=True gives every row a unique label; without it each year's
# 0..n-1 index is repeated, which can trip up label-based selection and alignment.
df_all = pd.concat(load_years(range(2011, 2021)), ignore_index=True)

In [None]:
# Overview of the combined frame: column dtypes and non-null counts.
df_all.info()

In [None]:
# Summary statistics of the 'a_mean' column exactly as loaded from the spreadsheets.
df_all['a_mean'].describe()

In [None]:
# Numeric copy of 'a_mean'; errors='coerce' turns anything unparseable into NaN.
# NOTE(review): presumably the raw column holds non-numeric placeholders — confirm.
df_all['mean salary'] = pd.to_numeric(df_all['a_mean'], errors='coerce')

In [None]:
# Summary statistics of the numeric column; NaN entries are left out of the stats.
df_all['mean salary'].describe()

In [None]:
# Distinct 'occ_code' values across all loaded years.
df_all['occ_code'].unique()

In [None]:
# How many distinct 'occ_code' values there are, shown as a one-element shape tuple.
df_all['occ_code'].unique().shape

In [None]:
# Boolean row mask: True where 'occ_code' ends in '0000'.
# na=False maps missing or non-string codes to False, so the mask stays strictly
# boolean and is safe to use for indexing.
# NOTE(review): a total row coded like '00-0000' would also match — confirm that is intended.
major_groups = df_all['occ_code'].str.endswith('0000', na=False)

In [None]:
# Display the boolean mask (one entry per row of df_all).
major_groups

In [None]:
# One line per 'occ_title' among the masked rows, mean salary against year.
fig, ax = plt.subplots()
sns.lineplot(
    data=df_all.loc[major_groups],
    x='year',
    y='mean salary',
    hue='occ_title',
    ax=ax,
)
# Anchor the legend's upper-left corner just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

In [None]:
# Alphabetically sorted list of the distinct 'occ_title' values among the masked rows.
titles = sorted(df_all.loc[major_groups, 'occ_title'].unique())

In [None]:
@ipywidgets.interact(title=titles)
def plot_group(title=titles[0]):
    """Plot mean salary by year for every entry of ``titles``, highlighting one.

    All rows of ``df_all`` whose 'occ_title' equals a given title form one line.
    The chosen ``title`` is drawn in red, every other title in light grey.

    Parameters
    ----------
    title : str
        Entry of ``titles`` to highlight; the widget offers exactly those values.

    Raises
    ------
    ValueError
        If ``title`` is not in ``titles``.
    """
    # Move the highlighted title to the end so its line is drawn on top.
    draw_order = list(titles)
    draw_order.remove(title)
    draw_order.append(title)
    for name in draw_order:
        rows = df_all.loc[df_all['occ_title'] == name]
        shade = '0.7' if name != title else 'r'
        plt.plot(rows['year'], rows['mean salary'], c=shade)
    plt.xlabel('year')
    plt.ylabel('mean salary')