# EXPORTS JOBS POSTED FROM [web.byui.edu/StudentEmployment/’](web.byui.edu/StudentEmployment/) INTO A CSV FILE

In [1]:
import http.client
import json
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

## REQUEST DATA FROM API ENDPOINT

In [2]:
conn = http.client.HTTPSConnection("web.byui.edu")
 
conn.request("GET", "/studentemployment/api/jobs")

res = conn.getresponse()

data = res.read()

info = data.decode("utf-8")

responseObject = json.loads(info)

data_jobs = pd.DataFrame(responseObject)

In [3]:
data_jobs['URL'] = data_jobs.jobID.apply(lambda x: f'https://web.byui.edu/StudentEmployment/job/{x}')

In [4]:
data_jobs['URL'].iloc[300]

IndexError: single positional indexer is out-of-bounds

In [None]:
pd.options.display.max_columns = None
data_jobs

In [None]:
type(data)

## DISPLAY DATA RECEIVED

In [None]:
pd.options.display.max_columns = None

data_jobs

## SIMPLE OVERVIEW

In [5]:
print(f'''
    THERE ARE {data_jobs.jobID.nunique()} JOBS.
    HIGHEST PAY JOB:\'{data_jobs[data_jobs.payRate==data_jobs.payRate.max()]['title'].iloc[0]}\' WITH {data_jobs.payRate.max()} DOLLARS AN HOUR.
    ONLINE JOBS: {data_jobs[data_jobs["title"].str.contains('Online')].shape[0]} OUT OF {data_jobs.shape[0]}.
''')


    THERE ARE 91 JOBS.
    HIGHEST PAY JOB:'Systems Developer' WITH 12.0 DOLLARS AN HOUR.
    ONLINE JOBS: 52 OUT OF 91.



## CLEAN THE DATA

In [6]:
col_dates = ['dateUpdated','startDate','endDate','beginningDate','recruitingStartDate']

data_jobs[col_dates] = data_jobs[col_dates].astype('datetime64[ns]')

data_jobs.dateUpdated = data_jobs.dateUpdated.apply(lambda x: datetime.strftime(x, "%Y-%m-%d"))

columns_to_drop = [
    'jobID', # not needed for EDA
    # 'description', # not needed for EDA
    # 'summary', # not needed for EDA
    'displayJob', # single boolean
    # 'dateUpdated',
    'startDate','endDate', # not needed for display
    'approximateHoursPerWeek', # not consistent
    'positionsAllocated', # not relevant
    'positionsAvailble', # not relevant
    # 'workSchedule', # not needed for EDA
    'requireResume', # not relevant
    'limitApplicants', # not relevant
    'limitNumber', # not relevant
    'applicants', # empty
    'jobQuestions', # empty
    'isOnline', # not accurate
    'allowOnline', # not accurate
    'jobMajors' # not relevant
    ]

data_jobs.drop(columns_to_drop, axis=1, inplace=True)

data_jobs['description'] = data_jobs['description'].apply(lambda x: [p.text.strip() for p in BeautifulSoup(x).find_all('p') if p.text.strip() != ''])

## OPTIONAL: Save as a CSV

In [7]:
# data_jobs.to_csv('StudentEmployment.csv')

## FILTER OUT THE JOBS THAT HAVE 'Online','Custodian', and 'TA' IN THEIR TITLE

In [8]:
# JOBS FILTERED. NOT ONLINE, NOR CUSTODIAN, AND NOR TA.

remove = ['Online','Custodian','TA']

data_filtered = data_jobs[~data_jobs["title"].str.contains('|'.join(remove))].sort_values('payRate',ascending=False).reset_index().drop('index',axis=1)

print(f'{data_filtered.shape[0]} JOBS OUT OF {data_jobs.shape[0]} POSTED JOBS.')

pd.set_option('display.max_rows', None)

data_filtered

28 JOBS OUT OF 91 POSTED JOBS.


Unnamed: 0,title,departmentName,description,summary,dateUpdated,managerName,payRate,workSchedule,beginningDate,recruitingStartDate,URL
0,Systems Developer,BYU Idaho Support Center,[The BYUI Support Center (BSC) is looking for ...,Do you usually take an innovative approach to ...,2022-09-23,Luisa Silva,12.0,Flexible M-F,NaT,2020-02-04 01:00:00,https://web.byui.edu/StudentEmployment/job/816187
1,Reports and Systems Analyst,BYU Idaho Support Center,[The Reports and Systems Analyst is responsibl...,The Reports and Systems Analyst is responsible...,2022-09-23,Luisa Silva,12.0,M-F Varies,NaT,2020-10-01 01:00:00,https://web.byui.edu/StudentEmployment/job/118...
2,Health Center Lab Assistant,Student Health Center,"[PRINCIPLE DUTIES, REQUIREMENTS, APPLICATION I...","Assist lab manager in phlebotomy, performance ...",2022-09-12,Emma Taylor,11.5,,2022-09-27 00:00:00,2022-04-20 01:00:00,https://web.byui.edu/StudentEmployment/job/371...
3,Phones Specialist,BYU-Idaho Support Center,"[As a BYU-Idaho Phone Team Specialist, you are...",The BYU-Idaho Support Center is the primary su...,2022-09-23,Jared Edwards,10.38,"4pm-6pm, 5pm-8pm",2022-10-04 00:00:00,2022-02-16 01:00:00,https://web.byui.edu/StudentEmployment/job/279...
4,Clinician Aide,Student Health Center,"[PRINCIPLE DUTIES, REQUIREMENTS, APPLICATION I...",Assist health care providers and nursing staff...,2022-09-19,Emma Taylor,10.15,M-F TBD,2022-09-19 18:00:00,2022-03-21 01:00:00,https://web.byui.edu/StudentEmployment/job/349...
5,Food Services Student Jobs,University Food Services,"[Fast-paced, demanding job. We need dependabl...",Fall Food Service Jobs Start at $10 PER HOUR a...,2022-09-22,Fauneil Schultz,10.0,Many Shifts Available,2022-09-23 06:00:00,2022-06-01 01:00:00,https://web.byui.edu/StudentEmployment/job/389...
6,Assessment Consultant,Campus Curriculum Development,[Employees will be trained to assist and work ...,Assessment Services is a branch of Campus Curr...,2022-09-19,Ruben Varela Fuentealba,10.0,Flexible,2022-10-10 06:00:00,2022-09-20 01:00:00,https://web.byui.edu/StudentEmployment/job/390...
7,Grounds Crew 12,Grounds,[Looking for students who can work Fall Semest...,Looking for students who can work Fall Semeste...,2022-09-23,Frederick Haux,9.58,8:00AM-12:00PM,NaT,2016-01-01 01:00:00,https://web.byui.edu/StudentEmployment/job/1738
8,Grounds Crew 6,Grounds,"[Caring for campus grounds and plants, weeding...","Starting ASAP. Shift is 1:00PM to 5:00 PM, Mon...",2022-09-21,Levi Saurey,9.58,"1:00PM to 5:00 PM, Monday-Friday",NaT,2021-05-05 01:00:00,https://web.byui.edu/StudentEmployment/job/205...
9,Grounds Crew 11,Grounds,"[Shift: 6:00AM-10:00AM, Monday-Friday., Must b...",Starting ASAP. Monday-Friday 6:00am to 10:00am...,2022-09-21,Thomas Andersen,9.58,,NaT,2016-01-05 01:00:00,https://web.byui.edu/StudentEmployment/job/702


## OPTIONAL: Save as a CSV

In [None]:
# data_filtered.to_csv('StudentEmployment_filtered.csv')

In [None]:
data_jobs.payRate.value_counts().reset_index().sort_values('index')

In [None]:
data_jobs.groupby('payRate').count()

In [None]:
data_jobs.departmentName.value_counts()

In [None]:
data_jobs[data_jobs.title == 'AGBUS147 OnlineTA'].iloc[0]

In [None]:
pd.options.display.max_columns = None
data_jobs[data_jobs.jobID == 342319]

In [None]:
data_jobs.dateUpdated.iloc[0].strftime("%Y-%m-%d")

In [None]:
import time
ym = time.strftime("%Y-%m-%d")
ym

In [None]:
data_jobs.dateUpdated.iloc[0]

In [None]:
time.strftime("%Y-%m-%d")

In [None]:
from datetime import datetime, timedelta
(datetime.today() - timedelta(1)).strftime("%Y-%m-%d") 

In [None]:
(33/1)

In [None]:
(1/33) - 1

In [None]:
import requests

In [None]:
response = requests.get('https://web.byui.edu/studentemployment/api/jobs')

In [None]:
soup = BeautifulSoup(response.text)

In [None]:
soup.select()