# EXPORTS JOBS POSTED ON [web.byui.edu/StudentEmployment/](https://web.byui.edu/StudentEmployment/) INTO A CSV FILE

In [1]:
import http.client
import json
import pandas as pd
from bs4 import BeautifulSoup

## REQUEST DATA FROM API ENDPOINT

In [2]:
# Download the job postings from the Student Employment API endpoint and load
# the parsed JSON payload into a DataFrame.
# A timeout keeps the cell from hanging forever if the server never answers.
conn = http.client.HTTPSConnection("web.byui.edu", timeout=30)

try:
    conn.request("GET", "/studentemployment/api/jobs")

    res = conn.getresponse()

    # Fail loudly on a non-200 reply instead of handing an error page to json.loads.
    if res.status != 200:
        raise RuntimeError(f"Job API request failed: HTTP {res.status} {res.reason}")

    # Raw response body as bytes; it must be read before the connection is closed.
    data = res.read()
finally:
    # Always release the socket, even when the request or the read fails.
    conn.close()

info = data.decode("utf-8")

responseObject = json.loads(info)

data_jobs = pd.DataFrame(responseObject)

In [3]:
# Quick inspection: show the Python type of the raw response body held in `data`.
type(data)

bytes

## DISPLAY DATA RECEIVED

In [4]:
# Bare last expression: the notebook renders the DataFrame of received jobs as a table.
data_jobs

Unnamed: 0,jobID,title,departmentName,description,summary,displayJob,dateUpdated,startDate,endDate,managerName,...,beginningDate,recruitingStartDate,requireResume,limitApplicants,limitNumber,applicants,jobQuestions,isOnline,allowOnline,jobMajors
0,3647072,PM140 OnlineTutor,Online Student,<p><b><span>The position requires to be traine...,A tutor is needed for the PM 140 (PMPC 140) cl...,True,2022-09-02T21:52:10.6822319,2022-09-02T07:00:00,2022-09-10T07:00:00,Casey McDaniel,...,2022-09-14T07:00:00,2022-04-08T01:00:00,True,False,0,[],[],False,True,[]
1,341563,ACCTG180 OnlineTutor,Campus Student,<p><span>Online Tutor is needed for the ACCTG ...,Online Tutor is needed for the ACCTG 180: Surv...,True,2022-09-02T21:51:44.5045471,2022-09-02T10:00:00,2022-09-09T02:00:00,Casey McDaniel,...,2022-09-14T02:00:00,2018-10-24T01:00:00,True,False,0,[],[],True,False,[]
2,342205,ACCTG180 OnlineTutor,Online Student,<p><span> </span><span>Online Tutor is needed ...,An Online Tutor is needed for ACCTG 180 online...,True,2022-09-02T21:48:02.210794,2022-09-02T19:00:00,2022-09-08T21:00:00,Casey McDaniel,...,2022-09-13T19:00:00,2019-05-02T01:00:00,True,False,0,[],[],False,True,[]
3,2399930,Chat Specialist,Main BSC Support Center,"<p>As a BYU-Idaho Customer Care Specialist, yo...",Your duties include working at a fast pace to ...,True,2022-09-02T21:02:25.4063439,2022-09-01T06:00:00,2022-09-09T18:00:00,Abigayle Ludlow,...,2022-09-12T06:00:00,2021-08-10T01:00:00,True,False,0,[],[],False,False,[]
4,2399931,Email Specialist,Main BSC Support Center,"<p>As a BYU-Idaho Customer Care Specialist, yo...",Your duties include working at a fast pace to ...,True,2022-09-02T21:01:30.7969169,2022-09-01T18:00:00,2022-09-09T18:00:00,Abigayle Ludlow,...,2022-09-12T06:00:00,2021-08-10T01:00:00,True,False,0,[],[],False,False,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,32596,Chemistry Stockroom/TA/Grader,Chemistry,<p>This is a general application form for any ...,General Application form for all chemistry dep...,True,2022-06-09T20:08:58.2027174,2022-06-09T14:00:00,,Mike Wood,...,,2016-11-11T01:00:00,True,False,0,[],[],False,False,"[{'jobMajorId': 212, 'jobId': 32596, 'majorId'..."
376,341293,Security-Lock up Patrol,Public Safety,<p>Employees will assist with patrolling campu...,,True,2022-02-18T23:30:52.6069815,2021-12-05T07:00:00,,Kevin Rhoades,...,,2018-08-15T01:00:00,True,False,0,[],[],False,False,[]
377,1213,Music Production Team (Music Dept.),Music,<p>This position supports productions for the ...,"Audio, video, and stage production support for...",True,2022-02-10T20:45:26.4490587,2022-01-04T22:00:00,,Tyler McNiven,...,2022-01-05T22:00:00,2016-01-20T01:00:00,True,False,0,[],[],False,False,[]
378,341008,Turcotte TA,Physics,<p>Will help set up lab activities and assist ...,Looking for GESCI 208 Robotics TA- 3 positions...,True,2022-01-03T16:37:48.0842611,2022-01-03T19:00:00,,Stephen Turcotte,...,2022-01-03T19:00:00,2018-04-03T01:00:00,True,False,0,[],[],False,False,[]


## SIMPLE OVERVIEW

In [5]:
# Headline figures: distinct job IDs, the best-paid posting, and how many
# titles contain the word 'Online'.
job_count = data_jobs.jobID.nunique()
top_rate = data_jobs.payRate.max()
# First row whose pay rate equals the maximum.
top_title = data_jobs[data_jobs.payRate == top_rate]['title'].iloc[0]
online_mask = data_jobs["title"].str.contains('Online')
online_count = data_jobs[online_mask].shape[0]
total_rows = data_jobs.shape[0]

print(f'''
    THERE ARE {job_count} JOBS.
    HIGHEST PAY JOB:'{top_title}' WITH {top_rate} DOLLARS AN HOUR.
    ONLINE JOBS: {online_count} OUT OF {total_rows}.
''')


    THERE ARE 380 JOBS.
    HIGHEST PAY JOB:'Stores and Receiving Delivery Driver' WITH 11.58 DOLLARS AN HOUR.
    ONLINE JOBS: 328 OUT OF 380.



## CLEAN THE DATA

In [6]:
# Clean the jobs table: parse the date columns, drop the columns that are not
# used further on, and turn each HTML description into a list of paragraph texts.
# The cell is written so that running it a second time is harmless.
col_dates = ['dateUpdated','startDate','endDate','beginningDate','recruitingStartDate']

columns_to_drop = [
    'jobID', # not needed for EDA
    # 'description', # not needed for EDA
    # 'summary', # not needed for EDA
    'displayJob', # single boolean
    'dateUpdated','startDate','endDate', # not needed for display
    'approximateHoursPerWeek', # not consistent
    'positionsAllocated', # not relevant
    'positionsAvailble', # not relevant
    # 'workSchedule', # not needed for EDA
    'requireResume', # not relevant
    'limitApplicants', # not relevant
    'limitNumber', # not relevant
    'applicants', # empty
    'jobQuestions', # empty
    'isOnline', # not accurate
    'allowOnline', # not accurate
    'jobMajors' # not relevant
    ]


def _description_paragraphs(raw_html):
    """Return the stripped, non-empty text of every <p> element in raw_html.

    Parameters
    ----------
    raw_html : str, list, or missing value
        HTML markup of one job description. A list is treated as an already
        cleaned value and returned unchanged; any other non-string (for
        example None/NaN) yields an empty list.

    Returns
    -------
    list of str
        One entry per non-blank paragraph, in document order.
    """
    if isinstance(raw_html, list):
        return raw_html
    if not isinstance(raw_html, str):
        return []
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed, so results can differ between machines.
    soup = BeautifulSoup(raw_html, 'html.parser')
    stripped = (p.text.strip() for p in soup.find_all('p'))
    return [text for text in stripped if text]


# Convert only the date columns that are still present; on a re-run some of
# them have already been dropped below.
dates_present = [col for col in col_dates if col in data_jobs.columns]
if dates_present:
    data_jobs[dates_present] = data_jobs[dates_present].astype('datetime64[ns]')

# Rebind instead of dropping in place; errors='ignore' skips columns that are
# already gone (note: a misspelled name in the list is skipped silently too).
data_jobs = data_jobs.drop(columns=columns_to_drop, errors='ignore')

data_jobs['description'] = data_jobs['description'].apply(_description_paragraphs)

## OPTIONAL: Save as a CSV

In [7]:
# Optional export, disabled by default: uncomment the next line to write
# data_jobs to 'StudentEmployment.csv'.
# data_jobs.to_csv('StudentEmployment.csv')

## FILTER OUT THE JOBS THAT HAVE 'Online', 'Custodian', OR 'TA' IN THEIR TITLE

In [8]:
# FILTERED JOBS: NEITHER ONLINE, NOR CUSTODIAN, NOR TA.
# The remaining jobs are ordered from the highest to the lowest pay rate.

# Each entry is used as a regular-expression alternative and matched
# case-sensitively anywhere in the title, so 'TA' also matches any longer
# word that contains the capital letters "TA".
remove = ['Online','Custodian','TA']

# na=False: a missing title counts as "no match" instead of putting a NaN in
# the mask, which could not be used for boolean indexing.
unwanted = data_jobs["title"].str.contains('|'.join(remove), na=False)

data_filtered = (
    data_jobs[~unwanted]
    .sort_values('payRate', ascending=False)
    .reset_index(drop=True)  # renumber rows 0..n-1 without keeping the old index
)

print(f'{data_filtered.shape[0]} JOBS OUT OF {data_jobs.shape[0]} POSTED JOBS.')

# Notebook-wide display option: from this point on DataFrames are shown with all rows.
pd.set_option('display.max_rows', None)

data_filtered

38 JOBS OUT OF 380 POSTED JOBS.


Unnamed: 0,title,departmentName,description,summary,managerName,payRate,workSchedule,beginningDate,recruitingStartDate
0,Stores and Receiving Delivery Driver,University Support Services,[The starting pay rate for this position is $1...,This position will drive a delivery truck and ...,Doug Mason,11.58,8am - 12pm,2022-08-26 10:00:00,2019-08-15 01:00:00
1,Health Center Lab Assistant,Student Health Center,"[PRINCIPLE DUTIES, REQUIREMENTS, APPLICATION I...","Assist lab manager in phlebotomy, performance ...",Emma Taylor,11.5,,2022-09-12 18:00:00,2022-04-20 01:00:00
2,Degree Verification Mid Shift,Student Records & Registration,"[Responsibilities include, but are not limited...","Responsibilities include, but are not limited ...",Barbara Clawson,10.7,11 AM-2 PM M-F,2022-09-12 12:00:00,2021-06-04 01:00:00
3,University Aid Processing Specialist,Student Financial Aid,"[Duties: Analyze awarding data, determine awar...",Ensures that all university aid is processed s...,Mike Kelley,10.5,Somewhat flexible. Minimum 20 hours per week.,2022-09-12 09:00:00,2016-06-01 01:00:00
4,Email Specialist,Main BSC Support Center,"[As a BYU-Idaho Customer Care Specialist, you ...",Your duties include working at a fast pace to ...,Abigayle Ludlow,10.4,1pm-5pm,2022-09-12 06:00:00,2021-08-10 01:00:00
5,Chat Specialist,Main BSC Support Center,"[As a BYU-Idaho Customer Care Specialist, you ...",Your duties include working at a fast pace to ...,Abigayle Ludlow,10.4,"7am-10am, 10am-1pm",2022-09-12 06:00:00,2021-08-10 01:00:00
6,Food Services Student Jobs,University Food Services,"[Fast-paced, demanding job. We need dependabl...",Fall Food Service Jobs Start at $10 PER HOUR a...,Fauneil Schultz,10.0,Many Shifts Available,2022-09-16 00:00:00,2022-06-01 01:00:00
7,Data Entry - Alumni,Alumni,[Please read this posting thoroughly and caref...,This position enters data into the Alumni data...,Kathy Godfrey,10.0,Flexible - M-F 8-5 pm,2022-09-14 00:00:00,2022-08-22 01:00:00
8,Student Project Manager,Online Curriculum Development,[The student project manager position undertak...,Assist in managing the audiovisual section of ...,Brian Carter,10.0,Varies,2022-09-09 12:00:00,2016-11-28 01:00:00
9,Systems & Innovation: Product Specialist,Online Quality Assurance,"[Responsibilities, Research educational techno...","As a Product Specialist, you will be expected ...",Corey Moore,9.6,"Monday-Saturday (7 AM to 10 PM, flexible shifts)",2022-09-10 20:00:00,2022-01-21 01:00:00


## OPTIONAL: Save as a CSV

In [9]:
# Optional export, disabled by default: uncomment the next line to write
# data_filtered to 'StudentEmployment_filtered.csv'.
# data_filtered.to_csv('StudentEmployment_filtered.csv')