## EXPORTS JOBS POSTED FROM [web.byui.edu/StudentEmployment/](https://web.byui.edu/StudentEmployment/) INTO A CSV FILE

In [1]:
import http.client
import json
import pandas as pd
from bs4 import BeautifulSoup

## REQUEST DATA FROM API ENDPOINT

In [2]:
# Fetch the job list from the Student Employment API over HTTPS.
# The timeout keeps the cell from hanging indefinitely if the host is unreachable.
conn = http.client.HTTPSConnection("web.byui.edu", timeout=30)
try:
    conn.request("GET", "/studentemployment/api/jobs")
    res = conn.getresponse()
    # Fail loudly on a non-200 reply instead of handing an error page to json.loads.
    if res.status != 200:
        raise RuntimeError(f"Job API request failed: HTTP {res.status} {res.reason}")
    data = res.read()
finally:
    # Always release the socket, even when the request or the read fails.
    conn.close()

# Decode the raw bytes, parse the JSON payload, and load it into a DataFrame.
info = data.decode("utf-8")

responseObject = json.loads(info)

data_jobs = pd.DataFrame(responseObject)

## DISPLAY DATA RECEIVED

In [3]:
# Show the DataFrame as the cell's output for a quick visual check of the data.
data_jobs

Unnamed: 0,jobID,title,departmentName,description,summary,displayJob,dateUpdated,startDate,endDate,managerName,...,beginningDate,recruitingStartDate,requireResume,limitApplicants,limitNumber,applicants,jobQuestions,isOnline,allowOnline,jobMajors
0,3898580,CONST221 OnlineGrader,Campus Teaching Assistants,<p>An Online Grader is needed for CONST 221: C...,An Online Grader is needed for CONST 221: Cons...,True,2022-08-25T20:02:34.0867609,2022-07-21T12:00:00,2022-09-13T12:00:00,Jodi Robison,...,2022-09-12T12:00:00,2022-06-17T01:00:00,True,False,0,[],[],False,False,[]
1,3898579,CONST221 OnlineGrader,Online Student,<p>An Online Grader is needed for CONST 221: C...,An Online Grader is needed for CONST 221: Cons...,True,2022-08-25T20:02:34.0399431,2022-07-21T12:00:00,2022-09-13T12:00:00,Jodi Robison,...,2022-09-12T12:00:00,2022-06-17T01:00:00,True,False,0,[],[],False,True,[]
2,3898063,Food Services Student Jobs,University Food Services,"<p>Fast-paced, demanding job. We need dependa...",Fall Food Service Jobs Start at $10 PER HOUR a...,True,2022-08-25T19:49:52.4393275,2022-08-25T12:00:00,2022-09-16T00:00:00,Fauneil Schultz,...,2022-09-16T00:00:00,2022-06-01T01:00:00,True,False,0,[],[],False,False,[]
3,3899327,BUS321 OnlineGrader,Campus Teaching Assistants,<p>An Online Grader is needed for BUS 321: Org...,An Online Grader is needed for BUS 321:Organiz...,True,2022-08-25T19:47:21.6083471,2022-08-24T12:00:00,2022-09-10T12:00:00,Jodi Robison,...,2022-09-12T12:00:00,2022-07-05T01:00:00,True,False,0,[],[],False,False,[]
4,3899323,BUS321 OnlineGrader,Online Student,<p>An Online Grader is needed for BUS 321:Orga...,An Online Grader is needed for BUS 321: Organi...,True,2022-08-25T19:47:21.577101,2022-08-24T12:00:00,2022-09-10T12:00:00,Jodi Robison,...,2022-09-12T12:00:00,2022-07-05T01:00:00,True,False,0,[],[],False,True,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407,32596,Chemistry Stockroom/TA/Grader,Chemistry,<p>This is a general application form for any ...,General Application form for all chemistry dep...,True,2022-06-09T20:08:58.2027174,2022-06-09T14:00:00,,Mike Wood,...,,2016-11-11T01:00:00,True,False,0,[],[],False,False,"[{'jobMajorId': 212, 'jobId': 32596, 'majorId'..."
408,341293,Security-Lock up Patrol,Public Safety,<p>Employees will assist with patrolling campu...,,True,2022-02-18T23:30:52.6069815,2021-12-05T07:00:00,,Kevin Rhoades,...,,2018-08-15T01:00:00,True,False,0,[],[],False,False,[]
409,1213,Music Production Team (Music Dept.),Music,<p>This position supports productions for the ...,"Audio, video, and stage production support for...",True,2022-02-10T20:45:26.4490587,2022-01-04T22:00:00,,Tyler McNiven,...,2022-01-05T22:00:00,2016-01-20T01:00:00,True,False,0,[],[],False,False,[]
410,341008,Turcotte TA,Physics,<p>Will help set up lab activities and assist ...,Looking for GESCI 208 Robotics TA- 3 positions...,True,2022-01-03T16:37:48.0842611,2022-01-03T19:00:00,,Stephen Turcotte,...,2022-01-03T19:00:00,2018-04-03T01:00:00,True,False,0,[],[],False,False,[]


## SIMPLE OVERVIEW

In [4]:
# Headline numbers for the posting list, computed up front so the report
# template below stays readable.
n_unique_jobs = data_jobs.jobID.nunique()
top_pay = data_jobs.payRate.max()
# First title among the rows that share the maximum pay rate.
top_pay_title = data_jobs[data_jobs.payRate == top_pay]['title'].iloc[0]
# "Online" here means the title contains the substring 'Online'.
n_online_titles = data_jobs[data_jobs["title"].str.contains('Online')].shape[0]
n_rows = data_jobs.shape[0]

print(f'''
    THERE ARE {n_unique_jobs} JOBS.
    HIGHEST PAY JOB:\'{top_pay_title}\' WITH {top_pay} DOLLARS AN HOUR.
    ONLINE JOBS: {n_online_titles} OUT OF {n_rows}.
''')


    THERE ARE 412 JOBS.
    HIGHEST PAY JOB:'Health Center Lab Assistant' WITH 11.5 DOLLARS AN HOUR.
    ONLINE JOBS: 359 OUT OF 412.



## CLEAN THE DATA

In [5]:
col_dates = ['dateUpdated','startDate','endDate','beginningDate','recruitingStartDate']

# Convert the date columns from strings to datetime64 values.
data_jobs[col_dates] = data_jobs[col_dates].astype('datetime64[ns]')

columns_to_drop = [
    'jobID', # not needed for EDA
    # 'description', # not needed for EDA
    # 'summary', # not needed for EDA
    'displayJob', # single boolean
    'dateUpdated','startDate','endDate', # not needed for display
    'approximateHoursPerWeek', # not consistent
    'positionsAllocated', # not relevant
    'positionsAvailble', # not relevant
    'workSchedule', # not needed for EDA
    'requireResume', # not relevant
    'limitApplicants', # not relevant
    'limitNumber', # not relevant
    'applicants', # empty
    'jobQuestions', # empty
    'isOnline', # not accurate
    'allowOnline', # not accurate
    'jobMajors' # not relevant
    ]

# Rebind to the trimmed frame instead of dropping in place; `columns=` states
# the axis explicitly.
data_jobs = data_jobs.drop(columns=columns_to_drop)


def _description_paragraphs(html):
    """Return the stripped, non-empty text of every <p> element in ``html``.

    Parameters
    ----------
    html : str or any
        HTML fragment of a job description. Non-string values (e.g. None/NaN
        for a posting without a description) are treated as "no paragraphs".

    Returns
    -------
    list of str
        One entry per non-blank paragraph, in document order; empty when
        there are no <p> elements or the input is not a string.
    """
    if not isinstance(html, str):
        return []
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and emits GuessedAtParserWarning, so the
    # result could differ between environments.
    soup = BeautifulSoup(html, 'html.parser')
    stripped = (p.text.strip() for p in soup.find_all('p'))
    return [text for text in stripped if text != '']


# Replace each raw HTML description with its list of paragraph texts.
data_jobs['description'] = data_jobs['description'].apply(_description_paragraphs)

## OPTIONAL: Save as a CSV

In [6]:
# Optional export, disabled by default. Uncomment the next line to write
# data_jobs to 'StudentEmployment.csv' (a relative path, so the file is
# created in the current working directory).
# data_jobs.to_csv('StudentEmployment.csv')

## FILTER OUT THE JOBS THAT HAVE 'Online', 'Custodian', OR 'TA' IN THEIR TITLE

In [7]:
# Keep only the postings whose title mentions none of the excluded keywords
# (not Online, not Custodian, not TA), ordered from highest to lowest pay.

remove = ['Online','Custodian','TA']

# Join the keywords into one regex alternation: 'Online|Custodian|TA'.
excluded_pattern = '|'.join(remove)
title_is_excluded = data_jobs["title"].str.contains(excluded_pattern)

data_filtered = (
    data_jobs[~title_is_excluded]
    .sort_values('payRate', ascending=False)
    .reset_index(drop=True)  # renumber rows 0..n-1 and discard the old index
)

print(f'{data_filtered.shape[0]} JOBS OUT OF {data_jobs.shape[0]} POSTED JOBS.')

# Lift pandas' row cap so the whole filtered table is rendered below.
pd.set_option('display.max_rows', None)

data_filtered

37 JOBS OUT OF 412 POSTED JOBS.


Unnamed: 0,title,departmentName,description,summary,managerName,payRate,beginningDate,recruitingStartDate
0,Health Center Lab Assistant,Student Health Center,"[PRINCIPLE DUTIES, REQUIREMENTS, APPLICATION I...","Assist lab manager in phlebotomy, performance ...",Emma Taylor,11.5,2022-09-12 12:00:00,2022-04-20 01:00:00
1,Clinician Aide,Student Health Center,"[PRINCIPLE DUTIES, REQUIREMENTS, APPLICATION I...",Assist health care providers and nursing staff...,Emma Taylor,10.15,2022-09-01 12:00:00,2022-03-21 01:00:00
2,Office Operations Student Lead,Housing and Student Living,"[Knowledge:, Apartment Living Standards, Stude...",The Office Operations Student Lead works direc...,Andrew Merrick,10.0,2022-09-01 18:00:00,2021-07-28 01:00:00
3,Employer Relations Content Creator,Career & Employment Services,[The Employer Relations Content Creator will b...,Come join the growing and exciting BYU-Idaho C...,Janet Barton,10.0,2022-09-05 06:00:00,2022-07-26 01:00:00
4,Graphic Designer,Music,[Graphic Designer for the Department of Music....,Create posters and digital ads for department ...,Maria Nate,10.0,2022-09-02 06:00:00,2018-07-23 01:00:00
5,Student Project Manager,Online Curriculum Development,[The student project manager position undertak...,Assist in managing the audiovisual section of ...,Brian Carter,10.0,2022-09-09 12:00:00,2016-11-28 01:00:00
6,Data Entry - Alumni,Alumni,[Please read this posting thoroughly and caref...,This position enters data into the Alumni data...,Kathy Godfrey,10.0,2022-09-13 00:00:00,2022-08-22 01:00:00
7,Food Services Student Jobs,University Food Services,"[Fast-paced, demanding job. We need dependabl...",Fall Food Service Jobs Start at $10 PER HOUR a...,Fauneil Schultz,10.0,2022-09-16 00:00:00,2022-06-01 01:00:00
8,Career Success Mentor,Career Center,[We are seeking enthusiastic individuals who c...,Hiring for Fall Semester 2022 - offering train...,Sheila Wener,10.0,2022-08-29 20:00:00,2019-11-14 01:00:00
9,Ropes Course Facilitator,Student Activities,[],"Summary\n\nAs a ropes course facilitator, you ...",Jason Thornton,9.6,2022-09-06 06:00:00,2016-09-01 01:00:00


## OPTIONAL: Save as a CSV

In [8]:
# Optional export, disabled by default. Uncomment the next line to write
# data_filtered to 'StudentEmployment_filtered.csv' (a relative path, so the
# file is created in the current working directory).
# data_filtered.to_csv('StudentEmployment_filtered.csv')