### Importing packages

In [6]:
from getpass import getpass

import requests
import pandas as pd

### Creating functions to pull data from API
Reference: https://www.glassdoor.com/developer/index.htm

In [7]:
def get_jobs(partner_id, api_key, keyword, page, min_rating = 0, min_rec_perc = 0, min_reviews = 0, timeout = 30):
    """Fetch one page of job results for a keyword from the Glassdoor API.

    Parameters
    ----------
    partner_id : str
        Partner ID, sent as the ``t.p`` query parameter.
    api_key : str
        API key, sent as the ``t.k`` query parameter.
    keyword : str
        Search keyword, sent as ``q``.
    page : int
        Result page to request.
    min_rating : float, optional
        Sent as ``minRating``; defaults to 0.
    min_rec_perc : float, optional
        Sent as ``minRecommendPercent``; defaults to 0.
    min_reviews : int, optional
        Sent as ``minReviewCount``; defaults to 0.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so a stalled connection cannot hang the notebook forever.

    Returns
    -------
    list
        ``data["response"]["results"]`` from the JSON payload on HTTP 200.
        An empty list (after printing an error line) when the status code is
        not 200 or the body is not valid JSON.
    """
    url = "https://api.glassdoor.com/api/api.htm"
    params = {
        "v": "1",
        "format": "json",  # was "jsonß": a stray character made the requested format invalid
        "t.p": partner_id,
        "t.k": api_key,
        "userip": "0.0.0.0",
        "useragent": "Mozilla",
        "action": "jobs",
        "country": 'United States', ### adding filters
        "q": keyword,
        "page": page,
        "minRating": min_rating, ### adding filters
        "minRecommendPercent": min_rec_perc, ### adding filters
        "minReviewCount": min_reviews ### adding filters
    }

    response = requests.get(url, params=params, timeout=timeout)

    # Error responses are not guaranteed to carry a JSON body; parse defensively
    # so a failed request is reported instead of raising a decode error.
    try:
        data = response.json()
    except ValueError:
        print("Error:", f"HTTP {response.status_code} (response body is not valid JSON)")
        return []

    if response.status_code == 200:
        # NOTE(review): assumes a successful payload always has response -> results; confirm against the API docs.
        return data["response"]["results"]

    # Fall back to the HTTP status code when the payload has no "status" field.
    status = data.get("status", response.status_code) if isinstance(data, dict) else response.status_code
    print("Error:", status)
    return []

### Requesting the user's API credentials
The API requires a partner ID and an API Key to authenticate the session. So the first step is to get the credentials from the user.

Moreover, the user will be asked which filters they would like to add to the query.

If no filter is required, enter 0 for each value; the program will then pull every job that matches the search keywords.

In [10]:
# Credentials used to authenticate against the API.
usr_pid = input('Insert your partner ID: ')
# getpass hides the typed key so it is not echoed into the saved cell output.
usr_api_key = getpass('Insert your API Key: ')

# Query filters; a value of 0 means "do not filter on this field".
min_rating = float(input('Insert the min desired rating: '))
min_rec_perc = float(input('Insert the min recommendation %: '))
min_reviews = int(input('Insert the min review count: '))

# Echo the chosen filters back (the recommendation line previously printed min_rating by mistake).
print(f'Min Rating: {min_rating}\nMin Recommendation %: {min_rec_perc}%\nMin Review Count: {min_reviews}')

Min Rating: 4.0
Min Recommendation %: 4.0%
Min Review Count: 20


### Creating list with keywords to search through Glassdoor's API
Lastly, the program asks the user to input all the search keywords to generate the list of jobs.

It then writes all the retrieved job data to an Excel file (`data_jobs.xlsx`) in the notebook's current working directory.

In [None]:
# Build the list of unique, title-cased search keywords interactively.
keyword_list = []

while True:
    answer = ''
    kw = input('Type your job keyword: ').title()

    # Duplicates are rejected and the user is prompted for a new keyword.
    if kw in keyword_list:
        print('This keyword has already been included. Type another one')
        continue

    keyword_list.append(kw)
    print(f'Keyword list: {keyword_list}')

    # Keep asking until the reply is exactly 'y' or 'n' (case-insensitive).
    while answer not in ('y', 'n'):
        answer = input('Would you like to add any other keyword? (y/n) ').lower()

    if answer == 'n':
        break
        
# Accumulates the job records returned for every keyword and page.
jobs_interested = []

for keyword in keyword_list:
    # Restart pagination for each keyword. Previously the counter was set once
    # before the loop, so later keywords began at the page where the previous
    # keyword had run out of results.
    page = 1
    while True:
        jobs = get_jobs(usr_pid, usr_api_key, keyword, page, min_rating, min_rec_perc, min_reviews)
        # An empty page (or a failed request, which also yields []) ends this keyword.
        if not jobs:
            break
        jobs_interested.extend(jobs)
        page += 1

# One row per job record, written next to the notebook's working directory.
df = pd.DataFrame(jobs_interested)
df.to_excel("data_jobs.xlsx", index=False)
