In [33]:
import pandas as pd
import numpy as np
import requests
import json
from bs4 import BeautifulSoup
from time import sleep
from ipynb.fs.defs.job_postings_scraper import BaseScraper, check_null_rows

`companies = ['csl', 'cba', 'bhp', 'wow', 'tls']`

In [3]:
## Test code

# Fetch the first page of reviews for company 432334 (most-upvoted first) to
# explore the shape of the API response before writing the scraper.
test_url = 'https://api-seek.prod.companyreview.co/companies/432334/company-reviews?page=1&sort=-upvote_count&api_key=jwt_prodSeekAuBrowserKey'
response = requests.get(test_url)
# Fail here on a 4xx/5xx reply rather than with a confusing JSON/KeyError below.
response.raise_for_status()
# The endpoint returns JSON, so decode it directly. Routing the body through an
# HTML parser first would decode entities and strip tag-like text inside the
# review strings, which can alter the data or break the JSON.
test_json = response.json()['data']

In [17]:
# Display the second review of the test page to see which fields a review carries.
test_json[1]

{'company_id': '432334',
 'company_recommended': False,
 'salary_summary': 'fair',
 'job_title': 'Quality Assurance Role',
 'review_title': 'Another capitalist hellscape',
 'pros': 'Discounts, co-workers. ',
 'cons': 'Constantly asked to do the work of 3 people then blamed when things don’t get done. \nSoooo much food thrown away even after donating lots',
 'created_at': '2022-08-16T01:35:55.000Z',
 'rating': {'company_overall': 1,
  'benefits_and_perks': 1,
  'career_opportunity': 1,
  'executive_management': 1,
  'work_environment': 1,
  'work_life_balance': 1,
  'diversity': 5},
 'review_id': '166061375522536',
 'upvotes': {'user_ids': [],
  'count': 4,
  'last_upvote_at': '2022-11-22T08:11:27.458Z'},
 'replies': [],
 'anonymize_job_title': False,
 'company_name': 'Woolworths Group',
 'work_location': 'North Shore & Northern Beaches Sydney NSW',
 'id': '166061375522536',
 'is_anonymized': False,
 'rating_benefits_and_perks': 1,
 'rating_career_opportunity': 1,
 'rating_company_overa

In [16]:
# Copy the nested per-category ratings of the second test review into a flat
# dict, preserving the order in which the API lists them.
test_data = {
    category: score
    for category, score in test_json[1]['rating'].items()
}

test_data

{'company_overall': 1,
 'benefits_and_perks': 1,
 'career_opportunity': 1,
 'executive_management': 1,
 'work_environment': 1,
 'work_life_balance': 1,
 'diversity': 5}

In [26]:
class SeekCompanyReviewScraper(BaseScraper):
    """
    Scraper for the paginated Seek company-review JSON endpoint.

    Every page is requested as `<raw_url>?page=<n>&sort=-upvote_count&api_key=...`
    and each review that carries all of the wanted fields is appended to
    `self.data` as a flat dict.

    NOTE(review): `self.raw_url` and `self.data` are not assigned in this class;
    they are presumably set up by `BaseScraper.__init__` from the `raw_url`
    keyword argument -- confirm against job_postings_scraper.
    """

    # Query string appended to `raw_url`; placeholders are (raw_url, page number).
    _URL_TEMPLATE = "{}?page={}&sort=-upvote_count&api_key=jwt_prodSeekAuBrowserKey"

    # Fields copied from every review. Defined once here instead of being
    # rebuilt for each page inside the scraping loop.
    _REVIEW_FIELDS = (
        'company_name',
        'job_title',
        'review_title',
        'company_recommended',
        'salary_summary',
        'pros',
        'cons',
        'rating_benefits_and_perks',
        'rating_career_opportunity',
        'rating_company_overall',
        'rating_executive_management',
        'rating_work_environment',
        'rating_work_life_balance',
        'rating_diversity',
    )

    def __init__(self, total_pages, *args, **kwargs):
        """
        ------------
        Input(s):
        total_pages (int): number of result pages to request (pages 1..total_pages).
        *args, **kwargs: forwarded unchanged to `BaseScraper.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.total_pages = total_pages

    def scrape(self, sleep_duration=3, verbose=True, timeout=30):
        """
        Scrape every review page of the configured company.

        ------------
        Input(s):
        sleep_duration (int or float, default=3): seconds to pause after each GET request.
        verbose (bool, default=True): whether to print progress for each individual review scraped.
        timeout (int or float, default=30): seconds to wait for the server before the request is abandoned.

        Output(s):
        None. Appends one dict per review to the `data` attribute of the class.

        Raises:
        requests.HTTPError if a page answers with a 4xx/5xx status.
        """
        for page in range(1, self.total_pages + 1):
            # Request the page; without a timeout a stalled connection would
            # hang the whole multi-page run indefinitely.
            res = requests.get(
                self._URL_TEMPLATE.format(self.raw_url, page),
                timeout=timeout,
            )
            # Stop on an error status instead of trying to parse an error body.
            res.raise_for_status()
            sleep(sleep_duration)

            # The endpoint returns JSON, so decode it directly. Passing the body
            # through an HTML parser first would decode entities and strip
            # tag-like text inside review strings, corrupting or breaking it.
            reviews = res.json()['data']
            num_reviews_on_this_page = len(reviews)

            for position, review in enumerate(reviews, start=1):
                if verbose:
                    print(f'Scraping review {position} of {num_reviews_on_this_page} on page {page}...')
                try:
                    record = {field: review[field] for field in self._REVIEW_FIELDS}
                except (KeyError, TypeError) as err:
                    # Best effort: a review lacking a wanted field is skipped,
                    # but only these expected errors are swallowed (a bare
                    # except would also hide KeyboardInterrupt and real bugs).
                    if verbose:
                        print(f'Skipping review {position} on page {page}: {err!r}')
                    continue

                # Store the completed review
                self.data.append(record)

        if verbose:
            print('Done scraping all data.')

# Woolworths Group: Company reviews on Seek

In [27]:
# Woolworths Group (company id 432334): request 65 pages of reviews.
wow_seek_company_reviews = SeekCompanyReviewScraper(
    raw_url='https://api-seek.prod.companyreview.co/companies/432334/company-reviews',
    total_pages=65,
)

# Pause 1.5 s after every request while collecting the reviews.
wow_seek_company_reviews.scrape(sleep_duration=1.5)

# Persist the scraped reviews; the returned object is combined with the other
# companies' results further down (presumably a DataFrame -- see BaseScraper.save_data).
wow_reviews_df = wow_seek_company_reviews.save_data(
    'data/company_reviews_seek/wow_company_reviews.csv'
)

Scraping review 1 of 10 on page 1...
Scraping review 2 of 10 on page 1...
Scraping review 3 of 10 on page 1...
Scraping review 4 of 10 on page 1...
Scraping review 5 of 10 on page 1...
Scraping review 6 of 10 on page 1...
Scraping review 7 of 10 on page 1...
Scraping review 8 of 10 on page 1...
Scraping review 9 of 10 on page 1...
Scraping review 10 of 10 on page 1...
Scraping review 1 of 10 on page 2...
Scraping review 2 of 10 on page 2...
Scraping review 3 of 10 on page 2...
Scraping review 4 of 10 on page 2...
Scraping review 5 of 10 on page 2...
Scraping review 6 of 10 on page 2...
Scraping review 7 of 10 on page 2...
Scraping review 8 of 10 on page 2...
Scraping review 9 of 10 on page 2...
Scraping review 10 of 10 on page 2...
Scraping review 1 of 10 on page 3...
Scraping review 2 of 10 on page 3...
Scraping review 3 of 10 on page 3...
Scraping review 4 of 10 on page 3...
Scraping review 5 of 10 on page 3...
Scraping review 6 of 10 on page 3...
Scraping review 7 of 10 on page 3...

Scraping review 1 of 10 on page 23...
Scraping review 2 of 10 on page 23...
Scraping review 3 of 10 on page 23...
Scraping review 4 of 10 on page 23...
Scraping review 5 of 10 on page 23...
Scraping review 6 of 10 on page 23...
Scraping review 7 of 10 on page 23...
Scraping review 8 of 10 on page 23...
Scraping review 9 of 10 on page 23...
Scraping review 10 of 10 on page 23...
Scraping review 1 of 10 on page 24...
Scraping review 2 of 10 on page 24...
Scraping review 3 of 10 on page 24...
Scraping review 4 of 10 on page 24...
Scraping review 5 of 10 on page 24...
Scraping review 6 of 10 on page 24...
Scraping review 7 of 10 on page 24...
Scraping review 8 of 10 on page 24...
Scraping review 9 of 10 on page 24...
Scraping review 10 of 10 on page 24...
Scraping review 1 of 10 on page 25...
Scraping review 2 of 10 on page 25...
Scraping review 3 of 10 on page 25...
Scraping review 4 of 10 on page 25...
Scraping review 5 of 10 on page 25...
Scraping review 6 of 10 on page 25...
Scraping r

Scraping review 1 of 10 on page 45...
Scraping review 2 of 10 on page 45...
Scraping review 3 of 10 on page 45...
Scraping review 4 of 10 on page 45...
Scraping review 5 of 10 on page 45...
Scraping review 6 of 10 on page 45...
Scraping review 7 of 10 on page 45...
Scraping review 8 of 10 on page 45...
Scraping review 9 of 10 on page 45...
Scraping review 10 of 10 on page 45...
Scraping review 1 of 10 on page 46...
Scraping review 2 of 10 on page 46...
Scraping review 3 of 10 on page 46...
Scraping review 4 of 10 on page 46...
Scraping review 5 of 10 on page 46...
Scraping review 6 of 10 on page 46...
Scraping review 7 of 10 on page 46...
Scraping review 8 of 10 on page 46...
Scraping review 9 of 10 on page 46...
Scraping review 10 of 10 on page 46...
Scraping review 1 of 10 on page 47...
Scraping review 2 of 10 on page 47...
Scraping review 3 of 10 on page 47...
Scraping review 4 of 10 on page 47...
Scraping review 5 of 10 on page 47...
Scraping review 6 of 10 on page 47...
Scraping r

# CBA: Company reviews on Seek

In [29]:
# CBA (company id 432306): request 105 pages of reviews.
cba_seek_company_reviews = SeekCompanyReviewScraper(
    raw_url='https://api-seek.prod.companyreview.co/companies/432306/company-reviews',
    total_pages=105,
)

# Pause 1.5 s after every request while collecting the reviews.
cba_seek_company_reviews.scrape(sleep_duration=1.5)

# Persist the scraped reviews; the returned object is combined with the other
# companies' results further down (presumably a DataFrame -- see BaseScraper.save_data).
cba_reviews_df = cba_seek_company_reviews.save_data(
    'data/company_reviews_seek/cba_company_reviews.csv'
)

Scraping review 1 of 10 on page 1...
Scraping review 2 of 10 on page 1...
Scraping review 3 of 10 on page 1...
Scraping review 4 of 10 on page 1...
Scraping review 5 of 10 on page 1...
Scraping review 6 of 10 on page 1...
Scraping review 7 of 10 on page 1...
Scraping review 8 of 10 on page 1...
Scraping review 9 of 10 on page 1...
Scraping review 10 of 10 on page 1...
Scraping review 1 of 10 on page 2...
Scraping review 2 of 10 on page 2...
Scraping review 3 of 10 on page 2...
Scraping review 4 of 10 on page 2...
Scraping review 5 of 10 on page 2...
Scraping review 6 of 10 on page 2...
Scraping review 7 of 10 on page 2...
Scraping review 8 of 10 on page 2...
Scraping review 9 of 10 on page 2...
Scraping review 10 of 10 on page 2...
Scraping review 1 of 10 on page 3...
Scraping review 2 of 10 on page 3...
Scraping review 3 of 10 on page 3...
Scraping review 4 of 10 on page 3...
Scraping review 5 of 10 on page 3...
Scraping review 6 of 10 on page 3...
Scraping review 7 of 10 on page 3...

Scraping review 1 of 10 on page 23...
Scraping review 2 of 10 on page 23...
Scraping review 3 of 10 on page 23...
Scraping review 4 of 10 on page 23...
Scraping review 5 of 10 on page 23...
Scraping review 6 of 10 on page 23...
Scraping review 7 of 10 on page 23...
Scraping review 8 of 10 on page 23...
Scraping review 9 of 10 on page 23...
Scraping review 10 of 10 on page 23...
Scraping review 1 of 10 on page 24...
Scraping review 2 of 10 on page 24...
Scraping review 3 of 10 on page 24...
Scraping review 4 of 10 on page 24...
Scraping review 5 of 10 on page 24...
Scraping review 6 of 10 on page 24...
Scraping review 7 of 10 on page 24...
Scraping review 8 of 10 on page 24...
Scraping review 9 of 10 on page 24...
Scraping review 10 of 10 on page 24...
Scraping review 1 of 10 on page 25...
Scraping review 2 of 10 on page 25...
Scraping review 3 of 10 on page 25...
Scraping review 4 of 10 on page 25...
Scraping review 5 of 10 on page 25...
Scraping review 6 of 10 on page 25...
Scraping r

Scraping review 1 of 10 on page 45...
Scraping review 2 of 10 on page 45...
Scraping review 3 of 10 on page 45...
Scraping review 4 of 10 on page 45...
Scraping review 5 of 10 on page 45...
Scraping review 6 of 10 on page 45...
Scraping review 7 of 10 on page 45...
Scraping review 8 of 10 on page 45...
Scraping review 9 of 10 on page 45...
Scraping review 10 of 10 on page 45...
Scraping review 1 of 10 on page 46...
Scraping review 2 of 10 on page 46...
Scraping review 3 of 10 on page 46...
Scraping review 4 of 10 on page 46...
Scraping review 5 of 10 on page 46...
Scraping review 6 of 10 on page 46...
Scraping review 7 of 10 on page 46...
Scraping review 8 of 10 on page 46...
Scraping review 9 of 10 on page 46...
Scraping review 10 of 10 on page 46...
Scraping review 1 of 10 on page 47...
Scraping review 2 of 10 on page 47...
Scraping review 3 of 10 on page 47...
Scraping review 4 of 10 on page 47...
Scraping review 5 of 10 on page 47...
Scraping review 6 of 10 on page 47...
Scraping r

Scraping review 1 of 10 on page 67...
Scraping review 2 of 10 on page 67...
Scraping review 3 of 10 on page 67...
Scraping review 4 of 10 on page 67...
Scraping review 5 of 10 on page 67...
Scraping review 6 of 10 on page 67...
Scraping review 7 of 10 on page 67...
Scraping review 8 of 10 on page 67...
Scraping review 9 of 10 on page 67...
Scraping review 10 of 10 on page 67...
Scraping review 1 of 10 on page 68...
Scraping review 2 of 10 on page 68...
Scraping review 3 of 10 on page 68...
Scraping review 4 of 10 on page 68...
Scraping review 5 of 10 on page 68...
Scraping review 6 of 10 on page 68...
Scraping review 7 of 10 on page 68...
Scraping review 8 of 10 on page 68...
Scraping review 9 of 10 on page 68...
Scraping review 10 of 10 on page 68...
Scraping review 1 of 10 on page 69...
Scraping review 2 of 10 on page 69...
Scraping review 3 of 10 on page 69...
Scraping review 4 of 10 on page 69...
Scraping review 5 of 10 on page 69...
Scraping review 6 of 10 on page 69...
Scraping r

Scraping review 1 of 10 on page 89...
Scraping review 2 of 10 on page 89...
Scraping review 3 of 10 on page 89...
Scraping review 4 of 10 on page 89...
Scraping review 5 of 10 on page 89...
Scraping review 6 of 10 on page 89...
Scraping review 7 of 10 on page 89...
Scraping review 8 of 10 on page 89...
Scraping review 9 of 10 on page 89...
Scraping review 10 of 10 on page 89...
Scraping review 1 of 10 on page 90...
Scraping review 2 of 10 on page 90...
Scraping review 3 of 10 on page 90...
Scraping review 4 of 10 on page 90...
Scraping review 5 of 10 on page 90...
Scraping review 6 of 10 on page 90...
Scraping review 7 of 10 on page 90...
Scraping review 8 of 10 on page 90...
Scraping review 9 of 10 on page 90...
Scraping review 10 of 10 on page 90...
Scraping review 1 of 10 on page 91...
Scraping review 2 of 10 on page 91...
Scraping review 3 of 10 on page 91...
Scraping review 4 of 10 on page 91...
Scraping review 5 of 10 on page 91...
Scraping review 6 of 10 on page 91...
Scraping r

# CSL: Company reviews on Seek

In [30]:
# CSL (company id 436233): request 3 pages of reviews.
csl_seek_company_reviews = SeekCompanyReviewScraper(
    raw_url='https://api-seek.prod.companyreview.co/companies/436233/company-reviews',
    total_pages=3,
)

# Pause 1.5 s after every request while collecting the reviews.
csl_seek_company_reviews.scrape(sleep_duration=1.5)

# Persist the scraped reviews; the returned object is combined with the other
# companies' results further down (presumably a DataFrame -- see BaseScraper.save_data).
csl_reviews_df = csl_seek_company_reviews.save_data(
    'data/company_reviews_seek/csl_company_reviews.csv'
)

Scraping review 1 of 10 on page 1...
Scraping review 2 of 10 on page 1...
Scraping review 3 of 10 on page 1...
Scraping review 4 of 10 on page 1...
Scraping review 5 of 10 on page 1...
Scraping review 6 of 10 on page 1...
Scraping review 7 of 10 on page 1...
Scraping review 8 of 10 on page 1...
Scraping review 9 of 10 on page 1...
Scraping review 10 of 10 on page 1...
Scraping review 1 of 10 on page 2...
Scraping review 2 of 10 on page 2...
Scraping review 3 of 10 on page 2...
Scraping review 4 of 10 on page 2...
Scraping review 5 of 10 on page 2...
Scraping review 6 of 10 on page 2...
Scraping review 7 of 10 on page 2...
Scraping review 8 of 10 on page 2...
Scraping review 9 of 10 on page 2...
Scraping review 10 of 10 on page 2...
Scraping review 1 of 5 on page 3...
Scraping review 2 of 5 on page 3...
Scraping review 3 of 5 on page 3...
Scraping review 4 of 5 on page 3...
Scraping review 5 of 5 on page 3...
Done scraping all data.


# BHP: Company reviews on Seek

In [31]:
# BHP (company id 432317): request 80 pages of reviews.
bhp_seek_company_reviews = SeekCompanyReviewScraper(
    raw_url='https://api-seek.prod.companyreview.co/companies/432317/company-reviews',
    total_pages=80,
)

# Pause 1.5 s after every request while collecting the reviews.
bhp_seek_company_reviews.scrape(sleep_duration=1.5)

# Persist the scraped reviews; the returned object is combined with the other
# companies' results further down (presumably a DataFrame -- see BaseScraper.save_data).
bhp_reviews_df = bhp_seek_company_reviews.save_data(
    'data/company_reviews_seek/bhp_company_reviews.csv'
)

Scraping review 1 of 10 on page 1...
Scraping review 2 of 10 on page 1...
Scraping review 3 of 10 on page 1...
Scraping review 4 of 10 on page 1...
Scraping review 5 of 10 on page 1...
Scraping review 6 of 10 on page 1...
Scraping review 7 of 10 on page 1...
Scraping review 8 of 10 on page 1...
Scraping review 9 of 10 on page 1...
Scraping review 10 of 10 on page 1...
Scraping review 1 of 10 on page 2...
Scraping review 2 of 10 on page 2...
Scraping review 3 of 10 on page 2...
Scraping review 4 of 10 on page 2...
Scraping review 5 of 10 on page 2...
Scraping review 6 of 10 on page 2...
Scraping review 7 of 10 on page 2...
Scraping review 8 of 10 on page 2...
Scraping review 9 of 10 on page 2...
Scraping review 10 of 10 on page 2...
Scraping review 1 of 10 on page 3...
Scraping review 2 of 10 on page 3...
Scraping review 3 of 10 on page 3...
Scraping review 4 of 10 on page 3...
Scraping review 5 of 10 on page 3...
Scraping review 6 of 10 on page 3...
Scraping review 7 of 10 on page 3...

Scraping review 1 of 10 on page 23...
Scraping review 2 of 10 on page 23...
Scraping review 3 of 10 on page 23...
Scraping review 4 of 10 on page 23...
Scraping review 5 of 10 on page 23...
Scraping review 6 of 10 on page 23...
Scraping review 7 of 10 on page 23...
Scraping review 8 of 10 on page 23...
Scraping review 9 of 10 on page 23...
Scraping review 10 of 10 on page 23...
Scraping review 1 of 10 on page 24...
Scraping review 2 of 10 on page 24...
Scraping review 3 of 10 on page 24...
Scraping review 4 of 10 on page 24...
Scraping review 5 of 10 on page 24...
Scraping review 6 of 10 on page 24...
Scraping review 7 of 10 on page 24...
Scraping review 8 of 10 on page 24...
Scraping review 9 of 10 on page 24...
Scraping review 10 of 10 on page 24...
Scraping review 1 of 10 on page 25...
Scraping review 2 of 10 on page 25...
Scraping review 3 of 10 on page 25...
Scraping review 4 of 10 on page 25...
Scraping review 5 of 10 on page 25...
Scraping review 6 of 10 on page 25...
Scraping r

Scraping review 1 of 10 on page 45...
Scraping review 2 of 10 on page 45...
Scraping review 3 of 10 on page 45...
Scraping review 4 of 10 on page 45...
Scraping review 5 of 10 on page 45...
Scraping review 6 of 10 on page 45...
Scraping review 7 of 10 on page 45...
Scraping review 8 of 10 on page 45...
Scraping review 9 of 10 on page 45...
Scraping review 10 of 10 on page 45...
Scraping review 1 of 10 on page 46...
Scraping review 2 of 10 on page 46...
Scraping review 3 of 10 on page 46...
Scraping review 4 of 10 on page 46...
Scraping review 5 of 10 on page 46...
Scraping review 6 of 10 on page 46...
Scraping review 7 of 10 on page 46...
Scraping review 8 of 10 on page 46...
Scraping review 9 of 10 on page 46...
Scraping review 10 of 10 on page 46...
Scraping review 1 of 10 on page 47...
Scraping review 2 of 10 on page 47...
Scraping review 3 of 10 on page 47...
Scraping review 4 of 10 on page 47...
Scraping review 5 of 10 on page 47...
Scraping review 6 of 10 on page 47...
Scraping r

Scraping review 1 of 10 on page 67...
Scraping review 2 of 10 on page 67...
Scraping review 3 of 10 on page 67...
Scraping review 4 of 10 on page 67...
Scraping review 5 of 10 on page 67...
Scraping review 6 of 10 on page 67...
Scraping review 7 of 10 on page 67...
Scraping review 8 of 10 on page 67...
Scraping review 9 of 10 on page 67...
Scraping review 10 of 10 on page 67...
Scraping review 1 of 10 on page 68...
Scraping review 2 of 10 on page 68...
Scraping review 3 of 10 on page 68...
Scraping review 4 of 10 on page 68...
Scraping review 5 of 10 on page 68...
Scraping review 6 of 10 on page 68...
Scraping review 7 of 10 on page 68...
Scraping review 8 of 10 on page 68...
Scraping review 9 of 10 on page 68...
Scraping review 10 of 10 on page 68...
Scraping review 1 of 10 on page 69...
Scraping review 2 of 10 on page 69...
Scraping review 3 of 10 on page 69...
Scraping review 4 of 10 on page 69...
Scraping review 5 of 10 on page 69...
Scraping review 6 of 10 on page 69...
Scraping r

# TLS: Company reviews on Seek

In [32]:
# TLS (company id 432298): request 151 pages of reviews.
tls_seek_company_reviews = SeekCompanyReviewScraper(
    raw_url='https://api-seek.prod.companyreview.co/companies/432298/company-reviews',
    total_pages=151,
)

# Pause 1 s after every request (shorter than the 1.5 s used for the other companies).
tls_seek_company_reviews.scrape(sleep_duration=1)

# Persist the scraped reviews; the returned object is combined with the other
# companies' results further down (presumably a DataFrame -- see BaseScraper.save_data).
tls_reviews_df = tls_seek_company_reviews.save_data(
    'data/company_reviews_seek/tls_company_reviews.csv'
)

Scraping review 1 of 10 on page 1...
Scraping review 2 of 10 on page 1...
Scraping review 3 of 10 on page 1...
Scraping review 4 of 10 on page 1...
Scraping review 5 of 10 on page 1...
Scraping review 6 of 10 on page 1...
Scraping review 7 of 10 on page 1...
Scraping review 8 of 10 on page 1...
Scraping review 9 of 10 on page 1...
Scraping review 10 of 10 on page 1...
Scraping review 1 of 10 on page 2...
Scraping review 2 of 10 on page 2...
Scraping review 3 of 10 on page 2...
Scraping review 4 of 10 on page 2...
Scraping review 5 of 10 on page 2...
Scraping review 6 of 10 on page 2...
Scraping review 7 of 10 on page 2...
Scraping review 8 of 10 on page 2...
Scraping review 9 of 10 on page 2...
Scraping review 10 of 10 on page 2...
Scraping review 1 of 10 on page 3...
Scraping review 2 of 10 on page 3...
Scraping review 3 of 10 on page 3...
Scraping review 4 of 10 on page 3...
Scraping review 5 of 10 on page 3...
Scraping review 6 of 10 on page 3...
Scraping review 7 of 10 on page 3...

Scraping review 1 of 10 on page 23...
Scraping review 2 of 10 on page 23...
Scraping review 3 of 10 on page 23...
Scraping review 4 of 10 on page 23...
Scraping review 5 of 10 on page 23...
Scraping review 6 of 10 on page 23...
Scraping review 7 of 10 on page 23...
Scraping review 8 of 10 on page 23...
Scraping review 9 of 10 on page 23...
Scraping review 10 of 10 on page 23...
Scraping review 1 of 10 on page 24...
Scraping review 2 of 10 on page 24...
Scraping review 3 of 10 on page 24...
Scraping review 4 of 10 on page 24...
Scraping review 5 of 10 on page 24...
Scraping review 6 of 10 on page 24...
Scraping review 7 of 10 on page 24...
Scraping review 8 of 10 on page 24...
Scraping review 9 of 10 on page 24...
Scraping review 10 of 10 on page 24...
Scraping review 1 of 10 on page 25...
Scraping review 2 of 10 on page 25...
Scraping review 3 of 10 on page 25...
Scraping review 4 of 10 on page 25...
Scraping review 5 of 10 on page 25...
Scraping review 6 of 10 on page 25...
Scraping r

Scraping review 1 of 10 on page 45...
Scraping review 2 of 10 on page 45...
Scraping review 3 of 10 on page 45...
Scraping review 4 of 10 on page 45...
Scraping review 5 of 10 on page 45...
Scraping review 6 of 10 on page 45...
Scraping review 7 of 10 on page 45...
Scraping review 8 of 10 on page 45...
Scraping review 9 of 10 on page 45...
Scraping review 10 of 10 on page 45...
Scraping review 1 of 10 on page 46...
Scraping review 2 of 10 on page 46...
Scraping review 3 of 10 on page 46...
Scraping review 4 of 10 on page 46...
Scraping review 5 of 10 on page 46...
Scraping review 6 of 10 on page 46...
Scraping review 7 of 10 on page 46...
Scraping review 8 of 10 on page 46...
Scraping review 9 of 10 on page 46...
Scraping review 10 of 10 on page 46...
Scraping review 1 of 10 on page 47...
Scraping review 2 of 10 on page 47...
Scraping review 3 of 10 on page 47...
Scraping review 4 of 10 on page 47...
Scraping review 5 of 10 on page 47...
Scraping review 6 of 10 on page 47...
Scraping r

Scraping review 1 of 10 on page 67...
Scraping review 2 of 10 on page 67...
Scraping review 3 of 10 on page 67...
Scraping review 4 of 10 on page 67...
Scraping review 5 of 10 on page 67...
Scraping review 6 of 10 on page 67...
Scraping review 7 of 10 on page 67...
Scraping review 8 of 10 on page 67...
Scraping review 9 of 10 on page 67...
Scraping review 10 of 10 on page 67...
Scraping review 1 of 10 on page 68...
Scraping review 2 of 10 on page 68...
Scraping review 3 of 10 on page 68...
Scraping review 4 of 10 on page 68...
Scraping review 5 of 10 on page 68...
Scraping review 6 of 10 on page 68...
Scraping review 7 of 10 on page 68...
Scraping review 8 of 10 on page 68...
Scraping review 9 of 10 on page 68...
Scraping review 10 of 10 on page 68...
Scraping review 1 of 10 on page 69...
Scraping review 2 of 10 on page 69...
Scraping review 3 of 10 on page 69...
Scraping review 4 of 10 on page 69...
Scraping review 5 of 10 on page 69...
Scraping review 6 of 10 on page 69...
Scraping r

Scraping review 1 of 10 on page 89...
Scraping review 2 of 10 on page 89...
Scraping review 3 of 10 on page 89...
Scraping review 4 of 10 on page 89...
Scraping review 5 of 10 on page 89...
Scraping review 6 of 10 on page 89...
Scraping review 7 of 10 on page 89...
Scraping review 8 of 10 on page 89...
Scraping review 9 of 10 on page 89...
Scraping review 10 of 10 on page 89...
Scraping review 1 of 10 on page 90...
Scraping review 2 of 10 on page 90...
Scraping review 3 of 10 on page 90...
Scraping review 4 of 10 on page 90...
Scraping review 5 of 10 on page 90...
Scraping review 6 of 10 on page 90...
Scraping review 7 of 10 on page 90...
Scraping review 8 of 10 on page 90...
Scraping review 9 of 10 on page 90...
Scraping review 10 of 10 on page 90...
Scraping review 1 of 10 on page 91...
Scraping review 2 of 10 on page 91...
Scraping review 3 of 10 on page 91...
Scraping review 4 of 10 on page 91...
Scraping review 5 of 10 on page 91...
Scraping review 6 of 10 on page 91...
Scraping r

Scraping review 1 of 10 on page 111...
Scraping review 2 of 10 on page 111...
Scraping review 3 of 10 on page 111...
Scraping review 4 of 10 on page 111...
Scraping review 5 of 10 on page 111...
Scraping review 6 of 10 on page 111...
Scraping review 7 of 10 on page 111...
Scraping review 8 of 10 on page 111...
Scraping review 9 of 10 on page 111...
Scraping review 10 of 10 on page 111...
Scraping review 1 of 10 on page 112...
Scraping review 2 of 10 on page 112...
Scraping review 3 of 10 on page 112...
Scraping review 4 of 10 on page 112...
Scraping review 5 of 10 on page 112...
Scraping review 6 of 10 on page 112...
Scraping review 7 of 10 on page 112...
Scraping review 8 of 10 on page 112...
Scraping review 9 of 10 on page 112...
Scraping review 10 of 10 on page 112...
Scraping review 1 of 10 on page 113...
Scraping review 2 of 10 on page 113...
Scraping review 3 of 10 on page 113...
Scraping review 4 of 10 on page 113...
Scraping review 5 of 10 on page 113...
Scraping review 6 of 10

Scraping review 1 of 10 on page 132...
Scraping review 2 of 10 on page 132...
Scraping review 3 of 10 on page 132...
Scraping review 4 of 10 on page 132...
Scraping review 5 of 10 on page 132...
Scraping review 6 of 10 on page 132...
Scraping review 7 of 10 on page 132...
Scraping review 8 of 10 on page 132...
Scraping review 9 of 10 on page 132...
Scraping review 10 of 10 on page 132...
Scraping review 1 of 10 on page 133...
Scraping review 2 of 10 on page 133...
Scraping review 3 of 10 on page 133...
Scraping review 4 of 10 on page 133...
Scraping review 5 of 10 on page 133...
Scraping review 6 of 10 on page 133...
Scraping review 7 of 10 on page 133...
Scraping review 8 of 10 on page 133...
Scraping review 9 of 10 on page 133...
Scraping review 10 of 10 on page 133...
Scraping review 1 of 10 on page 134...
Scraping review 2 of 10 on page 134...
Scraping review 3 of 10 on page 134...
Scraping review 4 of 10 on page 134...
Scraping review 5 of 10 on page 134...
Scraping review 6 of 10

# All Company Reviews

In [36]:
# Collect the five per-company results, in the order they were scraped.
all_company_reviews = [wow_reviews_df, cba_reviews_df, csl_reviews_df, bhp_reviews_df, tls_reviews_df]

# Stack them into a single table and renumber the rows 0..n-1.
all_company_reviews_df = pd.concat(objs=all_company_reviews, ignore_index=True)

# Show the result of the null-row check on the combined table
# (helper imported from job_postings_scraper).
check_null_rows(all_company_reviews_df)

Unnamed: 0,company_name,job_title,review_title,company_recommended,salary_summary,pros,cons,rating_benefits_and_perks,rating_career_opportunity,rating_company_overall,rating_executive_management,rating_work_environment,rating_work_life_balance,rating_diversity


In [37]:
## Exporting all company reviews to CSV

# Write the combined table as UTF-8 without the row index column.
all_company_reviews_df.to_csv(
    path_or_buf='data/company_reviews_seek/all_company_reviews.csv',
    encoding='utf-8',
    index=False,
)

In [38]:
# Display the (rows, columns) dimensions of the combined review table.
all_company_reviews_df.shape

(4016, 14)

In [39]:
# Print per-column non-null counts and dtypes of the combined review table.
all_company_reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 14 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   company_name                 4016 non-null   object
 1   job_title                    4016 non-null   object
 2   review_title                 4016 non-null   object
 3   company_recommended          4016 non-null   bool  
 4   salary_summary               4016 non-null   object
 5   pros                         4016 non-null   object
 6   cons                         4016 non-null   object
 7   rating_benefits_and_perks    4016 non-null   int64 
 8   rating_career_opportunity    4016 non-null   int64 
 9   rating_company_overall       4016 non-null   int64 
 10  rating_executive_management  4016 non-null   int64 
 11  rating_work_environment      4016 non-null   int64 
 12  rating_work_life_balance     4016 non-null   int64 
 13  rating_diversity             4016