In [26]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import csv
import pandas as pd
import os

# Site root; prepended to the hrefs scraped from listing pages to build
# absolute URLs.
BASE = 'https://www.schwabjobs.com'

 
def get_category(response):
    """Parse the category index page into a ``{href: count}`` mapping.

    Args:
        response: HTML markup (or an open file-like object yielding it) of
            the category index page.

    Returns:
        dict: each ``/ListJobs`` link ``href`` found inside a
        ``list-categores-table`` table, mapped to the first integer found
        in the text of a ``<div>`` of those tables. Links and numbers are
        paired purely by position. An empty dict is returned when nothing
        matches.

    NOTE(review): the positional pairing assumes the page carries exactly
    one numeric ``<div>`` per category link, in the same order -- confirm
    against the live markup.
    """
    soup = BeautifulSoup(response, "html.parser")
    if not soup:
        return {}

    # Compiled once instead of on every table / div.
    link_pattern = re.compile("/ListJobs")
    number_pattern = re.compile(r'\d+')

    cat_links = []
    counts = []
    for table in soup.find_all("table", {"class": "list-categores-table"}):
        cat_links.extend(
            tag.get('href')
            for tag in table.find_all('a', attrs={'href': link_pattern})
        )
        for div in table.find_all('div'):
            match = number_pattern.search(div.text)
            if match is None:
                # A div without digits cannot hold a count; skipping it
                # avoids calling .group() on None.
                continue
            counts.append(int(match.group()))
    return dict(zip(cat_links, counts))
        
def get_pages(categories):
    """Expand category links into absolute listing-page URLs.

    Args:
        categories: mapping of category href to an integer count, as
            produced by ``get_category``.

    Returns:
        list[str]: for every href that does not contain ``"---"``, either
        the single URL ``BASE + href`` (count of 30 or less) or a run of
        ``BASE + href + "Page-<n>"`` URLs numbered from 1.

    NOTE(review): the number of ``Page-<n>`` URLs is
    ``len(range(1, count, 29))``; the 29 step next to the 30 threshold
    looks like it may over-count pages for some totals -- confirm the
    site's real page size.
    """
    page_links = []
    for href, count in categories.items():
        if "---" in href:
            continue
        if count > 30:
            n_pages = len(range(1, count, 29))
            page_links.extend(
                BASE + href + "Page-" + str(page_no)
                for page_no in range(1, n_pages + 1)
            )
        else:
            page_links.append(BASE + href)
    return page_links
        

def scrape_pages(pages):
    """Scrape every job posting linked from the given listing pages.

    For each listing URL, the ``JobListTable`` table is located, every
    ``/ShowJob`` link in it is passed to ``scrape``, and each successful
    result is appended as one row to ``schwaData.csv`` (the header is
    written only when the file does not exist yet).

    Args:
        pages: iterable of absolute listing-page URLs, as returned by
            ``get_pages``.
    """
    show_job = re.compile("/ShowJob")
    # Iterate the argument itself: reading a module-level ``categories``
    # here would ignore the paginated URLs and break any caller that does
    # not define that global.
    for listing_url in pages:
        with urlopen(listing_url) as listing:
            soup = BeautifulSoup(listing, "html.parser")
        table = soup.find('table', 'JobListTable')
        if table is None:
            # Listing page without a job table: nothing to scrape.
            continue
        for link in table.find_all('a', attrs={'href': show_job}):
            page = BASE + link.get('href')
            print(page)
            pageDict = scrape(page)
            if not isinstance(pageDict, dict):
                # scrape() signals failure with a non-dict value.
                continue
            data = pd.DataFrame([pageDict])
            if not os.path.isfile('schwaData.csv'):
                # First row ever written: include the column names.
                data.to_csv('schwaData.csv', header=True)
            else:
                data.to_csv('schwaData.csv', mode='a', header=False)
        
def scrape(page):
    """Fetch one job posting and count keyword hits in its description.

    The keyword list is the first column of ``freq100.csv``; the text of
    every ``<div class="desc">`` in the posting is split on whitespace and
    each token that exactly equals a keyword increments that keyword's
    counter.

    Args:
        page: absolute URL of the job posting.

    Returns:
        dict: the fixed metadata columns followed by integer keys
        ``1..100``, where key ``k`` holds the count for the ``k``-th
        keyword row of ``freq100.csv``.
        str: the literal ``"An exception occurred"`` when the posting
        cannot be fetched or parsed (callers test for a dict).
    """
    try:
        with urlopen(page) as posting:
            soup = BeautifulSoup(posting, "html.parser")
        desc_blocks = soup.find_all("div", {"class": "desc"})
    except Exception:
        # Best-effort: one bad posting must not stop the crawl, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return ("An exception occurred")

    headers = ["Job No", "Institution", "List Id {1,2,3}", "URL (URL of the job posting)", "List id (1,2,3)"]
    values = [1, "Charles Schwab", 1, page, 1]
    postingDict = dict(zip(headers, values))

    wordCountDict = dict.fromkeys(range(1, 101), 0)

    with open('freq100.csv', 'r', encoding='utf-8') as f:
        # Blank rows carry no keyword and would fail on row[0].
        keywords = [row[0] for row in csv.reader(f) if row]

    # Map keyword -> 1-based row number so it lines up with the 1..100
    # counter keys (a 0-based index would add a stray key 0 and never
    # touch key 100). setdefault keeps the first row for duplicates.
    keyword_rank = {}
    for rank, keyword in enumerate(keywords, start=1):
        keyword_rank.setdefault(keyword, rank)

    for block in desc_blocks:
        for word in block.text.split():
            rank = keyword_rank.get(word)
            if rank is not None:
                wordCountDict[rank] = wordCountDict.get(rank, 0) + 1
    return {**postingDict, **wordCountDict}


                
if __name__ == '__main__':
    # Script entry point: download the category index, parse it with
    # get_category, expand the result with get_pages, and hand the page
    # list to scrape_pages.
    res = urlopen("https://www.schwabjobs.com/List/Custom/Schwab-Job-Category")
    categories = get_category(res)
    pages = get_pages(categories)
    scrape_pages(pages)

https://www.schwabjobs.com/ShowJob/Id/1939618/Deal-Desk-Sourcing-Manager/
https://www.schwabjobs.com/ShowJob/Id/1840064/Senior-Team-Manager,-Technology-Procurement,-Vendor-Sourcing-Management/
https://www.schwabjobs.com/ShowJob/Id/1941909/Managing-Director,-CRE-Finance/
https://www.schwabjobs.com/ShowJob/Id/1863067/Sr.-Manager,-Finance/
https://www.schwabjobs.com/ShowJob/Id/1919260/Sr.-Manager,-Treasury/
https://www.schwabjobs.com/ShowJob/Id/1844187/Third-Party-Risk-Operations-–-Senior-Team-Manager/
https://www.schwabjobs.com/ShowJob/Id/1677626/Senior-Team-Manager,-Finance/
https://www.schwabjobs.com/ShowJob/Id/1931170/Senior-Finance-Specialist/
https://www.schwabjobs.com/ShowJob/Id/1932656/Senior-Team-Manager,-Finance/
https://www.schwabjobs.com/ShowJob/Id/1878137/Manager,-Finance/
https://www.schwabjobs.com/ShowJob/Id/1905839/Sr.-Specialist-Expense-Forecasting/
https://www.schwabjobs.com/ShowJob/Id/1868503/Senior-Team-Manager-SEC-Reporting/
https://www.schwabjobs.com/ShowJob/Id/19363

https://www.schwabjobs.com/ShowJob/Id/1913383/Schwab-Charitable-Managing-Director/
https://www.schwabjobs.com/ShowJob/Id/1935430/Privacy-Data-Incident-Manager/
https://www.schwabjobs.com/ShowJob/Id/1920555/Sr-Manager-Compliance-Risk-Management/
https://www.schwabjobs.com/ShowJob/Id/1925668/Compliance-Director-Retail-Investment-Advisory/
https://www.schwabjobs.com/ShowJob/Id/1755145/Relationship-Manager-Centralized-Supervision/
https://www.schwabjobs.com/ShowJob/Id/1935429/Banking-Regulatory-Exam-Director/
https://www.schwabjobs.com/ShowJob/Id/1936386/Bank-Testing-Manager-2nd-Line-of-Defense/
https://www.schwabjobs.com/ShowJob/Id/1859966/Compliance-Manager-Banking-and-Trust-Training/
https://www.schwabjobs.com/ShowJob/Id/1851515/Managing-Director,-Supervision-Controls/
https://www.schwabjobs.com/ShowJob/Id/894566/Senior-Manager-Risk-Assessment-(Bank-Compliance)/
https://www.schwabjobs.com/ShowJob/Id/1925338/Specialist/
https://www.schwabjobs.com/ShowJob/Id/1864570/Relationship-Specialis

https://www.schwabjobs.com/ShowJob/Id/1929377/VP,-Financial-Consultant-NYC-(Park-Ave)/
https://www.schwabjobs.com/ShowJob/Id/1920849/VP,-Financial-Consultant-Providence,-RI/
https://www.schwabjobs.com/ShowJob/Id/1929116/VP,-Financial-Consultant-Portland,-ME/
https://www.schwabjobs.com/ShowJob/Id/1939861/VP,-Financial-Consultant-Plantation,-FL/
https://www.schwabjobs.com/ShowJob/Id/1935240/VP,-Regional-Financial-Consultant-Denver,-CO/
https://www.schwabjobs.com/ShowJob/Id/1936665/VP,-Regional-Financial-Consultant-NYC,-NY/
https://www.schwabjobs.com/ShowJob/Id/1917819/Private-Client-Advisor/
https://www.schwabjobs.com/ShowJob/Id/1744054/Sr.-Team-Manager,-Wealth-Strategies-Group/
https://www.schwabjobs.com/ShowJob/Id/1927245/Managing-Director-Planning-Portfolio-Group/
https://www.schwabjobs.com/ShowJob/Id/1854941/Sr.-Team-Manager,-Schwab-Private-Client/
https://www.schwabjobs.com/ShowJob/Id/1653442/Private-Client-Advisor/
https://www.schwabjobs.com/ShowJob/Id/1695402/Sr.-Wealth-Strategist

https://www.schwabjobs.com/ShowJob/Id/1841559/Managing-Director-–-MF-ETF-Platform-Client-Experience/
https://www.schwabjobs.com/ShowJob/Id/1920556/Trading-Product-Manager/
https://www.schwabjobs.com/ShowJob/Id/1871493/Digital-Program-Manager/
https://www.schwabjobs.com/ShowJob/Id/1939458/Business-Technology-Process-Specialist/
https://www.schwabjobs.com/ShowJob/Id/1896504/Senior-Project-Manager-Change-Management-Lead/
https://www.schwabjobs.com/ShowJob/Id/1941924/Sr.-Manager,-Client-Experience/
https://www.schwabjobs.com/ShowJob/Id/1892985/Product-Owner-Business-Analyst/
https://www.schwabjobs.com/ShowJob/Id/1811109/Senior-Program-Manager,-Advisor-Services-Advisor-Family-Office/
https://www.schwabjobs.com/ShowJob/Id/1884727/Sr-Manager-Security-Management/
https://www.schwabjobs.com/ShowJob/Id/1939461/Program-Coordinator/
https://www.schwabjobs.com/ShowJob/Id/1945076/Sr.-Business-Acceptance-Process-Consultant/
https://www.schwabjobs.com/ShowJob/Id/1889101/Conversion-Analyst/
https://www