# code for scraping web data

In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service 
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def get_job_info(soup):
    """Return the qualification bullet points of a job posting as one string.

    Finds the first ``<h4>`` whose text contains ``'Qualifications'``, walks
    up to its enclosing ``<span>`` and collects the text of every ``<li>``
    inside the ``<ul>`` elements below that span.

    Args:
        soup: Parsed job-posting HTML (BeautifulSoup object or tag).

    Returns:
        The ``<li>`` texts joined with newlines, or ``None`` when the header
        or its enclosing ``<span>`` cannot be found.
    """
    # BeautifulSoup hands ``None`` to a ``string=`` callable for tags that do
    # not have exactly one string child, so the membership test must be
    # guarded or it raises TypeError.
    qualifications_header = soup.find(
        'h4', string=lambda text: bool(text) and 'Qualifications' in text
    )
    if not qualifications_header:
        return None

    parent_span = qualifications_header.find_parent('span')
    if parent_span is None:
        # Header is not wrapped in a <span>; nothing to collect from.
        return None

    qualifications_text = [
        li.get_text(strip=True)
        for ul in parent_span.find_all('ul')
        for li in ul.find_all('li')  # Assuming qualifications are in <li> tags
    ]
    return '\n'.join(qualifications_text)

def extract_years_of_experience(text):
    """Extract every "N years" figure mentioned in *text*.

    Recognised forms (case-insensitive): ``'4+ years'``, ``'4 year'`` and
    ranges such as ``'0 - 3 years'`` (hyphen, en dash or em dash).

    Args:
        text: Free text to scan; anything that is not a ``str`` (for example
            ``None`` when no qualifications were found) yields ``0``.

    Returns:
        A list of ints in order of appearance (a range contributes both its
        start and end), or the integer ``0`` when nothing matches.
    """
    if not isinstance(text, str):
        return 0

    # Group 1: single value ("4+ years"); groups 2 and 3: range ("0 - 3 years").
    pattern = r'(\d+)\+?\s*years?|(\d+)\s*[-–—]\s*(\d+)\s*years?'

    years_of_experience = []
    for single, range_start, range_end in re.findall(pattern, text, flags=re.IGNORECASE):
        if single:
            years_of_experience.append(int(single))
        else:
            years_of_experience.extend([int(range_start), int(range_end)])

    # Callers rely on the 0 sentinel (not an empty list) for "not found".
    return years_of_experience if years_of_experience else 0

def extract_salary_range(text):
    """Find the first line of *text* that states a salary and return it.

    Each line is checked, with thousands separators removed, for:

    1. one or two dollar amounts (``'$50000'``, ``'$25.50'``); a trailing
       ``K`` is expanded, so ``'$135K'`` becomes ``'$135000'``;
    2. otherwise a ``'100k-150k'`` style range, which is returned as two
       dollar strings so both branches produce the same format.

    Args:
        text: Multi-line free text; non-string input yields ``'nan'``.

    Returns:
        A list of one or two ``'$<amount>'`` strings, or the string ``'nan'``
        when no line contains a recognisable salary.
    """
    if not isinstance(text, str):
        return 'nan'

    for line in text.split("\n"):
        line = line.replace(',', '')

        dollar_matches = re.findall(r'(\$\s*\d+(?:\.\d{1,2})?)([kK]\b)?', line)
        if len(dollar_matches) in (1, 2):
            amounts = []
            for raw_amount, thousands in dollar_matches:
                if thousands:
                    value = float(raw_amount.replace('$', '').strip())
                    raw_amount = f'${int(round(value * 1000))}'
                amounts.append(raw_amount)
            return amounts

        # A single range on the line is enough; use the first one found.
        k_ranges = re.findall(r'(\d+)\s*[kK]\s*[-–]\s*(\d+)\s*[kK]', line)
        if k_ranges:
            low, high = k_ranges[0]
            return [f'${int(low) * 1000}', f'${int(high) * 1000}']

    return 'nan'

# Chrome flags: turn off the application cache and set the disk and media
# cache sizes to zero.
options = Options()
options.add_argument("--disable-application-cache")
options.add_argument("--disk-cache-size=0")
options.add_argument("--media-cache-size=0")

# The chromedriver binary must be downloaded separately and its version must
# match the installed Chrome version.
chrome_driver_path = './chromedriver'
service = Service(chrome_driver_path)

# Start exactly one browser session. Creating a second webdriver.Chrome and
# rebinding `driver` would leave the first browser window running with no
# handle left to quit it.
driver = webdriver.Chrome(service=service, options=options)
driver.maximize_window()


max_number_of_jobs = 200  # Upper limit of jobs to scrape for each search term

# Search phrases typed into Google, one scraping pass per entry.
# Keep the trailing comma on every line: two adjacent string literals without
# a comma are concatenated into a single (wrong) search term.
search_jobs = [
    'Data engineer jobs',
    'Data scientist jobs',
    'Healthcare data engineer jobs',
    'retail data engineer jobs',
    'Machine learning engineering jobs',
    'Data analyst jobs',
    'Business analyst jobs',
    'Quantitative Analyst jobs',
    'Quantitative Researcher jobs',
    'Quantitative Research Analyst jobs',
    'Business Intelligence Analyst jobs',
]

# Accumulators for the scraped data: one list per output column.

# Posting identity: job title, company name, company location and the
# website on which the job is listed.
job_titles, companies, location, site = [], [], [], []

# Job type (for example full time, part time, contractor).
job_types = []

# Benefits: health insurance, dental insurance, whether a 401k is provided
# and whether the company provides stocks.
health_insurance, Dental_insurance, benefits_401k, stocks = [], [], [], []

# Requirements and pay: experience needed and salary range of the job.
years_of_experience, salary = [], []

# How long ago the job was posted.
date_posted = []

# For every search term: run the search on Google, open the "Jobs" tab and
# scrape up to max_number_of_jobs postings from the result list.
#
# All fields of a posting are first gathered into local variables and only
# appended to the result lists together at the very end of the iteration.
# This keeps every list the same length even when scraping a posting fails
# half-way (pd.DataFrame requires equally long columns).
total_jobs_scraped = 0  # postings whose fields were all stored
for search_job in search_jobs:
    print(f"Searching for: {search_job}")

    # Open Google and search for jobs
    driver.get('https://www.google.com')
    search_box = driver.find_element(By.NAME, 'q')
    search_box.send_keys(search_job)
    search_box.send_keys(Keys.RETURN)
    time.sleep(2)

    jobs_tab = driver.find_element(By.LINK_TEXT, 'Jobs')
    jobs_tab.click()

    time.sleep(1)

    # Define the number of jobs to scrape for this particular search
    num_jobs = max_number_of_jobs
    try:
        for i in range(max_number_of_jobs):
            # Re-query on every iteration: the element lists are looked up
            # fresh after each scroll/click.
            job_cards = driver.find_elements(By.XPATH, '//div[@class="L5NwLd"]')
            title_elements = driver.find_elements(By.XPATH, '//div[contains(@class, "tNxQIb PUpOsf")]')
            if i >= len(job_cards) or i >= len(title_elements):
                break  # no more results for this search term
            job_elements = job_cards[i]
            job_title_elements = title_elements[i]

            scroll_element = job_elements.find_element(By.XPATH, './parent::div')
            driver.execute_script("arguments[0].scrollIntoView(true);", scroll_element)
            # NOTE(review): implicitly_wait() does not pause here; it sets the
            # timeout that later find_element calls use while polling.
            driver.implicitly_wait(2)
            job_elements.click()

            try:
                job_name = job_title_elements.text
                company_name = job_title_elements.find_element(By.XPATH, './following-sibling::div').text
                job_location_and_site = job_title_elements.find_element(By.XPATH, './following-sibling::div[2]').text
                job_type_xpath = '//span[@class="z1asCe mQ5pwc"]/parent::span/following-sibling::span/span'
                job_type = driver.find_elements(By.XPATH, job_type_xpath)[i].text

                # Pure string parsing; a posting without "via" raises
                # IndexError and is skipped instead of ending the search term.
                job_location = job_location_and_site.split(',')[0].strip()
                job_site = job_location_and_site.split('via')[1].strip()

                time.sleep(1)
                # NOTE(review): a title containing an apostrophe produces an
                # invalid XPath here; such postings are skipped.
                job_popup = driver.find_element(By.XPATH, f"//c-wiz[@data-title='{job_name}']")
            except Exception:
                # Nothing has been stored yet, so skipping keeps lists aligned.
                continue

            try:  # for health insurance
                job_elements.find_element(By.XPATH, './/span[contains(text(), "Health insurance")]')
                health_value = "provided"
            except Exception:
                health_value = "Not mentioned"

            expanders = driver.find_elements(By.XPATH, '//div[@class="nNzjpf-cS4Vcb-PvZLI-enNyge-KE6vqe-ma6Yeb nNzjpf-cS4Vcb-PvZLI-gV0Xcb-TzA9Ye nNzjpf-cS4Vcb-PvZLI-qiD3me-vJ7A6b"]')

            # Click "More job highlights" and "Show full description" for more info
            for expander in expanders:
                if "More job highlights" in expander.text:
                    expander.click()
                if "Show full description" in expander.text:
                    expander.click()

            # Job posted date; falls back to "1 month ago" when none is shown.
            # NOTE(review): the XPath starts with '//' and therefore searches
            # the whole page, not just this posting - confirm this is intended.
            date_value = "1 month ago"
            for date_posted_entry in job_title_elements.find_elements(By.XPATH, '//div[contains(@class, "nYym1e")]'):
                entry_text = date_posted_entry.text
                if "ago" in entry_text:
                    date_value = entry_text
                    break

            try:  # for dental insurance
                job_elements.find_element(By.XPATH, './/span[contains(text(), "Dental insurance")]')
                dental_value = "provided"
            except Exception:
                dental_value = "Not mentioned"

            # Parse the opened posting's HTML for the qualifications text.
            job_popup_html = job_popup.get_attribute('outerHTML')
            soup = BeautifulSoup(job_popup_html, 'html.parser')
            job_info = get_job_info(soup)

            experience_value = extract_years_of_experience(job_info)

            try:  # for salary: prefer the labelled element, else parse the text
                salary_value = job_elements.find_element(By.XPATH, './/span[contains(@aria-label, "Salary")]/span').text
            except Exception:
                salary_value = extract_salary_range(job_info)

            # NOTE(review): the two lookups below run on `driver`, i.e. they
            # match an <li> anywhere on the page - confirm this is intended.
            try:  # for 401K
                driver.find_element(By.XPATH, './/li[contains(text(), "401(k)") or contains(text(), "401k")]')
                benefits_401k_value = "provided"
            except Exception:
                benefits_401k_value = "Not mentioned"

            try:  # for stocks
                driver.find_element(By.XPATH, './/li[contains(text(), "stock") or contains(text(), "Equity") or contains(text(), "equity") or contains(text(), "Stock")]')
                stocks_value = "provided"
            except Exception:
                stocks_value = "Not mentioned"

            # Store the complete record in one go.
            job_titles.append(job_name)
            companies.append(company_name)
            location.append(job_location)
            site.append(job_site)
            job_types.append(job_type)
            health_insurance.append(health_value)
            Dental_insurance.append(dental_value)
            years_of_experience.append(experience_value)
            salary.append(salary_value)
            benefits_401k.append(benefits_401k_value)
            stocks.append(stocks_value)
            date_posted.append(date_value)
            total_jobs_scraped += 1
            print(f"extracted job search {i+1} out of {num_jobs} for {search_job}...")

    except Exception as e:
        # Give up on this search term but say why, then move on to the next.
        print(f"Stopped scraping '{search_job}': {e}")
print(f"Total Jobs scraped = {total_jobs_scraped}")


Searching for: Quantitative Analyst jobs
5 days ago
extracted job search 1 out of 1000 for Quantitative Analyst jobs...
25 days ago
extracted job search 2 out of 1000 for Quantitative Analyst jobs...
3 days ago
extracted job search 3 out of 1000 for Quantitative Analyst jobs...
6 days ago
extracted job search 4 out of 1000 for Quantitative Analyst jobs...
extracted job search 5 out of 1000 for Quantitative Analyst jobs...
extracted job search 6 out of 1000 for Quantitative Analyst jobs...
extracted job search 7 out of 1000 for Quantitative Analyst jobs...
extracted job search 8 out of 1000 for Quantitative Analyst jobs...
2 days ago
extracted job search 9 out of 1000 for Quantitative Analyst jobs...
list index out of range
Searching for: Quantitative Researcher jobs
10 days ago
extracted job search 1 out of 1000 for Quantitative Researcher jobs...
20 days ago
extracted job search 2 out of 1000 for Quantitative Researcher jobs...
extracted job search 3 out of 1000 for Quantitative Resea

# create data frame from scraped data

In [13]:
    
# Pair every output column name with the list collected for it; the order
# here is the column order of the resulting table.
column_names = [
    'Job Title', 'Company', 'location', 'site', 'jobtype',
    'health_insurance', 'Dental_insurance', 'years of experience',
    'salary', 'benefits_401k', 'stocks', 'date_posted',
]
column_values = [
    job_titles, companies, location, site, job_types,
    health_insurance, Dental_insurance, years_of_experience,
    salary, benefits_401k, stocks, date_posted,
]
df = pd.DataFrame(dict(zip(column_names, column_values)))

# Save the raw scrape without the row index, then display the table.
df.to_csv('data_raw.csv', index=False)
df


Unnamed: 0,Job Title,Company,location,site,jobtype,health_insurance,Dental_insurance,years of experience,salary,benefits_401k,stocks,date_posted
0,Quantitative Analyst,American Century Investments,Santa Clara,LinkedIn,Full-time,Not mentioned,Not mentioned,0,"[$135, $165]",Not mentioned,provided,3 days ago
1,"Quantitative Marketing Analyst, Google, Search...",Google,Mountain View,LinkedIn,Full-time,provided,Not mentioned,0,"[$87000, $128000]",Not mentioned,provided,10 days ago
2,Quant Analyst,"Software Guidance & Assistance, Inc. (SGA, Inc.)",San Jose,LinkedIn,Full-time,Not mentioned,Not mentioned,[3],50–57 an hour,Not mentioned,provided,23 days ago
3,Quantitative Researcher,MSCI Inc.,Berkeley,Glassdoor,Full-time,provided,Not mentioned,0,149K–194K a year,provided,provided,5 days ago
4,Associate Quantitative Analytics and Forecasti...,Exelixis,Alameda,LinkedIn,Full-time,provided,provided,"[7, 5]","[$171000, $243000]",provided,provided,2 days ago
...,...,...,...,...,...,...,...,...,...,...,...,...
290,Analytics & Insights - Business Systems Analyst,W3Global,South San Francisco,LinkedIn,Full-time,Not mentioned,Not mentioned,[5],,provided,Not mentioned,29 days ago
291,Business Analyst,"GTT, LLC",Avila Beach,ZipRecruiter,Full-time,provided,provided,0,[$50],provided,Not mentioned,11 days ago
292,Senior Business Intelligence System Analyst,Partnership HealthPlan of California,Fairfield,LinkedIn,Full-time,Not mentioned,Not mentioned,0,"[$118518.94, $154074.63]",provided,Not mentioned,19 hours ago
293,Sr Data Analyst with Tableau / Domo,Keylent,Sacramento,ZipRecruiter,Full-time,Not mentioned,Not mentioned,0,,provided,provided,1 month ago


# formatting the data

In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('data_raw.csv')

def format_years_of_experience(x):
    """Reduce a raw years-of-experience entry to its largest number.

    Entries come from the CSV either as ``'0'`` (nothing found) or as a
    stringified list such as ``'[3]'`` or ``'[7, 5]'``. The numbers are
    compared as integers, so ``'[12]'`` yields ``12``.

    Args:
        x: Cell value; converted with ``str()`` so ints and NaN are accepted.

    Returns:
        The largest integer found in the entry, or ``0`` if it has none.
    """
    import re  # local import: `re` is only imported in a later cell

    numbers = re.findall(r'\d+', str(x))
    return max(map(int, numbers)) if numbers else 0

# Apply the function to the 'years of experience' column
df['years of experience'] = df['years of experience'].apply(format_years_of_experience)

# Display the modified DataFrame
#print(df[['years of experience']])  # Display just the relevant column


In [2]:
df['date_posted'] = df['date_posted'].str.replace(' ago', '', regex=False)

In [3]:
# Function to strip the ' ago' suffix from a date_posted entry
def format_dateposted(x):
    """Return the string *x* with every ``' ago'`` occurrence removed."""
    suffix = ' ago'
    pieces = x.split(suffix)
    return ''.join(pieces)

# Apply the function to the 'date_posted' column
df['date_posted'] = df['date_posted'].apply(format_dateposted)

# Display the modified DataFrame
#print(df[['date_posted']])  # Display just the relevant column

#df

In [4]:
import re
def convert_to_days(text):
    """Convert a posting age such as ``'3 days'`` into a number of days.

    Args:
        text: Age with the ``' ago'`` suffix already removed, e.g.
            ``'19 hours'``, ``'5 days'``, ``'2 weeks'``, ``'1 month'``.
            Converted with ``str()`` first, so NaN is accepted.

    Returns:
        Days as an int: minutes and hours count as 1 day, weeks as 7 days
        each and months as 30 days each. Text with no recognisable
        "<number> <unit>" falls back to 30.
    """
    match = re.search(r'(\d+)\+?\s*([A-Za-z]+)', str(text))
    if match is None:
        # No "<number> <unit>" present (for example 'nan').
        return 30

    amount = int(match.group(1))
    unit = match.group(2).lower().rstrip('s')  # 'Days' -> 'day'
    if unit == 'day':
        return amount
    if unit in ('hour', 'minute'):
        return 1
    if unit == 'week':
        return amount * 7
    if unit == 'month':
        return amount * 30
    return 30

df['date_posted'] = df['date_posted'].apply(convert_to_days)
#df['date_posted'] = df['date_posted'].str.extract(r'(\d+)\s*(\w+)').apply(
#    lambda x: int(x[0]) * (1 if x[1] in ['day', 'days'] else 30), axis=1
#)

#df

In [5]:

def preprocess_salary(text):
    """Normalise a raw salary entry to ``[lower, upper]`` where possible.

    ``$`` signs and single quotes are removed first. Then, in order:

    1. ``'149K–194K'``: a range with K suffixes, scaled by 1000;
    2. ``'50–57'`` or ``'20.50 - 25.75'``: a plain range (fractions dropped);
    3. exactly one number (optionally with a K suffix): used for both bounds.

    Only the first range in the text is used, so the result never has more
    than two numbers.

    Args:
        text: Raw cell value; converted with ``str()``.

    Returns:
        ``[lower, upper]`` as ints, or the cleaned text unchanged when none
        of the patterns applies (for example ``'[135, 165]'`` or ``'nan'``).
    """
    text = str(text).replace("$", '').replace("'", '')

    number = r'(\d+(?:\.\d+)?)'
    dash = r'\s*[–-]\s*'

    k_range = re.search(number + r'\s*[kK]' + dash + number + r'\s*[kK]', text)
    if k_range:
        return [int(round(float(value) * 1000)) for value in k_range.groups()]

    plain_range = re.search(number + dash + number, text)
    if plain_range:
        return [int(float(value)) for value in plain_range.groups()]

    singles = re.findall(number + r'([kK]\b)?', text)
    if len(singles) == 1:
        value, thousands = singles[0]
        amount = int(round(float(value) * 1000)) if thousands else int(float(value))
        return [amount, amount]

    # Already a list-like string or not a salary at all; leave it as is.
    return text
    
def preprocess_salary_convert_hourly_wage_to_annual(text):
    """Turn a salary entry into a list of annual integer amounts.

    The entry is stringified, square brackets are removed and the remainder
    is split on commas. Each piece is truncated to an int (``0`` if it is not
    a number). Amounts below 200 are treated as hourly wages and multiplied
    by ``40 * 4 * 12``.

    Args:
        text: A list of numbers or its string form, e.g. ``'[50, 57]'``.

    Returns:
        ``[0, 0]`` when the entry contains ``'nan'`` (missing value);
        otherwise one int per comma-separated piece.
    """
    cleaned = str(text).replace("[", '').replace("]", '')

    # Missing values
    if "nan" in cleaned:
        return [0, 0]

    annual_amounts = []
    for piece in cleaned.split(","):
        try:
            amount = int(float(piece))
        except ValueError:
            amount = 0
        # Small figures are hourly rates: scale them up to a yearly amount.
        annual_amounts.append(amount * 40 * 4 * 12 if amount < 200 else amount)
    return annual_amounts
        
    
# Apply both salary functions, in order, to the 'salary' column
df['salary'] = df['salary'].apply(preprocess_salary)
df['salary'] = df['salary'].apply(preprocess_salary_convert_hourly_wage_to_annual)

# Display the modified DataFrame
#print(df[['salary']])  # Display just the relevant column


In [6]:
# Split each salary list into separate lower/upper columns.
df[['lower_limit_salary', 'upper_limit_salary']] = pd.DataFrame(df['salary'].tolist(), index=df.index)

# Apply the same rule to the lower and then the upper salary bound: values
# further than three standard deviations from the column mean are outliers
# and are replaced with 0.
for salary_column in ('lower_limit_salary', 'upper_limit_salary'):
    mean = df[salary_column].mean()
    std = df[salary_column].std()

    # Define the bounds for outliers (the lower bound is never negative)
    lower_bound = mean - 3 * std
    lower_bound = 0 if lower_bound < 0 else lower_bound
    upper_bound = mean + 3 * std

    outliers = (df[salary_column] < lower_bound) | (df[salary_column] > upper_bound)

    # Print number of outliers
    print(f"Number of outliers found: {outliers.sum()}")

    # Replace outliers with 0
    df[salary_column] = np.where(outliers, 0, df[salary_column])


    

Number of outliers found: 7
Number of outliers found: 9


In [7]:
def preprocess_benefits(text):
    """Encode a benefit entry as 1 if it contains ``'provided'``, else 0.

    The value is stringified first, so non-string input is accepted.
    """
    return int('provided' in str(text))
# Encode each benefit column as 1 (provided) / 0 (anything else).
for benefit_column in ('health_insurance', 'Dental_insurance', 'benefits_401k', 'stocks'):
    df[benefit_column] = df[benefit_column].apply(preprocess_benefits)
#df


In [8]:
# Save the formatted table. index=True also writes the row index as an
# unnamed first CSV column; it shows up as an 'Unnamed: 0' column when the
# file is read back, and a later cell drops a column with that name.
df.to_csv('data_result.csv', index=True)

# preprocessing the data and eliminating unnecessary data

In [13]:
df = pd.read_csv('data_result.csv')
df.head(5)

Unnamed: 0.2,Unnamed: 0.1,Job Title,Company,location,site,jobtype,health_insurance,Dental_insurance,years of experience,salary,benefits_401k,stocks,date_posted,Unnamed: 0,lower_limit_salary,upper_limit_salary
0,0,Quantitative Analyst,American Century Investments,Santa Clara,LinkedIn,Full-time,0,0,0,"[259200, 316800]",0,1,3,,259200,316800.0
1,1,"Quantitative Marketing Analyst, Google, Search...",Google,Mountain View,LinkedIn,Full-time,1,0,0,"[87000, 128000]",0,1,10,,87000,128000.0
2,2,Quant Analyst,"Software Guidance & Assistance, Inc. (SGA, Inc.)",San Jose,LinkedIn,Full-time,0,0,3,"[96000, 109440]",0,1,23,,96000,109440.0
3,3,Quantitative Researcher,MSCI Inc.,Berkeley,Glassdoor,Full-time,1,0,0,"[149000, 194000]",1,1,5,,149000,194000.0
4,4,Associate Quantitative Analytics and Forecasti...,Exelixis,Alameda,LinkedIn,Full-time,1,1,7,"[171000, 243000]",1,1,2,,171000,243000.0


In [14]:
# Drop the saved row-index column and the raw 'salary' lists (superseded by
# the lower/upper limit columns). errors='ignore' makes the cell safe to
# re-run and independent of whether the CSV was written with an index.
df = df.drop(columns=['Unnamed: 0', 'salary'], errors='ignore')

# Keep only rows where neither salary bound is 0 (0 marks a missing value
# or a zeroed-out outlier).
df = df[(df['lower_limit_salary'] != 0) & (df['upper_limit_salary'] != 0)]
total_rows = len(df)
print(total_rows)


Unnamed: 0,Unnamed: 0.1,Job Title,Company,location,site,jobtype,health_insurance,Dental_insurance,years of experience,benefits_401k,stocks,date_posted,lower_limit_salary,upper_limit_salary
0,0,Quantitative Analyst,American Century Investments,Santa Clara,LinkedIn,Full-time,0,0,0,0,1,3,259200,316800.0
1,1,"Quantitative Marketing Analyst, Google, Search...",Google,Mountain View,LinkedIn,Full-time,1,0,0,0,1,10,87000,128000.0
2,2,Quant Analyst,"Software Guidance & Assistance, Inc. (SGA, Inc.)",San Jose,LinkedIn,Full-time,0,0,3,0,1,23,96000,109440.0
3,3,Quantitative Researcher,MSCI Inc.,Berkeley,Glassdoor,Full-time,1,0,0,1,1,5,149000,194000.0
4,4,Associate Quantitative Analytics and Forecasti...,Exelixis,Alameda,LinkedIn,Full-time,1,1,7,1,1,2,171000,243000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1152,1152,"Principal Software Architect, App Store Analyt...",Apple,Cupertino,Careers At Apple,Full-time,1,1,0,1,1,30,207800,378700.0
1153,1153,"Staff, Software Engineer - Data",Walmart,Sunnyvale,Walmart Careers,Full-time and Part-time,1,1,6,1,1,30,143000,286000.0
1154,1154,"Senior, Data Scientist - Machine Learning Engi...",Walmart,Sunnyvale,Walmart Careers,Full-time and Part-time,1,1,5,1,1,30,117000,234000.0
1156,1156,"(USA) Senior, Software Engineer",Walmart,Fremont,ZipRecruiter,Full-time and Part-time,1,1,5,1,1,14,117000,234000.0


In [16]:
df.to_csv('final_data_result.csv', index=True)

# finding outliers and eliminating the rows

In [19]:
def find_outliers_iqr(data, column):
    """Flag the rows of *data* whose *column* value lies outside the IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, with Q1 and Q3
    the 25% and 75% quantiles of the column.

    Args:
        data: DataFrame to inspect.
        column: Name of the numeric column to test.

    Returns:
        A tuple ``(outlier_rows, lower_bound, upper_bound)``.
    """
    values = data[column]
    first_quartile, third_quartile = (values.quantile(q) for q in (0.25, 0.75))
    spread = third_quartile - first_quartile

    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    is_outlier = values.lt(low_fence) | values.gt(high_fence)
    return data[is_outlier], low_fence, high_fence

def find_grouped_outliers(df, salary_column):
    """Collect the IQR outliers of *salary_column* per experience group.

    The rows are grouped by ``'years of experience'`` and
    ``find_outliers_iqr`` is applied to each group separately.

    Args:
        df: DataFrame containing ``'years of experience'`` and *salary_column*.
        salary_column: Name of the salary column to test.

    Returns:
        DataFrame of all outlier rows, in group order with their original
        index; an empty DataFrame when there are no groups.
    """
    # Gather the per-group results and concatenate once: calling pd.concat
    # inside the loop copies the accumulated frame on every iteration.
    per_group_outliers = [
        find_outliers_iqr(group, salary_column)[0]
        for _, group in df.groupby('years of experience')
    ]
    if not per_group_outliers:
        return pd.DataFrame()
    return pd.concat(per_group_outliers)

def remove_grouped_outliers(df, salary_column):
    """Return *df* without the per-experience-group IQR outliers.

    The rows are grouped by ``'years of experience'``; within each group only
    rows whose *salary_column* value lies inside the bounds reported by
    ``find_outliers_iqr`` (inclusive) are kept.

    Args:
        df: DataFrame containing ``'years of experience'`` and *salary_column*.
        salary_column: Name of the salary column to filter on.

    Returns:
        DataFrame of the retained rows, in group order with their original
        index; an empty DataFrame when there are no groups.
    """
    kept_groups = []
    for _, group in df.groupby('years of experience'):
        _, lower_bound, upper_bound = find_outliers_iqr(group, salary_column)

        # NOTE(review): rows with a NaN salary fail both comparisons and are
        # therefore dropped as well - confirm this is intended.
        within_bounds = (group[salary_column] >= lower_bound) & (group[salary_column] <= upper_bound)
        kept_groups.append(group[within_bounds])

    # Concatenate once at the end; pd.concat inside the loop would copy the
    # accumulated frame on every iteration.
    if not kept_groups:
        return pd.DataFrame()
    return pd.concat(kept_groups)

# Outliers of each salary bound, determined within every
# 'years of experience' group.
lower_limit_grouped_outliers = find_grouped_outliers(df, 'lower_limit_salary')
upper_limit_grouped_outliers = find_grouped_outliers(df, 'upper_limit_salary')

# Report how many outliers each bound has.
print(f"Lower Limit Salary Outliers: {lower_limit_grouped_outliers.shape[0]}")
print(f"Upper Limit Salary Outliers: {upper_limit_grouped_outliers.shape[0]}")

# Print the first rows of each outlier table under its heading.
for heading, outlier_rows in (
    ("\nLower Limit Salary Outliers:", lower_limit_grouped_outliers),
    ("\nUpper Limit Salary Outliers:", upper_limit_grouped_outliers),
):
    print(heading)
    print(outlier_rows.head())

# Filter on the lower bound first, then filter that result on the upper bound.
cleaned_lower_limit_salaries = remove_grouped_outliers(df, 'lower_limit_salary')
cleaned_data = remove_grouped_outliers(cleaned_lower_limit_salaries, 'upper_limit_salary')

# Row counts before and after the removal.
print(f"Original data rows: {df.shape[0]}")
print(f"Data rows after removing outliers: {cleaned_data.shape[0]}")

# Write the cleaned table (without the row index) and display its first rows.
cleaned_data.to_csv('cleaned_final_data_result.csv', index=False)
cleaned_data.head()

Lower Limit Salary Outliers: 24
Upper Limit Salary Outliers: 14

Lower Limit Salary Outliers:
     Unnamed: 0.1                                      Job Title  \
392           392                                  Data Engineer   
67             67                Quantitative Researcher (Power)   
76             76                        Quantitative Researcher   
106           106          Quantitative Researcher and Developer   
675           675  Senior Machine Learning Engineer (Applied ML)   

                       Company   location                       site  \
392                Robert Half  Calabasas                        Job   
67                      Comity   New York            ClimateTechList   
76            Goliath Partners    Chicago           Goliath Partners   
106  Open Systems Technologies   New York  Open Systems Technologies   
675                     EarnIn  Palo Alto                 Greenhouse   

       jobtype  health_insurance  Dental_insurance  years of exp

Unnamed: 0,Unnamed: 0.1,Job Title,Company,location,site,jobtype,health_insurance,Dental_insurance,years of experience,benefits_401k,stocks,date_posted,lower_limit_salary,upper_limit_salary
0,0,Quantitative Analyst,American Century Investments,Santa Clara,LinkedIn,Full-time,0,0,0,0,1,3,259200,316800.0
1,1,"Quantitative Marketing Analyst, Google, Search...",Google,Mountain View,LinkedIn,Full-time,1,0,0,0,1,10,87000,128000.0
3,3,Quantitative Researcher,MSCI Inc.,Berkeley,Glassdoor,Full-time,1,0,0,1,1,5,149000,194000.0
10,10,Principal Quantitative Analyst,Pacific Gas and Electric,Oakland,JobzMall,Full-time,0,0,0,0,0,30,147000,189000.0
13,13,"Analyst, Quantitative Data Operations",Fidelity Investments,San Francisco,JobzMall,Full-time,0,0,0,1,1,30,85000,125000.0
