# Block 0 (General settings)

## Libraries and Data

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import pandas as pd
import random
import pyautogui
from datetime import datetime
import os
import logging
from tabulate import tabulate
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from selenium.webdriver import ActionChains
from IPython.display import display

# pip install tabulate

# ------------------------------ Data ---------------------------------

# Input files: credentials first, then the per-title search settings.
credentials_csv = pd.read_csv("credentials.csv")
df_limits = pd.read_csv('settings.csv')

# The second column of the first two credential rows holds the LinkedIn
# user name and password, in that order.
linkedin_user_name = credentials_csv.iat[0, 1]
linkedin_password = credentials_csv.iat[1, 1]

# Job titles to search for, and every settings column after the first one
# treated as a country name.
titles = df_limits['Title'].tolist()
countries = list(df_limits.columns)[1:]
csv_path = "questions_answers.csv"

# page_limits[title][country] -> integer limit read from settings.csv.
page_limits = {
    row['Title']: {
        country: int(row[country])
        for country in ('Portugal', 'Spain', 'France', 'Ireland')
    }
    for _, row in df_limits.iterrows()
}


# Full job database: reuse the saved table or start an empty one with the
# expected columns.
if os.path.exists("database.csv"):
    df = pd.read_csv("database.csv")
else:
    df = pd.DataFrame(columns=[
        "ID", "Job_Title", "Company_Name", "Country_of_search", "Full_Location",
        "Salary_Range", "Job_Placement", "Job_Mode", "Job_Level", "Apply_Type",
        "External_Link", "Job_Date", "Hiring_Team_Name", "Hiring_Team_Info",
        "Hiring_Team_Link", "Job_Description", "Skills_Required",
        "Company_Description", "Company_Sector", "Status", "Company_Platform_Link"
    ])

# External-application table: created on disk immediately when missing,
# because external_apply() re-reads it from "apply_table.csv" on every call.
if os.path.exists("apply_table.csv"):
    df2 = pd.read_csv("apply_table.csv")
else:
    df2 = pd.DataFrame(columns=["ID", "Job_Title", "Company_Name", "Country_of_search","External_Link"])
    df2.to_csv("apply_table.csv", index=False)

# Short database used for duplicate detection.
# Job_id is read as text: without an explicit dtype a single "N/A" entry makes
# pandas parse the whole column as float64, and the ids are then written back
# as "123.0", which no longer match the ids taken from the job URL.
if os.path.exists("database_short.csv"):
    df3 = pd.read_csv("database_short.csv", dtype={"Job_id": str})
else:
    df3 = pd.DataFrame(columns=[
        "ID", "Job_Title", "Company_Name", "Country_of_search", "Job_id", "Status"
    ])

## Driver Settings

In [2]:
# ------------------------------ Driver Settings --------------------

# Pool of desktop and mobile User-Agent strings. One is picked at random
# below; in this section it is only referenced by a commented-out option.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 13; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36"
]

random_user_agent = random.choice(user_agents)

# Chrome options. Only the uncommented add_argument() calls below are active;
# the commented-out lines are optional switches kept for manual toggling.
options = Options()
# options.add_argument("--headless=new")  # Run in headless mode
# options.add_argument("--disable-gpu")   # Disable GPU acceleration
# options.add_argument("--blink-settings=imagesEnabled=false")  # Disable images
# options.add_argument("--disable-extensions")  # Disable extensions
# options.add_argument("--disable-infobars")  # Removes automation infobar
# options.add_argument("--start-maximized")  # Start with a maximized window
# options.add_argument("--log-level=3")
# options.add_argument("--disable-infobars")
# options.add_argument("--disable-background-networking")
# options.add_argument("--disable-popup-blocking")
# options.add_argument("--disable-blink-features=AutomationControlled")  # Prevents detection
# options.add_argument("--enable-automation")  # Official setting for Selenium automation
# options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_experimental_option("useAutomationExtension", False)
# Load pages faster by blocking images
# options.add_argument("--blink-settings=imagesEnabled=false")

# Ignore certificate errors
options.add_argument("--ignore-certificate-errors")

# mobile_emulation = {"deviceName": "iPhone 12 Pro"}                          # Mobile emulation
# options.add_experimental_option("mobileEmulation", mobile_emulation)        # Mobile emulation
# options.add_argument(f"user-agent={random_user_agent}")                     # Pick a random User-Agent

# Intended as a timeout for slow connections.
# NOTE(review): confirm that Chrome honours "--timeout" in a regular
# (non-headless) session; if not, this switch has no effect.
options.add_argument("--timeout=300")
# options.add_argument("--disable-extensions")
# options.add_argument("--disable-plugins")
# options.add_argument("--disable-software-rasterizer")

# Path to the chromedriver binary; adjust to the local installation.
service = Service("/usr/local/bin/chromedriver")

## Humanising

In [3]:
def human_delay(min_sec=0.3, max_sec=1.0):
    """Sleep for a random duration drawn uniformly between min_sec and max_sec seconds."""
    pause = random.uniform(min_sec, max_sec)
    time.sleep(pause)

def human_hover(driver, element):
    """Move the mouse pointer over ``element`` and pause briefly.

    Best-effort: a failed hover is reported on stdout and otherwise ignored,
    so the caller can still go on to click the element.

    Args:
        driver: Selenium WebDriver that owns ``element``.
        element: WebElement to hover over.
    """
    try:
        ActionChains(driver).move_to_element(element).perform()
        human_delay(0.2, 0.6)
    except Exception as e:
        # Name the function that actually failed (the old message said
        # "human_delay") and keep the error type for debugging.
        print(f"🟠 human_hover ({type(e).__name__})")

def human_type(element, text):
    """Send ``text`` to ``element`` one character at a time.

    A random pause of 0.05-0.15 seconds follows every character.
    """
    for keystroke in text:
        element.send_keys(keystroke)
        pause = random.uniform(0.05, 0.15)
        time.sleep(pause)

def human_scroll_to(driver, element):
    """Smooth-scroll ``element`` to the vertical centre of the viewport, then pause briefly."""
    scroll_script = "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});"
    driver.execute_script(scroll_script, element)
    human_delay(0.2, 0.6)

# Block 1 (Sign In)

In [4]:
# ------------------------------ Block 1 (Sign In) --------------------

def login_and_go_to_jobs(driver, linkedin_user_name, linkedin_password):
    """Fill in and submit the sign-in form on the page loaded in ``driver``.

    Steps: optionally accept the cookie banner, open the "Sign in" link, type
    the credentials, optionally click the "remember me" label, then press the
    "Sign in" button and wait 60 seconds.

    Args:
        driver: Selenium WebDriver already showing the landing page.
        linkedin_user_name: Text typed into the ``username`` field.
        linkedin_password: Text typed into the ``password`` field.

    Returns:
        True once the sign-in button has been clicked and the 60-second pause
        has elapsed (the login result itself is not verified); False if any
        mandatory step raised.
    """
    try:
        # Optional step: cookie consent banner.
        try:
            accept_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept')]"))
            )

            human_scroll_to(driver, accept_button)
            human_hover(driver, accept_button)
            human_delay()
            accept_button.click()

        # "Exception" instead of a bare except so Ctrl+C / SystemExit are
        # not swallowed while waiting for an optional element.
        except Exception:
            print("🛑 accept_button")

        sign_in_link = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Sign in')]"))
        )
        human_scroll_to(driver, sign_in_link)
        human_hover(driver, sign_in_link)
        human_delay(1, 2)
        sign_in_link.click()

        username_input = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.ID, "username"))
        )
        username_input.clear()
        human_type(username_input, linkedin_user_name)

        password_input = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.ID, "password"))
        )
        password_input.clear()
        human_delay(1, 2)
        human_type(password_input, linkedin_password)

        # Optional step: "remember me" label.
        try:
            remember_me_label = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//label[@for='rememberMeOptIn-checkbox']"))
            )
            human_hover(driver, remember_me_label)
            human_delay()
            remember_me_label.click()

        except Exception:
            print("🛑 remember_me")

        human_delay(1, 2)

        sign_in_button = WebDriverWait(driver, 200).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Sign in')]"))
        )
        human_scroll_to(driver, sign_in_button)
        human_hover(driver, sign_in_button)
        human_delay()
        sign_in_button.click()

        print("✅ Sigh in")

        # NOTE(review): presumably leaves time for the post-login redirect or
        # a manual verification challenge - confirm the intent of this pause.
        time.sleep(60)
        return True

    except Exception as e:
        # Report why the sign-in failed instead of dropping the error.
        print(f"🛑 Sigh in ({type(e).__name__}: {e})")
        return False


# Block 2 (Search settings)

In [5]:
# ------------------------------ Function for setting titles  -------------------------------

def search_job_title(driver, title, wait_time=30):
    """Open the Jobs page and submit ``title`` in the title search box.

    Args:
        driver: Signed-in Selenium WebDriver.
        title: Job title to search for.
        wait_time: Upper bound, in seconds, of the random pause after the
            search is submitted; the lower bound is ``wait_time - 5``, never
            below zero.

    Returns:
        True when the search was submitted, False if any step raised.
    """
    print("🟢 Searching process")
    try:
        jobs_button = WebDriverWait(driver, 30).until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(@href, '/jobs/')]"))
        )
        human_scroll_to(driver, jobs_button)
        human_hover(driver, jobs_button)
        human_delay()
        jobs_button.click()

        search_box = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, "//input[@aria-label='Search by title, skill, or company']"))
        )
        human_scroll_to(driver, search_box)
        human_hover(driver, search_box)
        human_delay()
        search_box.clear()

        human_type(search_box, title)
        human_delay(0.4, 1.0)
        search_box.send_keys(Keys.ENTER)

        print(f"🟢 Title {title}")

        # Clamp at zero: with wait_time < 5 the old lower bound was negative,
        # time.sleep() could raise ValueError, and a search that had already
        # been submitted was reported as failed.
        human_delay(max(0, wait_time - 5), max(0, wait_time))

        return True

    except Exception as e:
        # Keep the failure reason visible instead of printing a bare marker.
        print(f"🛑 search_job_title ({type(e).__name__}: {e})")

        return False


# ------------------------------ Function for setting country  -------------------------------

def set_location_and_search(driver, country, wait_time=30):
    """Type ``country`` into the location box and press the search button.

    Args:
        driver: Selenium WebDriver showing the job search page.
        country: Location text to search in.
        wait_time: Upper bound, in seconds, of the random pause after the
            search button is clicked; the lower bound is ``wait_time - 5``,
            never below zero.

    Returns:
        True when the search was submitted, False if any step raised.
    """
    try:
        search_box = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, "//input[@aria-label='City, state, or zip code']"))
        )
        human_scroll_to(driver, search_box)
        human_hover(driver, search_box)
        human_delay()
        search_box.clear()
        human_type(search_box, country)

        search_button = WebDriverWait(driver, 30).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(@class, 'jobs-search-box__submit-button')]"))
        )
        human_scroll_to(driver, search_button)
        human_hover(driver, search_button)
        human_delay()
        search_button.click()

        # Clamp at zero: with wait_time < 5 the old lower bound was negative,
        # time.sleep() could raise ValueError, and a search that had already
        # been submitted was reported as failed.
        human_delay(max(0, wait_time - 5), max(0, wait_time))

        print(f"🟢 Country {country}")

        return True

    except Exception as e:
        # Keep the failure reason visible instead of printing a bare marker.
        print(f"🛑 set_location_and_search ({type(e).__name__}: {e})")

        return False


# Block 3 (Extract job info)

## Block 3.1 (Initial data capturing)

In [6]:
def extract_job_info(driver, country):
    """Scrape the job posting currently shown in ``driver``.

    Every section (company, title, location, job info, hiring team,
    description, skills, company description, sector, apply type) is read
    independently. A section that cannot be read yields "N/A" for its fields
    and prints a 🛑 line; a successful section prints 🟢 lines.

    Args:
        driver: Selenium WebDriver showing a job posting.
        country: Country the search was run for; stored under
            "Country_of_search".

    Returns:
        dict with the keys Job_Title, Company_Name, Country_of_search,
        Full_Location, Salary_Range, Job_Placement, Job_Mode, Job_Level,
        Apply_Type, Hiring_Team_Name, Hiring_Team_Info, Hiring_Team_Link,
        Job_Description, Skills_Required, Company_Description,
        Company_Sector, Company_Platform_Link and job_id. Missing values are
        the string "N/A".
    """
    print("---------- Initual data capturing ----------")

    current_url = driver.current_url

    # All fields start empty so the result can always be built, even if the
    # scraping below stops early.
    Job_Title = None
    Company_Name = None
    Job_Location = None
    Salary_Range = None
    Job_Placement = None
    Job_Mode = None
    Job_Level = None
    Apply_Type = None
    Hiring_Team_Name = None
    Hiring_Team_Info = None
    Hiring_Team_Link = None
    Job_Description = None
    Skills_Required = None
    Company_Description = None
    Company_Sector = None
    Company_Platform_Link = None
    job_id = None

    try:
        # The job id is the value of the "currentJobId" URL parameter.
        if "currentJobId=" in current_url:
            job_id = current_url.split("currentJobId=")[1].split("&")[0]

        # ------------------------------ Company Info ---------------------------------
        try:
            company_box = driver.find_element(By.CLASS_NAME, "job-details-jobs-unified-top-card__company-name")
            company_link = company_box.find_element(By.TAG_NAME, "a")
            Company_Name = company_link.text
            Company_Platform_Link = company_link.get_attribute("href")

            print(f"🟢 {Company_Name}")
            print(f"🟢 {Company_Platform_Link}")

        except Exception:
            Company_Name = "N/A"
            Company_Platform_Link = "N/A"

            print(f"🛑 {Company_Name}")
            print(f"🛑 {Company_Platform_Link}")

        # ------------------------------ Job Title ---------------------------------
        try:
            # Searched from the driver: the XPath is document-wide anyway, and
            # going through the company element made the title "N/A" whenever
            # the company lookup above had failed.
            title_link = driver.find_element(By.XPATH, "//div[contains(@class, 'job-details-jobs-unified-top-card__job-title')]/h1/a")
            Job_Title = title_link.text

            print(f"🟢 {Job_Title}")
        except Exception:
            Job_Title = "N/A"

            print(f"🛑 {Job_Title}")

        # ------------------------------ Job Location ---------------------------------
        try:
            description_box = driver.find_element(By.CLASS_NAME, "job-details-jobs-unified-top-card__primary-description-container")
            light_text = description_box.find_element(By.CLASS_NAME, "t-black--light")
            Job_Location = light_text.find_elements(By.TAG_NAME, "span")[0].text

            print(f"🟢 {Job_Location}")
        except Exception:
            Job_Location = "N/A"

            print(f"🛑 {Job_Location}")

        # ------------------------------ Job Info ---------------------------------
        try:
            info_items = driver.find_elements(By.XPATH, "//div[contains(@class, 'mt2 mb2')]/ul/li")
            info_spans = info_items[0].find_element(By.TAG_NAME, "span").find_elements(By.TAG_NAME, "span")

            # Number of nested spans -> span index of
            # (salary, placement, mode, level); None means "not shown".
            field_positions = {
                7: (0, 3, 5, 6),
                6: (None, 2, 4, 5),
                5: (None, 2, 4, None),
                2: (None, 0, 1, None),
                1: (None, 0, None, None),
            }.get(len(info_spans), (None, None, None, None))

            Salary_Range, Job_Placement, Job_Mode, Job_Level = (
                info_spans[position].text if position is not None else "N/A"
                for position in field_positions
            )

            print(f"🟢 {Salary_Range}")
            print(f"🟢 {Job_Placement}")
            print(f"🟢 {Job_Mode}")
            print(f"🟢 {Job_Level}")

        except Exception:
            Salary_Range = "N/A"
            Job_Placement = "N/A"
            Job_Mode = "N/A"
            Job_Level = "N/A"

            print(f"🛑 {Salary_Range}")
            print(f"🛑 {Job_Placement}")
            print(f"🛑 {Job_Mode}")
            print(f"🛑 {Job_Level}")

        # ------------------------------ Hiring Team ---------------------------------
        try:
            hirer_card = driver.find_element(By.CLASS_NAME, "hirer-card__hirer-information")
            hirer_link = hirer_card.find_element(By.TAG_NAME, "a")
            Hiring_Team_Link = hirer_link.get_attribute("href")
            Hiring_Team_Name = (
                hirer_link.find_element(By.TAG_NAME, "span")
                .find_element(By.TAG_NAME, "strong")
                .text
            )
            Hiring_Team_Info = (
                hirer_card.find_element(By.CLASS_NAME, "linked-area")
                .find_element(By.CLASS_NAME, "text-body-small")
                .text
            )

            print(f"🟢 {Hiring_Team_Link}")
            print(f"🟢 {Hiring_Team_Name}")
            print(f"🟢 {Hiring_Team_Info}")

        except Exception:
            Hiring_Team_Link = "N/A"
            Hiring_Team_Name = "N/A"
            Hiring_Team_Info = "N/A"

            print(f"🛑 {Hiring_Team_Link}")
            print(f"🛑 {Hiring_Team_Name}")
            print(f"🛑 {Hiring_Team_Info}")

        # ------------------------------ Job Description ---------------------------------
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.CLASS_NAME, "jobs-description-content"))
            )
            description_element = driver.find_element(By.CLASS_NAME, 'jobs-description-content')
            Job_Description = description_element.text.strip()

            print("🟢 Job_Description")

        except Exception:
            Job_Description = "N/A"

            print("🛑 Job_Description")

        # ------------------------------ Skills Required ---------------------------------
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.CLASS_NAME, "pt5"))
            )
            skills_box = driver.find_element(By.CLASS_NAME, "pt5")
            Skills_Required = "N/A"

            # First attempt: a single link inside the box.
            try:
                Skills_Required = skills_box.find_element(By.TAG_NAME, "a").text
            except Exception:
                pass

            # Second attempt: two skill rows; when both are present they
            # replace the value from the first attempt.
            try:
                skill_rows = skills_box.find_elements(By.CLASS_NAME, "job-details-how-you-match__skills-item-wrapper.display-flex.flex-row.pt4")
                first_skills = skill_rows[0].find_element(By.TAG_NAME, "a").text
                second_skills = skill_rows[1].find_element(By.TAG_NAME, "a").text
                Skills_Required = first_skills + ", " + second_skills
            except Exception:
                pass

            print("🟢 Skills_Required")

        except Exception:
            Skills_Required = "N/A"

            print("🛑 Skills_Required")

        # ------------------------------ Company Description ---------------------------------
        try:
            company_description = driver.find_element(By.CLASS_NAME, "jobs-company__company-description")
            Company_Description = company_description.text.strip()

            print("🟢 Company_Description")
        except Exception:
            Company_Description = "N/A"

            print("🛑 Company_Description")

        # ------------------------------ Company Sector ---------------------------------
        try:
            company_box = driver.find_element(By.CLASS_NAME, "jobs-company__box")
            Company_Sector = company_box.find_element(By.CLASS_NAME, "t-14").text

            print(f"🟢 {Company_Sector}")
        except Exception:
            Company_Sector = "N/A"

            print(f"🛑 {Company_Sector}")

        # ------------------------------ Apply Type ---------------------------------
        # Probed in priority order; the first element found decides the type.
        apply_probes = (
            ((By.XPATH, "//span[text()='Easy Apply']"), "Easy Apply"),
            ((By.XPATH, "//span[text()='Apply']"), "External Apply"),
            ((By.ID, "jobs-apply-see-application-link"), "Already Applied"),
        )
        for locator, label in apply_probes:
            try:
                driver.find_element(*locator)
            except NoSuchElementException:
                continue
            Apply_Type = label
            break

        if Apply_Type:
            print(f"🟢 {Apply_Type}")

    except Exception as e:
        # Anything unexpected ends the scrape; fields collected so far are
        # still returned below.
        print(f"🛑 ({type(e).__name__})")

    job_data = {
        "Job_Title": Job_Title or "N/A",
        "Company_Name": Company_Name or "N/A",
        "Country_of_search": country or "N/A",
        "Full_Location": Job_Location or "N/A",
        "Salary_Range": Salary_Range or "N/A",
        "Job_Placement": Job_Placement or "N/A",
        "Job_Mode": Job_Mode or "N/A",
        "Job_Level": Job_Level or "N/A",
        "Apply_Type": Apply_Type or "N/A",
        "Hiring_Team_Name": Hiring_Team_Name or "N/A",
        "Hiring_Team_Info": Hiring_Team_Info or "N/A",
        "Hiring_Team_Link": Hiring_Team_Link or "N/A",
        "Job_Description": Job_Description or "N/A",
        "Skills_Required": Skills_Required or "N/A",
        "Company_Description": Company_Description or "N/A",
        "Company_Sector": Company_Sector or "N/A",
        "Company_Platform_Link": Company_Platform_Link or "N/A",
        "job_id": job_id or "N/A",
    }
    print("------------------------------")
    return job_data

## Block 3.2 (Second data capturing + record data into a df)

In [7]:
# ------------------------------------------------------------------------------------
# ------------------------------ Function to record data -----------------------------
# ------------------------------------------------------------------------------------

def insert_job_data(df, df3, job_data, External_Link, Status):
    """Append one scraped job to the full table and to the short table.

    Args:
        df: Full job table; the new row gets ID ``len(df) + 1``.
        df3: Short job table; the new row gets ID ``len(df3) + 1``.
        job_data: Mapping of scraped fields; missing keys are stored as "".
        External_Link: Value for the "External_Link" column of ``df``.
        Status: Value for the "Status" column of both tables.

    Returns:
        Tuple ``(df, df3)`` of new DataFrames with the row appended; the
        input frames are not modified.
    """
    print("---------- Second data capturing ----------")

    today = datetime.now().strftime("%d.%m.%Y")

    def pick(*keys):
        # Copy the listed fields out of job_data, defaulting to "".
        return {key: job_data.get(key, "") for key in keys}

    full_record = {
        "ID": len(df) + 1,
        **pick(
            "Job_Title", "Company_Name", "Country_of_search", "Full_Location",
            "Salary_Range", "Job_Placement", "Job_Mode", "Job_Level", "Apply_Type",
        ),
        "External_Link": External_Link,
        "Job_Date": today,
        **pick(
            "Hiring_Team_Name", "Hiring_Team_Info", "Hiring_Team_Link",
            "Job_Description", "Skills_Required", "Company_Description",
            "Company_Sector", "Company_Platform_Link",
        ),
        "Status": Status,
    }

    short_record = {
        "ID": len(df3) + 1,
        **pick("Job_Title", "Company_Name", "Country_of_search"),
        "Job_id": job_data.get("job_id", ""),
        "Status": Status,
    }

    df3 = pd.concat([df3, pd.DataFrame([short_record])], ignore_index=True)
    df = pd.concat([df, pd.DataFrame([full_record])], ignore_index=True)

    print("🟢")
    print("-------------------------------------------")

    return df, df3

# Block 4 (Applying decision, trigger function)

## Main function

In [8]:
def decide_apply_action(driver, job_data=None):
    """Pick and run the apply flow that matches the posting's apply button.

    Checks, in order: an existing application link, an external "Apply"
    button, an "Easy Apply" button.

    Args:
        driver: Selenium WebDriver showing a job posting.
        job_data: Scraped job fields forwarded to ``external_apply``. When
            omitted, the module-level ``job_data`` is used if it exists
            (the previous implicit behaviour), otherwise an empty dict.

    Returns:
        Tuple ``(Status, External_Link)``; both are None when no apply
        button is found or the Easy Apply flow returns nothing.
    """
    Status = None
    External_Link = None

    # The function used to read a global "job_data" without declaring it,
    # which raised NameError whenever that global was missing.
    if job_data is None:
        job_data = globals().get("job_data") or {}

    try:
        driver.find_element(By.ID, "jobs-apply-see-application-link")
        print("     ✅ Already Applied")
        return "Previously Applied", "N/A"
    except NoSuchElementException:
        pass

    try:
        top_card = driver.find_element(By.CLASS_NAME, "jobs-apply-button--top-card")
        top_card.find_element(By.XPATH, ".//span[text()='Apply']")
        print("     ✅ External Apply")
        External_Link, Status = external_apply(driver, job_data, Status, External_Link)
        return Status, External_Link
    except NoSuchElementException:
        pass

    try:
        top_card = driver.find_element(By.CLASS_NAME, "jobs-apply-button--top-card")
        top_card.find_element(By.XPATH, ".//span[text()='Easy Apply']")
        print("     ✅ Easy Apply")
        outcome = handle_easy_apply(driver, Status, External_Link)
        if outcome is None:
            # handle_easy_apply returns nothing when the Apply click fails;
            # unpacking that value used to crash with TypeError.
            print("     🛑 Easy Apply returned no result")
            return Status, External_Link
        External_Link, Status = outcome
        return Status, External_Link
    except NoSuchElementException:
        pass

    print("     🛑 No button")
    return Status, External_Link


In [9]:
## Check for duplicates

In [10]:
def is_job_already_recorded(job_data, csv_path="database_short.csv"):
    """Tell whether the job's id is already stored in the short database.

    Args:
        job_data: Mapping with a "job_id" entry.
        csv_path: CSV file with a "Job_id" column.

    Returns:
        True if the id is present in the file. False if it is absent, if the
        id is missing / None / "N/A", or if the file cannot be read.
    """
    raw_id = job_data.get("job_id")
    job_id = "" if raw_id is None else str(raw_id).strip()

    if not job_id or job_id == "N/A":
        print("⚠️ Invalid or missing job_id in job_data.")
        return False

    try:
        # Read ids as text: with the default dtype one "N/A" entry turns the
        # column into floats, so "123" was compared against "123.0" and
        # duplicates were never found.
        df = pd.read_csv(csv_path, dtype={"Job_id": str})
        recorded_ids = set(
            df["Job_id"]
            .dropna()
            .astype(str)
            .str.strip()
            # Ids saved by earlier runs may carry a float-style ".0" suffix.
            .str.replace(r"\.0$", "", regex=True)
        )
        return job_id in recorded_ids
    except FileNotFoundError:
        print(f"📁 CSV file '{csv_path}' not found. Assuming job_id is new.")
        return False
    except Exception as e:
        print(f"❌ Error while checking job_id: {e}")
        return False

# Block 5 (Applying processes)

## Block 5.0 (Properties for Block 5)

In [11]:
# -----------------------------------------------------------------------------------
# ------------------------------ Zero tier functions --------------------------------
# -----------------------------------------------------------------------------------

# ------------------------------ Possible selectors ---------------------------------

# Each list holds (By strategy, selector) pairs for one button. click_button()
# tries the pairs in list order and clicks the first element that becomes
# clickable, so earlier entries take precedence over later fallbacks.

# Dismiss button of a modal dialog.
click_dismiss_button_selectors = [
    (By.XPATH, "//button[@aria-label='Dismiss']"), 
    (By.CLASS_NAME, "artdeco-modal__dismiss"),
    (By.CSS_SELECTOR, "button.artdeco-modal__dismiss"),  
]

# Apply button of a job posting.
click_apply_button_selectors = [
    (By.CLASS_NAME, "jobs-apply-button"), 
    (By.XPATH, "//button[contains(@class, 'jobs-apply-button')]"),
]

# "Discard" confirmation button.
click_discard_button_selectors = [
    (By.XPATH, "//button[@data-control-name='discard_application_confirm_btn']"), 
    (By.XPATH, "//button[@aria-label='Discard']"),
    (By.XPATH, "//button[span[text()='Discard']]"),
    (By.CSS_SELECTOR, "button[data-control-name='discard_application_confirm_btn']"),
]

# "Continue to next step" button of the Easy Apply form.
click_next_button_selectors = [
    (By.XPATH, "//button[@aria-label='Continue to next step']"),
    (By.CSS_SELECTOR, "button[data-easy-apply-next-button]"),
    (By.CSS_SELECTOR, "button[data-live-test-easy-apply-next-button]"),
]

# "Review your application" button.
click_review_button_selectors = [
    (By.XPATH, "//button[@aria-label='Review your application']"),
    (By.XPATH, "//button[span[text()='Review']]"),
   # (By.CSS_SELECTOR, "button[data-live-test-easy-apply-review-button]"),
]

# "Submit application" button.
click_submit_button_selectors = [
    (By.XPATH, "//button[@aria-label='Submit application']"),
    (By.XPATH, "//button[span[text()='Submit application']]"),
   # (By.CSS_SELECTOR, "button[data-live-test-easy-apply-submit-button]"),
]

# Buttons that close the dialog shown after submitting.
# NOTE(review): the last fallback clicks a "Discard" button - confirm this is
# intended for the post-submit dialog.
click_done_button_selectors = [
    (By.XPATH, "//button[@aria-label='Dismiss']"),
    (By.XPATH, "//button[span[text()='Dismiss']]"),
    (By.XPATH, "//button[span[text()='Done']]"),
    (By.XPATH, "//button[span[text()='Discard']]"), 
]

# "Date posted" filter button on the search results page.
date_posted_filter_selectors = [
    (By.XPATH, "//button[contains(text(), 'Date posted')]"),
    (By.XPATH, "//button[contains(@aria-label, 'Date posted filter')]"),
    (By.CSS_SELECTOR, "button#searchFilter_timePostedRange"),
]

# "Past 24 hours" option inside the date filter.
past_24_hours_option_selectors = [
    (By.XPATH, "//label[contains(., 'Past 24 hours')]"),
    (By.XPATH, "//label[@for='timePostedRange-r86400']"),
    (By.CSS_SELECTOR, "label[for='timePostedRange-r86400']"),
]

# "Show ... results" button that applies the chosen filter.
apply_date_filter_button_selectors = [
    (By.XPATH, "//button[contains(., 'Show') and contains(., 'results')]"),
    (By.XPATH, "//button[span[contains(text(), 'Show') and contains(text(), 'results')]]"),
    (By.CSS_SELECTOR, "button.artdeco-button--primary.ml2"),  # broader fallback
]

# ------------------------------ Generic Click Function -----------------------------

def click_button(selectors, button_name, timeout=5, retries=2, delay=2, driver=None):
    """Click the first clickable element matched by any of ``selectors``.

    Args:
        selectors: Iterable of ``(by, selector)`` pairs, tried in order.
        button_name: Label used in the failure message.
        timeout: Seconds to wait for each selector to become clickable.
        retries: Extra passes over ``selectors`` after the first one.
        delay: Seconds to sleep between passes.
        driver: WebDriver to use. When omitted, the module-level ``driver``
            is used, which is what the function relied on before.

    Returns:
        True as soon as one element was clicked, False if every pass failed
        or no driver is available.
    """
    if driver is None:
        driver = globals().get("driver")
    if driver is None:
        print("⚠️ Error: no WebDriver available")
        print(f"❌ Fail (click_{button_name})")
        return False

    total_attempts = retries + 1
    for attempt in range(1, total_attempts + 1):
        for by, selector in selectors:
            try:
                button = WebDriverWait(driver, timeout).until(
                    EC.element_to_be_clickable((by, selector))
                )
                button.click()
                return True
            except TimeoutException:
                print(f"⚠️ Timeout for selector: {by} = {selector}")
            except Exception as e:
                print(f"⚠️ Error: {e}")
        # Pause only when another pass follows; sleeping after the final
        # failed pass just delayed the failure report.
        if attempt < total_attempts:
            time.sleep(delay)
    print(f"❌ Fail (click_{button_name})")
    return False
# ------------------------------ Use Button Functions -------------------------------

# Thin wrappers: each one passes its selector list and a label to
# click_button() and returns that call's result.

def click_apply_button():
    """Click the job's Apply button; returns click_button()'s result."""
    return click_button(click_apply_button_selectors, "apply_button")

def click_dismiss_button():
    """Click a modal's Dismiss button; returns click_button()'s result."""
    return click_button(click_dismiss_button_selectors, "dismiss_button")

def click_discard_button():
    """Click the Discard confirmation button; returns click_button()'s result."""
    return click_button(click_discard_button_selectors, "discard_button")

def click_next_button():
    """Click the "Continue to next step" button; returns click_button()'s result."""
    return click_button(click_next_button_selectors, "next_button")

def click_review_button():
    """Click the Review button; returns click_button()'s result."""
    return click_button(click_review_button_selectors, "review_button")

def click_submit_button():
    """Click the "Submit application" button; returns click_button()'s result."""
    return click_button(click_submit_button_selectors, "submit_button")

def click_done_button():
    """Click one of the Dismiss/Done/Discard buttons; returns click_button()'s result."""
    return click_button(click_done_button_selectors, "done_button")

def click_date_posted():
    """Click the "Date posted" filter button; returns click_button()'s result."""
    return click_button(date_posted_filter_selectors, "date_posted")

def click_past_24_hours():
    """Click the "Past 24 hours" option; returns click_button()'s result."""
    return click_button(past_24_hours_option_selectors, "24_hours_option")

def apply_date_filter():
    """Click the "Show ... results" button; returns click_button()'s result."""
    return click_button(apply_date_filter_button_selectors, "apply_date_filter")


## Block 5.1 (External Apply)

In [12]:
def external_apply(driver, job_data, Status, External_Link):
    """Open the external application page, record its URL, and return to the job list.

    Clicks the Apply button, waits up to 20 seconds for a new tab, reads that
    tab's URL after a 5-second load pause, and appends a row to
    "apply_table.csv". Any tab opened during the call is closed and the
    originally focused tab is re-selected, whether or not the URL was saved.

    Args:
        driver: Selenium WebDriver showing the job posting.
        job_data: Mapping of scraped fields (Job_Title, Company_Name,
            Country_of_search are written to the table).
        Status: Incoming status; replaced by the outcome of this call.
        External_Link: Incoming link; replaced by the outcome of this call.

    Returns:
        Tuple ``(External_Link, Status)``: the external URL with
        "External link saved", or "N/A" with "Failed to fetch external link".
    """
    print("--------------- External Apply ---------------")

    original_tab = None
    tabs_before = set()

    try:
        # Remember the current tab set so only tabs opened by this call are
        # closed afterwards.
        original_tab = driver.current_window_handle
        tabs_before = set(driver.window_handles)

        if not click_apply_button():
            raise RuntimeError("Apply button could not be clicked")

        # Wait for the new tab instead of always sleeping the full 20 seconds.
        WebDriverWait(driver, 20).until(
            lambda d: any(handle not in tabs_before for handle in d.window_handles)
        )
        new_tab = next(
            handle for handle in driver.window_handles if handle not in tabs_before
        )
        driver.switch_to.window(new_tab)

        time.sleep(5)  # Let the page (and any redirects) load
        External_Link = driver.current_url

        print("🟢 External Job Link:", External_Link)

        Status = "External link saved"

        # Read inside the try so a missing or broken table is reported as a
        # failed save instead of crashing the caller.
        df2 = pd.read_csv("apply_table.csv")
        new_row = {
            "ID": len(df2) + 1,
            "Job_Title": job_data.get("Job_Title", ""),
            "Company_Name": job_data.get("Company_Name", ""),
            "Country_of_search": job_data.get("Country_of_search", ""),
            "External_Link": External_Link,
        }

        df2 = pd.concat([df2, pd.DataFrame([new_row])], ignore_index=True)

        df2.to_csv("apply_table.csv", index=False)

        print("🟢 Status:", Status)

    except Exception as e:
        External_Link = "N/A"
        Status = "Failed to fetch external link"

        print("🛑 Status:", Status)
        print("🛑 External_Link:", External_Link)
        print(f"🛑 Reason: {type(e).__name__}: {e}")

    finally:
        # Always return to the job list; previously a failure after the tab
        # switch left the driver focused on the external site.
        if original_tab is not None:
            try:
                for handle in driver.window_handles:
                    if handle not in tabs_before:
                        driver.switch_to.window(handle)
                        driver.close()
                driver.switch_to.window(original_tab)
            except Exception as cleanup_error:
                print(f"🛑 Tab cleanup failed: {type(cleanup_error).__name__}")

    return External_Link, Status

## Block 5.2 (Platform Apply)

In [13]:
def _find_displayed(driver, xpath):
    """Return the element at *xpath* if it exists and is displayed, else None."""
    try:
        element = driver.find_element(By.XPATH, xpath)
    except NoSuchElementException:
        return None
    return element if element.is_displayed() else None


def _answer_and_click_step(driver, step_name, click_step, click_label, error_present):
    """Fill flagged questions when the form shows an error, then click the step button."""
    if error_present:
        print(f"🟢 {step_name} is found + error")
        df_questions_answers = load_answers_df("questions_answers.csv")
        print("🟢 df_questions_answers")
        process_questions(driver, df_questions_answers)
        print("🟢 process_questions")
        extract_visible_questions_and_save(driver, "saved_questions.csv")
        print("🟢 extract_visible_questions_and_save")

    click_step()
    print(f"🟢 {click_label}")


def handle_easy_apply(driver, Status, External_Link):
    """Walk through the platform's multi-step Easy Apply dialog.

    Each pass of the loop looks for, in order: the Submit button (finish),
    an inline validation error, the "next step" button and the "review"
    button. When an error is visible the questions on the page are answered
    from ``questions_answers.csv`` before the step button is clicked. After
    more than five step clicks, or when no step button is available, the
    dialog is dismissed and discarded.

    Args:
        driver: Selenium WebDriver showing the job details.
        Status: Overwritten on every path; kept for signature compatibility.
        External_Link: Overwritten on every path; kept for signature
            compatibility.

    Returns:
        tuple: ``(External_Link, Status)``. ``External_Link`` is always
        "N/A"; ``Status`` is one of "Successfully applied via platform ",
        "Failed to answer questions platform apply" or
        "Failed to process platform apply".
    """
    try:
        print("------------------ Easy Apply ----------------")

        # Step 1: Click Apply. Every other exit returns a 2-tuple, so this
        # one must as well or callers that unpack the result would crash.
        if not click_apply_button():
            print("🛑 click_apply_button")
            External_Link = "N/A"
            Status = "Failed to process platform apply"
            print(f"🛑 {External_Link}")
            print(f"🛑 {Status}")
            return External_Link, Status

        # (log name, button xpath, click helper, log label), tried in this order.
        wizard_steps = (
            ("next", "//button[@aria-label='Continue to next step']",
             click_next_button, "click_next_button"),
            ("review", "//button[@aria-label='Review your application']",
             click_review_button, "click_review_button"),
        )

        # Initialize click counter
        step_clicks = 0

        # Step 2: Loop through pages
        while True:
            time.sleep(2)

            # Check if Submit button is available (End Condition)
            if _find_displayed(driver, "//button[@aria-label='Submit application']") is not None:
                print("🟢 submit button found")
                click_submit_button()
                print("🟢 click_submit_button")

                handle_loader_timeout_and_modal(driver)

                click_done_button()
                print("🟢 click_done_button")

                External_Link = "N/A"
                Status = "Successfully applied via platform "
                print(f"🟢 {External_Link}")
                print(f"🟢 {Status}")
                return External_Link, Status

            # Check for error messages
            try:
                error_element = driver.find_element(
                    By.XPATH, "//span[contains(@class, 'artdeco-inline-feedback__message')]"
                )
                error_present = error_element.is_displayed()
                if error_present:
                    print("🟢 error is found")
            except NoSuchElementException:
                print("🟣 next element 2")
                error_present = False

            # Click the first step button that is currently displayed.
            advanced = False
            for step_name, xpath, click_step, click_label in wizard_steps:
                if _find_displayed(driver, xpath) is None:
                    continue
                _answer_and_click_step(driver, step_name, click_step, click_label, error_present)
                step_clicks += 1
                advanced = True
                break

            if not advanced:
                # If nothing was clickable
                print("🛑 Escape 1")
                break

            if step_clicks > 5:
                print("🛑 Too many steps, exiting...")
                extract_visible_questions_and_save(driver, "saved_questions.csv")
                print("🟢 extract_visible_questions_and_save")
                break

        # Escape sequence after too many steps or failure
        click_dismiss_button()
        print("🛑 Escape 2")
        time.sleep(10)
        click_discard_button()
        print("🛑 Escape 3")
        External_Link = "N/A"
        Status = "Failed to answer questions platform apply"
        print(f"🛑 {External_Link}")
        print(f"🛑 {Status}")
        return External_Link, Status

    except Exception as e:
        print("🛑 Escape 4")
        print("🛑 Error:", e)
        # NOTE(review): this handler clicks discard before dismiss, the
        # reverse of the normal escape sequence above -- confirm that the
        # order is intentional.
        click_discard_button()
        print("🛑 Escape 5")
        time.sleep(10)
        click_dismiss_button()
        print("🛑 Escape 6")
        External_Link = "N/A"
        Status = "Failed to process platform apply"
        print(f"🛑 {External_Link}")
        print(f"🛑 {Status}")
        return External_Link, Status


### Block 5.2.1 (Handle question function)

In [14]:
# -----------------------------
# Main Function
# -----------------------------
def process_questions(driver, df_questions_answers):
    """Answer every recognisable question in the currently visible form.

    For each ``fb-dash-form-element`` block the question text is read from
    its first ``<legend>`` or ``<label>``, matched against the answers table
    with ``match_question``, and the answer from the column that corresponds
    to the detected input type is entered with ``fill_answer``.

    Args:
        driver: Selenium WebDriver showing the application form.
        df_questions_answers: DataFrame with a "Question" column plus the
            "Input Answer", "Radio Answer" and "Dropdown Answer" columns.

    Returns:
        list[tuple]: ``(question_text, answer)`` pairs that were filled in.
        Empty when the answers table is missing or has no rows.
    """
    print("------------------ process_questions ----------------")

    questions_filled = []

    # Without any stored answers nothing can be matched; bail out once
    # instead of failing separately for every form block.
    if df_questions_answers is None or df_questions_answers.empty:
        print("🛑 process_questions: answers table is empty")
        return questions_filled

    answer_columns = {
        "input": "Input Answer",
        "radio": "Radio Answer",
        "dropdown": "Dropdown Answer",
    }

    form_blocks = driver.find_elements(By.XPATH, "//div[contains(@class,'fb-dash-form-element')]")
    for block in form_blocks:
        try:
            # find_element raises when neither tag exists; the handler below
            # then reports it and moves on to the next block.
            heading = block.find_element(By.XPATH, ".//legend|.//label")
            question_text = heading.text.strip()

            if not question_text:
                continue

            print("🟢 Try legend first")

            # Step 2: Match question
            # NOTE(review): the best match is used whatever its score, so an
            # unrelated question still receives the top-ranked row's answer.
            matched_row, score = match_question(question_text, df_questions_answers)

            print(f"🟢\n🔍 Found match ({score:.2f}):")
            print("🟢 - Extracted:", question_text)
            print("🟢 - Matched:  ", matched_row['Question'])

            # Step 3: Detect input type
            input_type = detect_input_type(block)
            print(f" - Input Type: {input_type}")

            # Step 4: Fill answer
            answer_column = answer_columns.get(input_type)

            if answer_column and pd.notna(matched_row[answer_column]):
                fill_answer(driver, block, matched_row[answer_column], input_type)
                print("🟢 fill_answer")
                questions_filled.append((question_text, matched_row[answer_column]))

                print("🟢 questions_filled.append")
            else:
                # Unknown input type, or no stored answer for this type.
                print("🛑 questions_filled.append")

        except Exception as e:
            # Best effort: one broken block must not stop the remaining ones,
            # but the reason is reported so it can be diagnosed.
            print("🛑 process_questions")
            print("🛑 Error:", e)

    return questions_filled

#### Block 5.2.1.1 (Question-handling helper functions)

In [15]:
# -----------------------------
# Load CSV and define matcher
# -----------------------------
def load_answers_df(csv_path):
    """Read the question/answer table stored at *csv_path*.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        pandas.DataFrame: The file's contents as parsed by ``pd.read_csv``.
    """
    print("                    🟢 load_answers_df")
    answers_table = pd.read_csv(csv_path)
    return answers_table

def match_question(question_text, df_questions_answers):
    """Find the stored question most similar to *question_text*.

    Similarity is the cosine similarity between TF-IDF vectors fitted on the
    stored questions plus the incoming one.

    Args:
        question_text: Question text extracted from the web form.
        df_questions_answers: DataFrame with a "Question" column.

    Returns:
        tuple: ``(row, score)`` where ``row`` is the best-matching row of
        ``df_questions_answers`` and ``score`` its cosine similarity.

    Raises:
        ValueError: If the table has no rows to match against.
    """
    print("                    🟢 match_question 1")
    # Blank cells are read by pandas as NaN, which the vectorizer rejects;
    # map them to empty strings so row positions stay aligned with the table.
    known_questions = [
        "" if pd.isna(stored) else str(stored)
        for stored in df_questions_answers['Question'].tolist()
    ]
    if not known_questions:
        raise ValueError("questions/answers table is empty; nothing to match against")

    vectorizer = TfidfVectorizer().fit(known_questions + [question_text])
    print("                    🟢 match_question 2")
    df_vectors = vectorizer.transform(known_questions)
    print("                    🟢 match_question 3")
    web_vector = vectorizer.transform([question_text])
    print("                    🟢 match_question 4")
    scores = cosine_similarity(web_vector, df_vectors).flatten()
    print("                    🟢 match_question 5")
    best_idx = scores.argmax()
    print("                          🟢 match_question")
    return df_questions_answers.iloc[best_idx], scores[best_idx]

# -----------------------------
# Detect input type
# -----------------------------
def detect_input_type(element):
    """Classify the kind of form control contained in *element*.

    The probes run in a fixed order -- ``<select>``, radio ``<input>``, text
    ``<input>`` -- and the first one that finds anything decides the result.

    Args:
        element: Selenium element wrapping one form question.

    Returns:
        "dropdown", "radio" or "input"; ``None`` when no probe matches or a
        lookup raises.
    """
    probes = (
        (By.TAG_NAME, "select", "dropdown",
         "                          🟢 detect_input_type - dropdown"),
        (By.XPATH, ".//input[@type='radio']", "radio",
         "                          🟢 detect_input_type - radio"),
        (By.XPATH, ".//input[@type='text']", "input",
         "                          🟢 detect_input_type - input"),
    )

    try:
        for locator_kind, locator, control_type, message in probes:
            if element.find_elements(locator_kind, locator):
                print(message)
                return control_type
    except Exception:
        print("                          🛑 detect_input_type - unknown input")

    return None

# -----------------------------
# Fill answer into form
# -----------------------------
def _answer_as_text(answer, default):
    """Return *answer* as the text to enter, or *default* when it is missing."""
    if answer is None or pd.isna(answer):
        return default
    # pandas reads a numeric column containing blanks as floats; "5.0" is not
    # what the form expects for an answer stored as 5.
    if isinstance(answer, float) and answer.is_integer():
        return str(int(answer))
    return str(answer)


def fill_answer(driver, form_element, answer, input_type):
    """Enter *answer* into the control inside *form_element*.

    Args:
        driver: Selenium WebDriver, passed on to the human_* helpers.
        form_element: Selenium element wrapping one form question.
        answer: Value taken from the answers table; may be a string, a
            number or missing (NaN/None).
        input_type: "input", "radio" or "dropdown"; anything else is
            reported and skipped.

    Behaviour per type:
        * input: clears the first ``<input>`` and types the answer ("5" when
          the answer is missing).
        * radio: clicks the label whose text equals the answer
          case-insensitively ("Yes" when missing); falls back to the first
          label.
        * dropdown: clicks the matching ``<option>``; falls back to the
          option at index 1, skipping the placeholder at index 0.

    Errors are printed and swallowed; nothing is returned.
    """
    print("                          ------------------ fill_answer ----------------")
    try:
        if input_type == "input":
            fallback_answer = _answer_as_text(answer, "5")
            input_box = form_element.find_element(By.TAG_NAME, "input")
            human_scroll_to(driver, input_box)
            human_hover(driver, input_box)
            input_box.clear()
            human_delay(0.3, 0.6)
            human_type(input_box, fallback_answer)

        elif input_type == "radio":
            wanted = _answer_as_text(answer, "Yes").strip().lower()
            labels = form_element.find_elements(By.XPATH, ".//label")
            clicked = False
            for label in labels:
                if label.text.strip().lower() == wanted:
                    human_scroll_to(driver, label)
                    human_hover(driver, label)
                    human_delay(0.2, 0.5)
                    label.click()
                    clicked = True
                    break
            if not clicked and labels:
                human_scroll_to(driver, labels[0])
                human_hover(driver, labels[0])
                labels[0].click()  # Fallback: just click the first radio option

        elif input_type == "dropdown":
            select_el = form_element.find_element(By.TAG_NAME, "select")
            human_scroll_to(driver, select_el)
            human_hover(driver, select_el)
            options = select_el.find_elements(By.TAG_NAME, "option")
            fallback_answer = _answer_as_text(answer, None)
            clicked = False
            if fallback_answer:
                wanted = fallback_answer.strip().lower()
                for option in options:
                    if option.text.strip().lower() == wanted:
                        human_delay(0.2, 0.5)
                        option.click()
                        clicked = True
                        break
            if not clicked and len(options) > 1:
                human_delay(0.2, 0.5)
                options[1].click()  # Select first real option (skipping placeholder at index 0)

        else:
            print("Unknown input type, skipping...")

        human_delay(0.4, 1.0)

    except Exception as e:
        print("Error filling answer:", e)

# -----------------------------
# Store questions separately
# -----------------------------


def extract_visible_questions_and_save(driver, output_path="saved_questions.csv", answered_path="questions_answers.csv"):
    """Collect the questions shown on the page and store the unanswered ones.

    Question texts come from ``<legend>`` spans and from ``<label>`` elements
    that are not option labels of radio buttons or checkboxes. Texts found in
    neither *output_path* nor *answered_path* are merged with the questions
    already saved in *output_path*, and the sorted result is written back.

    Args:
        driver: Selenium WebDriver showing the application form.
        output_path: CSV (column "Question") collecting unanswered questions.
        answered_path: CSV (column "Question") of questions that have answers.

    Returns:
        list[str]: Questions added to *output_path* by this call, in page order.
    """
    import os
    import pandas as pd

    def _question_set(path, failure_message):
        """Return the "Question" values stored at *path*, or an empty set."""
        if not os.path.exists(path):
            return set()
        try:
            return set(pd.read_csv(path)['Question'].dropna().tolist())
        except Exception as err:
            print(failure_message, err)
            return set()

    # Step 1: legend texts
    collected = []
    for span in driver.find_elements(By.XPATH, "//legend//span[@aria-hidden='true']"):
        legend_text = span.text.strip()
        if legend_text:
            collected.append(legend_text)

    # Step 2: label texts, leaving out labels that belong to radio/checkbox options
    for label in driver.find_elements(By.XPATH, "//label"):
        target_id = label.get_attribute("for")
        if target_id:
            targets = driver.find_elements(By.ID, target_id)
            if targets and targets[0].get_attribute("type") in ["radio", "checkbox"]:
                continue  # Skip option labels
        inner_spans = label.find_elements(By.XPATH, ".//span[@aria-hidden='true']")
        source = inner_spans[0] if inner_spans else label
        label_text = source.text.strip()
        if label_text:
            collected.append(label_text)

    # Step 3: de-duplicate while keeping page order
    unique_new_questions = list(dict.fromkeys(collected))

    # Steps 4-5: questions already saved / already answered
    existing_questions = _question_set(output_path, "⚠️ Failed to read existing saved CSV:")
    answered_questions = _question_set(answered_path, "⚠️ Failed to read answered questions CSV:")

    # Step 6: keep only questions seen for the first time
    already_known = existing_questions | answered_questions
    new_unanswered_questions = [q for q in unique_new_questions if q not in already_known]

    # Steps 7-8: merge with the saved questions and write the sorted list back
    final_questions = list(existing_questions.union(new_unanswered_questions))
    pd.DataFrame({"Question": sorted(final_questions)}).to_csv(output_path, index=False)
    print(f"\n📁 Saved {len(final_questions)} total unique unanswered questions to '{output_path}'")

    # Optional: report what was added
    if not new_unanswered_questions:
        print("✅ No new unanswered questions found.")
    else:
        print("🆕 New questions added:")
        for q in new_unanswered_questions:
            print(" -", q)

    return new_unanswered_questions


In [19]:
def handle_loader_timeout_and_modal(driver, timeout=30):
    """Recover from a loader that never disappears after submitting.

    Waits up to *timeout* seconds for the ``jobs-loader`` element to leave
    the DOM. If it is still present after that, the page is refreshed and,
    when a modal dialog becomes visible within 10 seconds, the dialog is
    closed through ``click_done_button``.

    Args:
        driver: Selenium WebDriver.
        timeout: Seconds to wait for the loader to disappear.

    Returns:
        None.
    """
    try:
        print("⏳ Checking if loader is stuck...")
        # Wait until loader disappears, timeout if still visible after `timeout` seconds
        WebDriverWait(driver, timeout).until_not(
            EC.presence_of_element_located((By.CLASS_NAME, "jobs-loader"))
        )
        print("✅ Loader cleared.")
        return
    except TimeoutException:
        print("⚠️ Loader stuck for too long. Refreshing page...")

    driver.refresh()
    time.sleep(5)  # Give page time to load

    # Now wait for modal or job list
    try:
        print("🕵️ Checking for 'Application sent' modal...")
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, 'div[role="dialog"].artdeco-modal'))
        )
    except TimeoutException:
        print("❌ Modal not found. Proceeding to fetch job list.")
    else:
        print("✅ Modal detected. Attempting to dismiss...")
        # The original code clicked an undefined `done_button`, which raised
        # NameError. The file's own helper is used instead; a falsy result is
        # presumed to mean that no button could be clicked -- TODO confirm
        # against click_button.
        try:
            dismissed = click_done_button()
        except NoSuchElementException:
            dismissed = False

        if dismissed:
            time.sleep(2)
            print("🧹 Modal dismissed.")
        else:
            print("⚠️ Close button not found in modal.")

    # After handling modal or not, continue to job listings
    print("🔄 Reloading job listings...")

# Main Script

In [None]:
# Main driver cell: log in, then for every title/country pair restrict the
# search to the last 24 hours, walk the result pages and, for each job card,
# collect its data, apply, and record the outcome.
driver = webdriver.Chrome(service=service, options=options)
driver.get("https://www.linkedin.com")

# ------------------------------ Login ----------------------

try:
    login_and_go_to_jobs(driver, linkedin_user_name, linkedin_password)

except Exception as e:
    print("Error:", e)

# ------------------------------ Set title and country---------------

try:
    for title in titles:
        search_job_title(driver, title, wait_time=30)

        for country in countries:

            set_location_and_search(driver, country, wait_time=30)

            WebDriverWait(driver, 200).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "li.ember-view.occludable-update"))
            )
            time.sleep(10)

            current_page = driver.current_url
            # Handle of the tab that shows the search results. The retry below
            # switches back to it; the original referenced an undefined name
            # (`Current_page`), and `current_page` holds a URL, not a handle.
            search_window = driver.current_window_handle

# ---------------------- Set 24 hours jobs only ---------
            click_date_posted()
            time.sleep(5)
            click_past_24_hours()
            time.sleep(5)
            apply_date_filter()

# ---------------------- Go Through jobs on the current page ---------
            max_pages = page_limits.get(title, {}).get(country, 1)  # Default to 1 if not found
            current_page_num = 1
            while current_page_num <= max_pages:

                job_elements = driver.find_elements(By.CSS_SELECTOR, "li.ember-view.occludable-update")

                for job in job_elements:

# ---------------------- Append (Back up) info to csv ---------------
                    df.to_csv("database.csv", index=False)
                    df = pd.read_csv("database.csv")

                    df3.to_csv("database_short.csv", index=False)
                    df3 = pd.read_csv("database_short.csv")

                    try:
                        try:
                            time.sleep(3)
                            job.click()
                            WebDriverWait(driver, 3).until(
                                EC.presence_of_element_located((By.CLASS_NAME, "job-details-jobs-unified-top-card__company-name"))
                            )
                        except Exception:
                            # Focus may have moved to another tab; return to
                            # the results tab and try the click once more.
                            driver.switch_to.window(search_window)
                            time.sleep(10)
                            job.click()
                            WebDriverWait(driver, 3).until(
                                EC.presence_of_element_located((By.CLASS_NAME, "job-details-jobs-unified-top-card__company-name"))
                            )

# ---------------------- Collect info (Block 3.1) --------------------
                        print("🟠🟠🟠🟠 Duplicate check 🟠🟠🟠🟠")
                        try:
                            driver.find_element(By.ID, "jobs-apply-see-application-link")
                        except NoSuchElementException:
                            pass  # no "see application" link: not applied yet
                        else:
                            print("🟠🟠🟠🟠 Job is Already applied with Easy Apply   🟠🟠🟠🟠")
                            continue

                        try:
                            job_data = extract_job_info(driver, country)

                            print("🟢 extract_job_info Completed")

                            if is_job_already_recorded(job_data):
                                print(f"🟠 Job ID {job_data['job_id']} already exists. Skipping... ")
                                continue
                            else:
                                print(f"🟢 Job ID {job_data['job_id']} is new. Proceeding to save...")

                        except Exception as e:
                            print("extract_job_info Failed")
                            print("Error:", e)
                            # NOTE(review): the application below still goes
                            # ahead without job data -- confirm this is wanted.
                            job_data = None

# ---------------------- Applying  -----------------------------------
                        # NOTE(review): the apply handlers in this file return
                        # (External_Link, Status); confirm decide_apply_action
                        # really returns the pair in (Status, External_Link) order.
                        Status, External_Link = decide_apply_action(driver)

# ---------------------- Record info (Block 3.2) ---------------------
                        try:
                            df, df3 = insert_job_data(df, df3, job_data, External_Link, Status)
                            print("insert_job_info Complited")
                        except Exception as e:
                            print("insert_job_info Failed")
                            print("Error:", e)

                    except Exception as e:
                        print(f"Error clicking job: {e}")
                        break

# ---------------------- Move to the next page  ----------------------
                try:
                    next_button = driver.find_element(By.CSS_SELECTOR, "button.jobs-search-pagination__button--next")
                    if next_button.is_enabled():
                        next_button.click()
                        print("📌 Navigating to the next page...")
                        time.sleep(5)  # Wait for the next page to load
                        current_page_num += 1
                    else:
                        print("✅ No more pages to navigate.")
                        break  # Exit the loop if there's no next page

                except NoSuchElementException:
                    print("✅ No 'Next' button found. Moving to next country...")
                    break  # Exit the loop if the button is not found

    # Reached only after every title/country pair has been processed.
    print("🎉 Job search completed for all titles and countries!")

except Exception as e:
    print("Error during search:", e)

# ---------------------- Final save -----------------------------------
# The per-job backup runs at the START of each iteration, so rows added for
# the last processed job would otherwise never reach disk.
try:
    df.to_csv("database.csv", index=False)
    df3.to_csv("database_short.csv", index=False)
except Exception as e:
    print("Error saving final results:", e)

✅ Sigh in
🟢 Searching process
🟢 Title Data Scientist
🟢 Country Ireland
⚠️ Timeout for selector: xpath = //button[contains(text(), 'Date posted')]
⚠️ Timeout for selector: xpath = //button[contains(@aria-label, 'Date posted filter')]
⏳ Checking if loader is stuck...
✅ Loader cleared.
🟠🟠🟠🟠 Duplicate check 🟠🟠🟠🟠
---------- Initual data capturing ----------
🟢 Project Foundry
🟢 https://www.linkedin.com/company/the-project-foundry/life
🟢 Test Manager (Part-time - 2 days per week)
🟢 Dublin, County Dublin, Ireland · 14 hours ago · 75 applicants
Promoted by hirer · Company review time is typically 1 week
🟢 N/A
🟢 Remote
🟢 Part-time
🟢 Mid-Senior level
🟢 https://www.linkedin.com/in/richard-ward-605615110
🟢 Richard Ward
🟢 Talent Partner | Recruitment Consultant | Key Accounts Manager | All-round half-decent bloke
🟢 Job_Description
🟢 Skills_Required
🟢 Company_Description
🟢 IT Services and IT Consulting 11-50 employees 67 on LinkedIn
🟢 Easy Apply
------------------------------
🟢 extract_job_info Compl