In [None]:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium import webdriver
import pandas as pd
import regex
import time

try:
    driver_path = r"C:\Program Files (x86)\ChromeDriver\chromedriver.exe"
    website = "https://www.linkedin.com/my-items/saved-jobs/?cardType=APPLIED"
    
    chrome_options = webdriver.ChromeOptions()
    # chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1280,1440")
                            
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service)

    driver.get(website)

    login_txt = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//input[contains(@id, 'username')]")))
    password_txt = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, "//input[contains(@id, 'password')]")))

    user_login = input("Enter LinkedIn Email or Phone: ")
    user_password = input("Enter LinkedIn Password: ")
    
    login_txt.send_keys(user_login)
    password_txt.send_keys(user_password)

    driver.find_element(By.XPATH, "//button[contains(@data-litms-control-urn, 'login-submit')]").click()
    
    # Compiled once up front instead of on every card.
    # Removes the "Verified" badge text together with the separators before it.
    _VERIFIED_BADGE_PATTERN = regex.compile(r"\s*[\n,]*\s*Verified")
    # Workplace marker appended to the location line, e.g. " (Remote)".
    _WORKPLACE_SUFFIX_PATTERN = regex.compile(r"\s*\((?:Remote|On-Site|Hybrid)\)", regex.IGNORECASE)
    _WORKPLACE_TYPE_PATTERN = regex.compile(r"(Remote|On-Site|Hybrid)", regex.IGNORECASE)

    def _parse_job_card(card_text):
        """Split one job card's text into its fields.

        Args:
            card_text: Raw visible text of a job card element.

        Returns:
            A ``(title, company, location, workplace_type)`` tuple of strings,
            or ``None`` when the card contains no text at all. Fields that are
            missing from the card are returned as empty strings.
        """
        cleaned_text = _VERIFIED_BADGE_PATTERN.sub("", card_text).strip()
        lines = [line.strip() for line in cleaned_text.splitlines() if line.strip()]
        if not lines:
            return None

        # The first three lines are read as title, company and location.
        # Pad so a short card yields blanks instead of raising IndexError.
        title, company, location_line = (lines + ["", ""])[:3]

        # Keep the location even when no workplace marker is present; only the
        # "(Remote)" / "(On-Site)" / "(Hybrid)" suffix itself is stripped.
        location = _WORKPLACE_SUFFIX_PATTERN.sub("", location_line).strip()
        type_match = _WORKPLACE_TYPE_PATTERN.search(location_line)
        workplace_type = type_match.group(0) if type_match else ""

        return title, company, location, workplace_type

    def scrape_jobs_applied_to(driver, output_path="linkedin_jobs_applied_to.csv"):
        """Collect every job card from the paginated results and save them as CSV.

        Each card is printed as it is parsed. Pagination stops when there is no
        "next" button (single page of results) or when that button is disabled.

        Args:
            driver: Selenium WebDriver already showing the applied-jobs page.
            output_path: Destination of the CSV file.

        Returns:
            A pandas DataFrame with the columns TITLE, COMPANY, COUNTRY and
            LOCATION_TYPE, one row per parsed job card.

        Raises:
            selenium.common.exceptions.TimeoutException: If no job cards appear
                within 8 seconds on a page.
        """
        card_xpath = "//div[contains(@class, 'pt3 pb3 t-12 t-black--light')]"
        next_button_xpath = "//button[contains(@class, 'artdeco-pagination__button--next')]"
        rows = []

        while True:
            cards = WebDriverWait(driver, 8).until(
                EC.presence_of_all_elements_located((By.XPATH, card_xpath))
            )

            for card in cards:
                parsed = _parse_job_card(card.text)
                if parsed is None:
                    continue  # Nothing readable in this card.
                rows.append(parsed)
                title, company, location, workplace_type = parsed
                print(f"{title}\n{company}\n{location}\n{workplace_type}\n")

            # find_elements returns an empty list instead of raising when the
            # results fit on one page and no pagination control is rendered.
            next_buttons = driver.find_elements(By.XPATH, next_button_xpath)
            if not next_buttons:
                break
            next_btn = next_buttons[0]
            # The last page may mark the button as disabled through either the
            # CSS class or the HTML attribute, so check both.
            if "disabled" in (next_btn.get_attribute("class") or "") or not next_btn.is_enabled():
                break

            WebDriverWait(driver, 10).until(EC.element_to_be_clickable(next_btn))
            next_btn.click()
            # Fixed pause, presumably so the next page's cards can render
            # before they are queried again.
            time.sleep(2)

        df_jobs = pd.DataFrame(rows, columns=["TITLE", "COMPANY", "COUNTRY", "LOCATION_TYPE"])
        df_jobs.to_csv(output_path, index=False)

        return df_jobs
        
    time.sleep(8)
    
    scrape_jobs_applied_to(driver)
    
except Exception as e:
    print(f"An error occurred: {e}")

finally:
   driver.quit()