# Job Application Helper (LinkedIn)
![transform](LinkedInToTxt.png)

This jupyter notebook will generate job application index page title and its contents for Linked in.


### 1.> Lets load required libraries.

In [13]:
import time
import traceback
from datetime import date
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException

### 2.> Config Area

In [14]:

#print(formatted_date)
lastSequence = 300


# Credentials
linkedInUID = ""
linkedInPWD = ""


if not linkedInUID or not linkedInPWD:
    raise ValueError("LinkedIn credentials must not be empty !")


# Wait time after login, just in case some non robotic manual interaction is requried.
waitTimeAfterLoginInSeconds = 60

# URLs 
urls = [
'https://www.linkedin.com/jobs/view/3847471903/',
'https://www.linkedin.com/jobs/view/3781149075/',
'https://www.linkedin.com/jobs/view/3843098956/',
'https://www.linkedin.com/jobs/view/3842570234/'    
]



# Output list declaration
indexList = []
contentList = []




# Other variable defined
jobTitle = ""
companyName = ""
indexLine = ""
contentLine01 = ""
contentLine02 = ""
contentLine03 = ""
logPositionToplevel = []
logPositionForloop = []






# Defining Exceptions
class CustomException(Exception):
    """Custom exception for element retrieval errors."""
    pass




In [15]:
# Find duplicate URLs if any.


duplicates = set()
distinct_strings = set()
for item in urls:
    if item in distinct_strings:
        duplicates.add(item)
    else:
        distinct_strings.add(item)

# Print duplicates
print("Duplicates:", duplicates)

# Convert the distinct strings set back to a list for further use (optional)
urls = list(distinct_strings)

# Print distinct strings
print("Distinct URLs:", urls)


Duplicates: set()
Distinct URLs: ['https://www.linkedin.com/jobs/view/3847471903/', 'https://www.linkedin.com/jobs/view/3843098956/', 'https://www.linkedin.com/jobs/view/3842570234/', 'https://www.linkedin.com/jobs/view/3781149075/']


In [16]:
def getElementText(driver, xPath, logBegin, logEnd):
    logList = []
    elementText = ""
    errorMessage = ""
    isSuccess = True

    try:
        # Attempt to find the element by XPath
        logList.append(logBegin)
        element = driver.find_element(By.XPATH, xPath)
        if element:
            elementText = element.text
        else:
            elementText = "NOTFOUND_" + logBegin + "_" + logEnd

    except NoSuchElementException as e:
        # If element is not found, set the error message
        errorMessage = f"Element not found: {e}"
        logList.append(errorMessage)
        isSuccess = False
    except Exception as e:
        # Generic exception handling
        errorMessage = f"An error occurred: {e}"
        logList.append(errorMessage)
        # Capture and log the full stack trace
        errorMessage += "\n" + traceback.format_exc()
        logList.append(errorMessage)
        isSuccess = False
    
    
    logList.append(logEnd)

    return (elementText, logList, isSuccess)

In [17]:
# ################################
# Set up Selenium ChromeDriver
# ################################
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
logPositionToplevel.append("T01")





# ################################
# Login to linked in
# ################################
driver.get("https://www.linkedin.com/login")
username_element = driver.find_element(By.ID, "username")
password_element = driver.find_element(By.ID, "password")
logPositionToplevel.append("T02")

username_element.send_keys(linkedInUID)
password_element.send_keys(linkedInPWD)

login_button = driver.find_element(By.XPATH, "//button[@type='submit' and @aria-label='Sign in']")
login_button.click()
logPositionToplevel.append("T03")

WebDriverWait(driver, waitTimeAfterLoginInSeconds).until(
    EC.presence_of_element_located((By.XPATH, "//body[@class='render-mode-BIGPIPE nav-v2 ember-application icons-loaded boot-complete']"))
)
logPositionToplevel.append("T04")

print('Login successful, now visiting to the jobpost URL')
logPositionToplevel.append("T05")






# Get the current date in 'Mon DD, YYYY' format.
current_date = date.today()
currentFormattedDate = current_date.strftime("%b %d, %Y")
print(currentFormattedDate)
logPositionToplevel.append("T06")





# ######################################################################
# After llgin in successfull Iterate over each URL to scrap contents
# ######################################################################
for contentUrl in urls:
    logPositionForloop.clear()

    # Open the URL with Selenium
    driver.get(contentUrl)
    logPositionForloop.append("F01")

    # Wait for the job title element to be present
    try:
        # Find Job Title.
        jobTitleElement = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//h1[@class="t-24 t-bold job-details-jobs-unified-top-card__job-title"]'))
        )
        logPositionForloop.append("F02")
        if jobTitleElement:
            jobTitle = jobTitleElement.text
        else:
            jobTitle = "NOTFOUND_jobTitle"
        
        #print(jobTitle)
        time.sleep(10)  # Waits for 5 seconds    
        logPositionForloop.append("F03")
        

        
        
        
        
        
        
        
        # Find company name.
        companyName, logList, isSucess = getElementText(driver=driver, 
                       xPath='/html[1]/body[1]/div[5]/div[3]/div[2]/div[1]/div[1]/main[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]/a[1]',
                       logBegin='F04',
                       logEnd='F05')
        logPositionForloop.append(logList)
        if not isSucess:
            raise CustomException('CustomException : Something went wrong')
        #print(companyName)





        # Construct indexLine
        lastSequence = lastSequence + 1
        #print("indexLine-02 : " + str(lastSequence))
        indexLine = str(lastSequence) + '. ' + companyName + ' - ' + jobTitle + ' ['+str(currentFormattedDate)+']'
        logPositionForloop.append("F06")
        #print("indexLine-03 : " + str(lastSequence) + '. ' + companyName)
        #print("indexLine-04 : " + str(lastSequence) + '. ' + companyName + ' - ' + jobTitle)
        #print("indexLine-05 : " + str(currentFormattedDate))    
        #print("indexLine-06 : " + str(lastSequence) + '. ' + companyName + ' - ' + jobTitle + ' ['+str(currentFormattedDate)+']')
        #print(indexLine)
        #print('-' * len(indexLine))


        # Adding indexline to final output list
        indexList.append(indexLine)
        logPositionForloop.append("F07")







        # #####################################
        # Start scraping contents
        # #####################################

        # Content line 01
        contentLine01 = jobTitle
        logPositionForloop.append("F08")


        # Content line 02
        contentLine02, logList, isSucess = getElementText(driver=driver, 
                       xPath='/html[1]/body[1]/div[5]/div[3]/div[2]/div[1]/div[1]/main[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]',
                       logBegin='F09',
                       logEnd='F10')
        logPositionForloop.append(logList)
        if not isSucess:
            raise CustomException('CustomException : Something went wrong')
        #print("\nContent Line 02")
        #print(contentLine02)






        # Content line 03
        contentLine03, logList, isSucess = getElementText(driver=driver, 
                       xPath='/html[1]/body[1]/div[5]/div[3]/div[2]/div[1]/div[1]/main[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[4]/ul[1]/li[1]/span[1]',
                       logBegin='F11',
                       logEnd='F12')
        logPositionForloop.append(logList)
        if not isSucess:
            raise CustomException('CustomException : Something went wrong')
        lines = contentLine03.split('\n')
        contentLine03 = ' '.join(line for line in lines if not line.startswith("Matches your"))
        #print("\nContent Line 03")
        #print(contentLine03)








        # Add content to output list
        contentList.append('\n' + indexLine 
                           + '\n' + ('-' * len(indexLine)) 
                           + '\n\t' + contentUrl 
                           +  '\n\n\t' + contentLine01
                           +  '\n\t' + contentLine02
                           +  '\n\t' + contentLine03
                           +  '\n\n\n\n\n\n\n\n'
                           )
        logPositionForloop.append("F15")


        finished = True
    except CustomException:
        finished = False
        print("\nSomething went wrong for job post URL [" + contentUrl + "]")
        combinedLogs = logPositionToplevel + logPositionForloop
        print(combinedLogs)
    finally:
        # Close the Selenium WebDriver code is moved out of for look, hence commenting here.
        # driver.quit()
        if finished:
            print("\nFinished scraping ["+ contentUrl +"]")
        
















# ######################## 
# Output Index and contents.
# ######################## 
print('\n\n\nFinished scapping all information in requried format, printing output\n\n\n\n\n')
for indexItem in indexList:
    print(indexItem)

print('\n\n\n\n\n')
for contentItem in contentList:
    print(contentItem)










# Quiting driver
print(logPositionToplevel)
driver.quit()



Login successful, now visiting to the jobpost URL
Mar 12, 2024

Finished scraping [https://www.linkedin.com/jobs/view/3847471903/]

Finished scraping [https://www.linkedin.com/jobs/view/3843098956/]

Finished scraping [https://www.linkedin.com/jobs/view/3842570234/]

Finished scraping [https://www.linkedin.com/jobs/view/3781149075/]



Finished scapping all information in requried format, printing output





301. Hays - Solutions Architect [Mar 12, 2024]
302. NCS Group Australia - Cloud Architect [Mar 12, 2024]
303. Otic Group - Solution Architect [Mar 12, 2024]
304. Aurecon - IT Solution Architect - Workday [Mar 12, 2024]







301. Hays - Solutions Architect [Mar 12, 2024]
----------------------------------------------
	https://www.linkedin.com/jobs/view/3847471903/

	Solutions Architect
	Hays · Melbourne, Victoria, Australia · 22 hours ago · 28 applicants
	On-site Contract Mid-Senior level









302. NCS Group Australia - Cloud Architect [Mar 12, 2024]
-------------------------