# LinkedIn Jobs Web Scraping and Email Automation
- Juan Esteban Rincón Poveda

- Other projects: [Portfolio](https://juanrinconp.github.io/portafolio/)

This project automates job searching on LinkedIn: a Python web scraper extracts the latest job postings and automatically sends them as email notifications to a list of recipients at a given hour.



In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import pandas as pd
import datetime
import yagmail
import os

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Directory that holds the input spreadsheet (lista.xlsx).
pathout = './data/'
# makedirs(exist_ok=True) creates the folder only when it is missing, without the
# check-then-create race of `if not exists: mkdir`, and also creates parent folders.
os.makedirs(pathout, exist_ok=True)

In [4]:
# Load the recipients sheet and add 'Puesto2': the job title with spaces replaced
# by "%20" so it can be embedded in the search URL.
lista = pd.read_excel(f'{pathout}lista.xlsx').assign(
    Puesto2=lambda frame: frame['Puesto'].str.replace(' ', '%20')
)
# One URL-ready job title per row of the sheet.
jobs = lista['Puesto2'].to_list()

In [5]:
# Recipient list: each row holds a person's name, e-mail, the job title they are
# looking for, and a location. Show the first row only.
lista.head(1)

Unnamed: 0,nombre,email,Puesto,lugar,Puesto2
0,juan,juan.rinconp@hotmail.com,Data Analyst,Bogotá,Data%20Analyst


In [8]:
# Default location: Bogotá (the same location string is used for every search).
# One CSV file per job title in `jobs` is written to the working directory,
# named "<job>.csv".

lugar ='Bogota%2C%20D.C.%2C%20Capital%20District%2C%20Colombia'


def _text_or_none(parent, tag, css_class):
    """Return the stripped text of the first `tag` with class `css_class`, or None if absent."""
    node = parent.find(tag, class_=css_class)
    return node.get_text().strip() if node is not None else None


def _parse_job_posting(job_posting):
    """Build the Trabajo/Link/Empresa record for a single job card element."""
    link_tag = job_posting.find('a', class_='base-card__full-link absolute top-0 right-0 bottom-0 left-0 p-0 z-[2]')
    return {
        'Trabajo': _text_or_none(job_posting, 'span', 'sr-only'),
        # Read href directly as a string. The previous approach stringified a
        # one-element list and sliced off "['" / "']", which produced the bogus
        # link "on" whenever the anchor had no href.
        'Link': link_tag.get('href') if link_tag is not None else None,
        'Empresa': _text_or_none(job_posting, 'a', 'hidden-nested-link'),
    }


for job in jobs:
    # NOTE(review): f_TPR=r86400 presumably limits results to the last 24 hours;
    # geoId and currentJobId are fixed values carried over from the original URL.
    url = f'https://www.linkedin.com/jobs/search/?currentJobId=3464244316&f_TPR=r86400&geoId=102361989&keywords={job}&location={lugar}'

    # NOTE(review): recent Selenium releases dropped the `executable_path` argument;
    # confirm against the installed Selenium version.
    wd = webdriver.Chrome(executable_path='./chromedriver.exe')
    try:
        wd.get(url)
        soup = BeautifulSoup(wd.page_source, 'html.parser')
    finally:
        # Always shut the browser down; otherwise one Chrome process leaks per job.
        wd.quit()

    job_postings = soup.find_all("div", {"class": "base-card relative w-full hover:no-underline focus:no-underline base-card--link base-search-card base-search-card--link job-search-card"})

    data = [_parse_job_posting(job_posting) for job_posting in job_postings]

    # Build the table and write the file once per job (not once per posting).
    # Explicit columns guarantee a header-only CSV even when nothing was found,
    # so the file attached to the e-mail always exists.
    df = pd.DataFrame(data, columns=['Trabajo', 'Link', 'Empresa'])

    filename = f'{job}.csv'
    df.to_csv(filename, index=False)
    

In [10]:
# Daily e-mail scheduler: once per day, during the configured minute, send every
# person in `lista` their "<Puesto2>.csv" file plus "recomendaciones.jpg".
# The loop runs until the kernel is interrupted.
SEND_HOUR = 12      # local hour at which the e-mails go out
SEND_MINUTE = 4     # local minute at which the e-mails go out
POLL_SECONDS = 30   # shorter than a minute so the target minute cannot be skipped

# Credentials are read from the environment instead of being stored in the notebook.
# YAGMAIL_PASSWORD is required (KeyError if unset); YAGMAIL_USER is optional.
# NOTE(review): the app password that used to be hardcoded here must be revoked.
SMTP_USER = os.environ.get("YAGMAIL_USER", "juanes.rinconp10@gmail.com")
SMTP_PASSWORD = os.environ["YAGMAIL_PASSWORD"]

last_sent_on = None  # date of the last completed send; prevents a second send within the same minute
while True:
    # Read the clock once so hour and minute come from the same instant.
    now = datetime.datetime.now()
    is_send_minute = now.hour == SEND_HOUR and now.minute == SEND_MINUTE
    if is_send_minute and last_sent_on != now.date():
        # One SMTP session for the whole batch, closed when the block exits.
        with yagmail.SMTP(user=SMTP_USER, password=SMTP_PASSWORD) as email:
            # Iterate `lista` directly so the scraped `df` is not overwritten.
            for _, row in lista.iterrows():
                email.send(to=row['email'],
                           subject=f"Alerta de vacantes para {row['Puesto']}",
                           contents=f"Buenos dias {row['nombre']}, adjunto encontraras las ofertas de empleo para el puesto de {row['Puesto']} para el día de hoy. Buen día. \n\n Atentemente,  \n \n Juanes R. \n Data Scientist \n 300000000",
                           attachments=[f"{row['Puesto2']}.csv" , "recomendaciones.jpg"])
        last_sent_on = now.date()
        # Report once per batch rather than once per recipient.
        print("Correos Enviados!")
    time.sleep(POLL_SECONDS)

Example of the file sent to the people on the list:

In [12]:
# Show the first five rows of `df`; the output below is a scraped postings table
# with the Trabajo / Link / Empresa columns.
df.head(5)

Unnamed: 0,Trabajo,Link,Empresa
0,Analista de trade marketing,https://co.linkedin.com/jobs/view/analista-de-...,Eficacia
1,Coordinador/Profesional E-Commerce,https://co.linkedin.com/jobs/view/coordinador-...,Multiempleos S.A.
2,Executive Assistant,https://co.linkedin.com/jobs/view/executive-as...,Virtual Emily
3,Growth Lead,https://co.linkedin.com/jobs/view/growth-lead-...,Lulo bank
4,Líder de desarrollo de negocios digitales,https://co.linkedin.com/jobs/view/l%C3%ADder-d...,Michael Page
