<a href="https://colab.research.google.com/github/institutohumai/cursos-python/blob/master/Scraping/3_Selenium_y_xpath/ejercicio/linkedin_solucion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" data-canonical-src="https://colab.research.google.com/assets/colab-badge.svg"></a>

In [None]:
# @title Esta celda permite instalar y utilizar selenium en google colab (Mayo 2023)
# @markdown Ignorar celda en Github
# https://github.com/googlecolab/colabtools/issues/3347#issuecomment-1387453484
%%capture output
%%shell
# Ubuntu no longer distributes chromium-browser outside of snap
# Proposed solution: https://askubuntu.com/questions/1204571/how-to-install-chromium-without-snap
# Add debian buster
# Each source line is tied through signed-by= to one of the keyring files
# written further down in this script.
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
# Fetch the three keys from keyserver.ubuntu.com into the apt keyring.
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

# Export each fetched key into the keyring file that the matching
# signed-by= entry in debian.list points to.
apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer debian repo for chromium* packages only
# Note the double-blank lines between entries
# NOTE(review): as shown here the entries are separated by a single blank
# line — confirm against the original notebook cell.
# Priorities written below: 500 for release a=eoan, 300 for any package from
# deb.debian.org, 700 for chromium* packages from deb.debian.org.
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500

Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300

Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# Install chromium and chromium-driver
# NOTE(review): no -y flag is passed to apt-get install; confirm it does not
# stop at a confirmation prompt in this environment.
apt-get update
apt-get install chromium chromium-driver

# Install selenium
pip install selenium

## Consigna: a partir de la implementación de la clase Linkedin crear las siguientes funcionalidades:
- log_in: ingresar a la página de inicio de sesión e iniciar sesión en la cuenta.
- upload: ingresar a la página principal y agregar un posteo con el contenido que se pasa como parámetro.

Notas: 
- Existe la posibilidad de que, al iniciar sesión, se solicite un código enviado por mail; se debe poder ingresar el código y completar el inicio de sesión.
- En caso de utilizar Colab, se puede ejecutar `driver.save_screenshot('image.png')` para ver el estado actual del navegador.

In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from getpass import getpass

class Scraper:
    """Selenium-based base class for scrapers.

    Subclasses are expected to override :meth:`log_in` and :meth:`upload`.
    When running on Colab, Chromium and Selenium have to be installed first.
    """

    def __init__(self, user: str, passw: str):
        """Store the credentials for later use.

        Args:
            user: Account user name.
            passw: Account password.
        """
        self.user = user
        self.passw = passw

    def log_in(self):
        """Log in to the target site. Does nothing in the base class."""
        pass

    def upload(self):
        """Upload content to the target site. Does nothing in the base class."""
        pass

    @staticmethod
    def init_chrome(
        headless=False,
        no_images=False,
        datadir=None,
        mobile=False,
    ):
        """Create and return a configured Chrome WebDriver.

        Args:
            headless: When true, pass ``--headless`` to Chrome.
            no_images: When true, set the Chrome preference
                ``profile.managed_default_content_settings.images`` to 2.
            datadir: Optional path passed to Chrome as ``user-data-dir``.
            mobile: When true, enable mobile emulation (360x640, pixel
                ratio 3.0, touch) with an Android user agent; otherwise a
                desktop Linux user agent is used.

        Returns:
            The ``webdriver.Chrome`` instance that was started.
        """
        mobile_user_agent = (
            "Mozilla/5.0 (Linux; Android 9; Redmi Note 7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 "
            "Mobile Safari/537.36"
        )
        desktop_user_agent = (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
            "Gecko) Chrome/73.0.3683.103 Safari/537.36"
        )

        chrome_options = Options()
        if headless:
            chrome_options.add_argument("--headless")
        if datadir:
            chrome_options.add_argument(f"user-data-dir={datadir}")
        chrome_options.add_argument("start-maximized")
        if no_images:
            chrome_options.add_experimental_option(
                "prefs", {"profile.managed_default_content_settings.images": 2}
            )

        if mobile:
            chrome_options.add_experimental_option(
                "mobileEmulation",
                {
                    "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
                    "userAgent": mobile_user_agent,
                    "touch": True,
                },
            )
            # NOTE(review): turns the "w3c" experimental option off for mobile
            # emulation — confirm the installed Selenium/chromedriver accepts it.
            chrome_options.add_experimental_option("w3c", False)
            useragent = mobile_user_agent
        else:
            useragent = desktop_user_agent

        # The user agent is always set explicitly, for both modes.
        chrome_options.add_argument(f"--user-agent={useragent}")

        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-gpu")

        driver = webdriver.Chrome(
            service=Service(),
            options=chrome_options,
        )

        print("Running Chrome.")
        return driver

class Linkedin(Scraper):
    """LinkedIn automation: log in, enter a verification code, publish a post."""

    def __init__(self, headless=True, *args, **kwargs):
        """Store the credentials and start a Chrome driver.

        Args:
            headless: Forwarded to ``init_chrome``. It is the first positional
                parameter, so ``user`` and ``passw`` are best given as
                keyword arguments.
            *args: Forwarded to ``Scraper.__init__``.
            **kwargs: Forwarded to ``Scraper.__init__`` (``user``, ``passw``).
        """
        super().__init__(*args, **kwargs)
        self.driver = self.init_chrome(headless=headless)
        self.base_url = "https://www.linkedin.com"

    def log_in(self):
        """Open the login page, fill in user and password and submit the form."""
        # Enter the user name.
        self.driver.get(f"{self.base_url}/login?")
        user_input = self.driver.find_element(By.XPATH, value='//input[@id="username"]')
        user_input.send_keys(self.user)

        # Enter the password.
        password_input = self.driver.find_element(By.XPATH, value='//input[@id="password"]')
        password_input.send_keys(self.passw)

        # Click the log-in button.
        login_button = self.driver.find_element(By.CLASS_NAME, value="btn__primary--large")
        login_button.click()

    def insert_code(self):
        """Prompt for the verification code, submit it and confirm if asked.

        The code is read interactively with ``getpass`` so it is not echoed.
        """
        # Locate the code input and type the code.
        code_input = self.driver.find_element(By.XPATH, value='//*[@placeholder="Enter code"]')
        code_input.send_keys(getpass('Codigo:'))

        # Click the submit button.
        submit_button = self.driver.find_element(By.XPATH, value='//button[@type="submit"]')
        submit_button.click()

        # Click the confirm button only when the page shows one.
        # find_elements returns a (possibly empty) list, so the first match is
        # clicked; calling click() on the list itself would raise AttributeError.
        self.driver.implicitly_wait(3)
        confirm_buttons = self.driver.find_elements(By.XPATH, value='//button[text()="Confirm"]')
        if confirm_buttons:
            confirm_buttons[0].click()

    def upload(self, content: str):
        """Publish a post with the given text.

        The XPath selectors match both the English and the Spanish UI labels.

        Args:
            content: Text typed into the post editor.
        """
        # Click the button that opens the post editor.
        start_post_button = self.driver.find_element(By.XPATH, value='//button/span[text()="Start a post" or text()="Crear publicación"]')
        start_post_button.click()

        # Type the post content.
        self.driver.implicitly_wait(3)
        post_editor = self.driver.find_element(By.XPATH, value='//div[@data-placeholder="What do you want to talk about?" or @data-placeholder="¿Sobre qué quieres hablar?"]')
        post_editor.send_keys(content)

        # Click the publish button.
        self.driver.implicitly_wait(2)
        publish_button = self.driver.find_element(By.XPATH, value='//span[text()="Post" or text()="Publicar"]')
        publish_button.click()

In [3]:
from getpass import getpass

# Create the LinkedIn client in headless mode; the password is read with
# getpass() instead of being written in the notebook.
lk = Linkedin(headless=True, user='mtsgrinberg@gmail.com', passw=getpass())

 ········




Current google-chrome version is 94.0.4606
Get LATEST chromedriver version for 94.0.4606 google-chrome
Driver [/home/cerebrock/.wdm/drivers/chromedriver/linux64/94.0.4606.113/chromedriver] found in cache
  driver = webdriver.Chrome(


Running Chrome.


In [None]:
# Open the login page and submit the stored credentials.
# If a verification code is requested afterwards, lk.insert_code() prompts
# for it and submits it.
lk.log_in()

In [None]:
# Open the post editor, type the given text and click the publish button.
lk.upload('Post hecho desde Python con Selenium!')