## Crawler Q Concursos

- [ ] Login e autenticação
- [ ] Resgate de disciplinas e matérias
- [ ] Cálculo de métricas de cada matéria
- [ ] Armazenamento local em dataframes
- [ ] Raspagem de questões
- [ ] Resolução de questões

In [9]:
from selenium import webdriver
import uuid
from time import sleep

In [2]:
# Crawler configuration: capabilities requested from the remote WebDriver
# endpoint when the browser session is created.

capabilities = dict(browserName="firefox", browserVersion="90.0")
# This key contains a colon, so it cannot be passed as a keyword argument.
capabilities["selenoid:options"] = dict(enableVNC=True, enableVideo=False)

In [74]:
# Base class

class Craweler:
    """Thin wrapper around a remote Selenium WebDriver session.

    The session is opened against ``http://localhost:4444/wd/hub`` using the
    module-level ``capabilities`` dict. The driver is kept in a name-mangled
    private attribute and reached by subclasses through the helpers below.
    """

    def __init__(self):
        remote_session = webdriver.Remote(
            desired_capabilities=capabilities,
            command_executor="http://localhost:4444/wd/hub",
        )
        self.__driver = remote_session

    def _find_element_by_xpath(self, xpath):
        """Return the element the driver locates for ``xpath``."""
        browser = self.__driver
        return browser.find_element_by_xpath(xpath)

    def _send_keys_in_element(self, element, content_string):
        """Send ``content_string`` to ``element`` and return the call's result."""
        outcome = element.send_keys(content_string)
        return outcome

    def _request_url(self, url):
        """Navigate the driver to ``url`` and return whatever ``get`` returns."""
        browser = self.__driver
        return browser.get(url)

    def driver(self):
        """Return the underlying WebDriver instance."""
        return self.__driver
        
        
# Derived class

class Qconcursos(Craweler):
    """Crawler for the Q Concursos website: login, logout and subject listing.

    Args:
        config_dict: Mapping with the account settings. ``auth_user`` reads
            the ``"email"`` and ``"password"`` keys from it.
    """

    def __init__(self, config_dict):
        # The base initializer opens the browser session and stores the driver
        # itself. It returns None, so its result must not be assigned here.
        super().__init__()
        self.__configs = config_dict
        # URLs and XPath locators for each page the crawler interacts with.
        self.__infos = {
            "auth": {
                "url": "https://www.qconcursos.com/conta/entrar",
                "email_input_xpath": '//*[@id="login_email"]',
                "password_input_xpath": '//*[@id="login_password"]',
                "button_input_xpath": '//*[@id="login_form"]/input[3]',
            },
            "logout": {
                "url": "https://www.qconcursos.com/usuario",
                "first_button": '//*[@id="user-dropdown"]',
                "second_button": '//*[@id="js-current-user"]/div[2]/div/a[3]',
            },
            "subjects": {
                "url": "https://www.qconcursos.com/questoes-do-enem/disciplinas",
                "body": '/html/body/div[3]/main/div[2]/div',
            },
        }

    def auth_user(self):
        """Open the login page, fill in the credentials and submit the form.

        Returns:
            The result of clicking the submit button.
        """
        auth = self.__infos["auth"]
        self._request_url(auth["url"])

        # Locate all three elements before typing, as the original flow did.
        input_email_element = self._find_element_by_xpath(auth["email_input_xpath"])
        input_password_element = self._find_element_by_xpath(auth["password_input_xpath"])
        input_button_element = self._find_element_by_xpath(auth["button_input_xpath"])

        self._send_keys_in_element(input_email_element, self.__configs["email"])
        self._send_keys_in_element(input_password_element, self.__configs["password"])

        return input_button_element.click()

    def logout_user(self):
        """Open the user page and click through the two logout controls.

        Returns:
            The result of clicking the second control.
        """
        logout = self.__infos["logout"]
        self._request_url(logout["url"])

        # The second control is looked up only after the first one is clicked.
        # NOTE(review): presumably the first click opens a dropdown - confirm.
        first_button = self._find_element_by_xpath(logout["first_button"])
        first_button.click()
        second_button = self._find_element_by_xpath(logout["second_button"])
        return second_button.click()

    def get_subjects(self):
        """Collect the subjects listed on the ENEM subjects page.

        Returns:
            A list with one dict per subject card, holding ``'title'`` (text
            of the card's ``h3``), ``'url'`` (``href`` of the card's link) and
            ``'number'`` (the link text, as a string).
            NOTE(review): 'number' looks like a question count - confirm.
        """
        subjects = self.__infos["subjects"]
        self._request_url(subjects["url"])

        body_content = self._find_element_by_xpath(subjects["body"])
        subject_elements = body_content.find_elements_by_class_name('q-discipline-item')

        response = []
        for element_div in subject_elements:
            # Only the first 'q-items' node of each card is used.
            question_itens = element_div.find_elements_by_class_name('q-items')[0]
            link_element = question_itens.find_element_by_tag_name('a')

            response.append({
                'title': element_div.find_element_by_tag_name('h3').text,
                'url': link_element.get_attribute('href'),
                'number': link_element.text,
            })

        # The print is kept so existing notebook output does not change; the
        # list is now also returned so callers can use the scraped data.
        print(response)
        return response

    def show_credentials(self):
        """Print the stored configuration with the password masked."""
        # Work on a copy so the real credentials stay intact for auth_user.
        visible = dict(self.__configs)
        if "password" in visible:
            visible["password"] = "***"
        print(visible)
        
    

In [75]:
import os

# Credentials come from the environment so that no secret is stored in the
# notebook. Set QCONCURSOS_EMAIL and QCONCURSOS_PASSWORD before running this
# cell; a missing variable raises KeyError naming it.
crawler = Qconcursos({
    "email": os.environ["QCONCURSOS_EMAIL"],
    "password": os.environ["QCONCURSOS_PASSWORD"],
})

In [76]:
crawler.auth_user()
try:
    # Fixed pauses between steps.
    # NOTE(review): presumably these give the site time to finish loading
    # after each navigation - confirm, and consider explicit waits instead.
    sleep(5)
    crawler.get_subjects()
    sleep(5)
finally:
    # Always end the site session, even when scraping raises.
    crawler.logout_user()

[{'title': 'Português', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/letras-portugues/questoes', 'number': '972'}, {'title': 'Matemática', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/matematica-matematica/questoes', 'number': '1.159'}, {'title': 'História', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/historia-historia/questoes', 'number': '518'}, {'title': 'Geografia', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/geografia-geografia/questoes', 'number': '456'}, {'title': 'Biologia', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/biologia-biologia/questoes', 'number': '519'}, {'title': 'Química', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/quimica-quimica/questoes', 'number': '386'}, {'title': 'Física', 'url': 'https://www.qconcursos.com/questoes-do-enem/disciplinas/fisica-fisica/questoes', 'number': '375'}, {'title': 'Inglês', 'url': 'https://www.qconcursos.com/questoes-do