# Webscraping
## Portal da Queixa
### Changes may be required due to Portal da Queixa's continuous updates

(c) Nickolas Lago 2021 - Rev. 1.0

### Load packages and do the initializations

In [29]:
# Load libraries
import numpy as np
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.options import Options
import time

In [30]:
# Allow not verified SSL (Secure Socket Layer) certificates to be opened by
# replacing the ssl module's default HTTPS context factory with the unverified one.
# NOTE(review): this switches off certificate checking for the default HTTPS
# context in this kernel — acceptable only for scraping public pages; confirm
# it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context

In [31]:
# Get Firefox options (configurations)
# NOTE(review): `options` is not referenced again in the visible cells —
# presumably left over from a Selenium-based version; confirm before removing.
options = Options()

# Uncomment the next line to add the '--headless' argument to Options,
# which hides Firefox (makes it not visible)
# options.add_argument('--headless')

### Reading the dataset of complaint URLs

Using the dataset of complaint URLs built previously, we will now retrieve the details of each complaint from its URL.

In [32]:
# Load the previously collected complaint URLs; the main loop reads the
# "complainID" and "complainURL" columns of this sheet
complainsUrl = pd.read_excel("reviews_url.xlsx")

### Creating an empty DataFrame to store the information about each complaint

In [33]:
# Empty, string-typed DataFrame that will receive one row per scraped complaint
complainColumns = ["complainID",
                   "complainStatus",
                   "complainUser",
                   "complainTitle",
                   "complainViews",
                   "complainText",
                   "complainDate"]

# One empty "string" Series per column keeps the column order listed above
complainsData = pd.DataFrame({column: pd.Series([], dtype="string")
                              for column in complainColumns})

## Create the function to scrape a single complaint

This function downloads the complaint page at the given URL, builds a soup object from its HTML, and extracts the complaint's ID, status, user, title, number of views, date and text. It returns the DataFrame passed in with one new row appended for that complaint.

In [34]:
def getComplain(page, dfObj):
    """Scrape one complaint page and return ``dfObj`` with that complaint appended.

    Parameters
    ----------
    page : str
        URL of the complaint page to download.
    dfObj : pandas.DataFrame
        Complaints collected so far. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows of ``dfObj`` followed by one row holding
        complainID, complainStatus, complainTitle, complainUser,
        complainViews, complainText and complainDate (index renumbered).

    Raises
    ------
    AttributeError
        If an expected element is not found in the page (``find`` returned
        ``None``), e.g. after the site changed its markup.
    """
    # Download the page; the context manager closes the connection even on error
    with urlopen(page) as response:
        html = response.read().decode("utf-8")
    # Create soup object
    soup = BeautifulSoup(html, "html.parser")

    # Identifying complain body
    bodyClass = "col-xl-9"
    body = soup.find("div", {"class": bodyClass})

    # Getting name
    cUser = body.find("h6").find("a").get_text()
    # Getting ID (surrounding whitespace removed)
    cId = body.find("div", {"class": "complaint-detail-body-subheader-info-item mr-3 pr-3 text-muted mt-2 mt-sm-0"}).get_text().strip()
    # Status
    cStatus = body.find("div", {"class": "badge"}).get_text()
    # Title
    cTitle = body.find("h4").get_text()
    # Views (surrounding whitespace removed)
    cViews = body.find("div", {"class": "complaint-detail-body-subheader-info-item mr-3 pr-3 text-muted"}).get_text().strip()
    # Date
    cDate = body.find("time").get_text()
    # Complain Text
    cComplainText = body.find("div", {"class": "complaint-detail-body-description"}).get_text()

    newRow = pd.DataFrame([{"complainID": cId,
                            "complainStatus": cStatus,
                            "complainTitle": cTitle,
                            "complainUser": cUser,
                            "complainViews": cViews,
                            "complainText": cComplainText,
                            "complainDate": cDate}])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement and aligns the new row to dfObj's columns by name
    return pd.concat([dfObj, newRow], ignore_index=True)

## Main Loop
This loop goes through every row of the complaint URL dataset [complainID, complainURL], scrapes the complaint found at each URL and appends its details to the complaints DataFrame.

In [35]:
# 1-based position of the complaint being scraped, used in the progress message
complains = 1

for rowLabel, complainRow in complainsUrl.iterrows():
    # Report which complaint is being processed and how far along we are
    print("Processing complain ID: ", complainRow["complainID"], " | ", complains, " out of ", len(complainsUrl))
    # Scrape the page and keep the returned frame, which includes the new row
    complainsData = getComplain(complainRow["complainURL"], complainsData)
    complains += 1

Processing complain ID:  59476521  |  1  out of  177
Processing complain ID:  58935721  |  2  out of  177
Processing complain ID:  58754421  |  3  out of  177
Processing complain ID:  58696921  |  4  out of  177
Processing complain ID:  58650521  |  5  out of  177
Processing complain ID:  58649421  |  6  out of  177
Processing complain ID:  58604421  |  7  out of  177
Processing complain ID:  58582621  |  8  out of  177
Processing complain ID:  58575521  |  9  out of  177
Processing complain ID:  58540521  |  10  out of  177
Processing complain ID:  58530921  |  11  out of  177
Processing complain ID:  58507221  |  12  out of  177
Processing complain ID:  58486021  |  13  out of  177
Processing complain ID:  58397921  |  14  out of  177
Processing complain ID:  58386121  |  15  out of  177
Processing complain ID:  58239821  |  16  out of  177
Processing complain ID:  58146321  |  17  out of  177
Processing complain ID:  58032021  |  18  out of  177
Processing complain ID:  58000721  | 

In [36]:
# Display the scraped complaints (a bare last expression renders the DataFrame)
complainsData

Unnamed: 0,complainID,complainStatus,complainUser,complainTitle,complainViews,complainText,complainDate
0,59476521,Aguarda resposta,Susana,Lefties - Devolução de artigos,55,Recebi a minha encomenda no dia 19-04-2021 e i...,5 de maio 2021
1,58935721,Aguarda resposta,Cassia Barcelos,Lefties - Troca de peças compradas on-line,175,"Boa noite, \r\nVenho mostrar o meu total desco...",23 de abril 2021
2,58754421,Aguarda resposta,Milene,Lefties - Encomenda não entregue,62,A minha encomenda que fiz no dia 19 de Março n...,20 de abril 2021
3,58696921,Aguarda resposta,Mariana Ferreira,Lefties - Encomenda não entregue nem resolvem ...,41,Encomenda nr ‪90003989775‬ já deveria ter sido...,19 de abril 2021
4,58650521,Resolvida,Olga Santos,Lefties - Encomenda não entregue,124,Bom dia. Fiz uma encomenda online na Lefties n...,17 de abril 2021
...,...,...,...,...,...,...,...
172,8776417,Sem resolução,Maria Fernandes,Lefties - Hematoma grave na face,1305,Os sapatos prateados estavam na prateleira aci...,13 de março 2017
173,7865917,Sem resolução,patricia vidal,Lefties - Mau atendimento,1829,Bom dia.Ja por diversas vezes me dirigi a voss...,23 de janeiro 2017
174,5810316,Sem resolução,Julia Almeida,Lefties - Troca,3777,Boa tarde fui a lefties C.C. Arrábida fiz uma ...,19 de setembro 2016
175,5716216,Sem resolução,Carla Castro,Lefties - Promoção,1024,"Comprei umas sapatilhas em promoção, disseram ...",13 de setembro 2016


### Save it to an Excel sheet

In [37]:
# Write all scraped complaints to an Excel workbook in the working directory.
# The DataFrame index is written as well (to_excel default), so the sheet
# gets an unnamed first column.
complainsData.to_excel("complains_data.xlsx")