# Project 2: Web Scraping and API

## Module imports

In [1]:
import re
import sys
import requests
from pathlib import Path
import datetime
from time import sleep

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

## Get Content

In [2]:
def _first_match(pattern, text):
    """Return the first ``re.findall`` result for ``pattern`` in ``text``, or ``None``.

    Using ``re.findall`` keeps the same semantics as a plain ``findall`` call
    (the whole match for group-less patterns, the group for one-group patterns)
    while guaranteeing exactly one value per input string.
    """
    matches = re.findall(pattern, text)
    return matches[0] if matches else None


def get_cars_content(content):
    """Parse one listing page and return the car records found in it.

    Parameters
    ----------
    content : str or bytes
        Raw HTML of a listing page, handed to BeautifulSoup with the
        ``lxml`` parser.

    Returns
    -------
    zip
        Iterator of ``(name, year, kilometers, place, price)`` tuples.
        ``year``, ``kilometers`` and ``place`` are ``None`` when the
        corresponding pattern is not found in a listing's detail text.

    Notes
    -----
    Each ``.card-body`` element contributes exactly one entry to the name,
    year, kilometer and place lists, so those columns always stay aligned.
    Previously every regex match was flattened into the list, so a listing
    with no match (or more than one) shifted all following values onto the
    wrong car.
    """
    soup = BeautifulSoup(content, 'lxml')

    cars_body = soup.select(".card-body")
    names = [car.h2.text.strip() for car in cars_body]

    # The first <p> of each card holds the detail text that the three
    # patterns below are applied to.
    details = [car.p.text.strip() for car in cars_body]

    # First run of four digits in the detail text.
    years = [_first_match(r"\d{4}", detail) for detail in details]

    # Greedy span from the first non-word+digit to the last digit+non-word;
    # the surrounding whitespace captured by \W is stripped afterwards.
    kilometers = []
    for detail in details:
        raw_km = _first_match(r"\W\d.*\d+\W", detail)
        kilometers.append(raw_km.strip() if raw_km is not None else None)

    # Last whitespace-delimited token of the detail text.
    places = [_first_match(r"\s*([\S]+)$", detail) for detail in details]

    # NOTE(review): prices are selected page-wide rather than per card, so they
    # are paired with cards purely by position; zip() stops at the shorter
    # sequence if the counts differ. Confirm every card has exactly one price.
    prices_info = soup.select(".payment-total.payment-highlight")
    prices = [price.text.strip() for price in prices_info]

    return zip(names, years, kilometers, places, prices)

## Request information

In [3]:
def request_content(url, first_page=1, last_page=10, delay=2, timeout=30):
    """Download a range of listing pages and extract the cars from each one.

    Parameters
    ----------
    url : str
        URL template containing a single ``%s`` placeholder that is filled
        with the page number.
    first_page, last_page : int, optional
        Inclusive range of page numbers to request (defaults: 1 to 10).
    delay : int or float, optional
        Seconds to pause before each request and again after each page has
        been processed (default: 2).
    timeout : int or float, optional
        Seconds to wait for the server before ``requests`` gives up
        (default: 30). Without a timeout a stalled connection would block
        the notebook indefinitely.

    Returns
    -------
    list of list of tuple
        One inner list per page, holding the tuples produced by
        ``get_cars_content`` for that page.
    """
    cars_response = []
    for page in range(first_page, last_page + 1):
        page_url = url % page
        print(f"Request information: {page_url}")

        # Pause explicitly before hitting the server. The pause used to be
        # written as the second positional argument of requests.get(), where
        # its None return value was silently passed as ``params``.
        sleep(delay)
        response = requests.get(page_url, timeout=timeout)

        print(f"Extracting information from page #{page}")
        cars_response.append(list(get_cars_content(response.content)))
        sleep(delay)
        print('Information extracted successfuly')
        print(10*"--------")
    return cars_response

## Main function

In [4]:
def run():
    """Scrape the listing pages, save the result as CSV and return it.

    Calls ``request_content`` with the site's paginated URL template,
    flattens the per-page results into one list of records, builds a
    DataFrame with the columns ``Car_Name``, ``Years``, ``Kilometers``,
    ``Places`` and ``Prices``, and writes it to
    ``data/car_sales_kavak.csv`` (without the index).

    Returns
    -------
    pandas.DataFrame
        The scraped records, one row per car, all columns with object dtype.
    """
    base_url = "https://www.kavak.com/mx/page-%s/compra-de-autos"
    print("The information is being extracted from website")
    sleep(2)
    cars_data = request_content(base_url)
    # request_content returns one list per page; flatten to one list of rows.
    cars = [car for matrix in cars_data for car in matrix]

    print("The information has been completed, transforming data: ")
    print(10*"--------")
    sleep(3)
    filename = "data/car_sales_kavak.csv"

    print(f"The file will be named {filename}")
    df = pd.DataFrame(cars , columns=["Car_Name", "Years", "Kilometers", "Places","Prices"],dtype=object)

    print(10*"--------")
    sleep(3)
    # to_csv does not create missing directories, so make sure the output
    # folder exists before writing (a fresh checkout has no data/ folder).
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(filename, index = False)
    print(f"{filename} saved.")
    print("Finished.")
    return df

In [5]:
# Run the full scrape-and-save pipeline. run() returns the resulting DataFrame,
# which the notebook shows as this cell's output.
run()


The information is being extracted from website
Request information: https://www.kavak.com/mx/page-1/compra-de-autos
Extracting information from page #1
Information extracted successfuly
--------------------------------------------------------------------------------
Request information: https://www.kavak.com/mx/page-2/compra-de-autos
Extracting information from page #2
Information extracted successfuly
--------------------------------------------------------------------------------
Request information: https://www.kavak.com/mx/page-3/compra-de-autos
Extracting information from page #3
Information extracted successfuly
--------------------------------------------------------------------------------
Request information: https://www.kavak.com/mx/page-4/compra-de-autos
Extracting information from page #4
Information extracted successfuly
--------------------------------------------------------------------------------
Request information: https://www.kavak.com/mx/page-5/compra-de-autos
Ext

Unnamed: 0,Car_Name,Years,Kilometers,Places,Prices
0,Kia Soul EX,2017,62170,Monterrey,"$249,999"
1,Chevrolet Cruze LS Turbo,2017,72910,Monterrey,"$194,999"
2,Honda Accord EXL,2015,105360,Monterrey,"$265,999"
3,Infiniti Q50 Híbrido,2017,83440,Monterrey,"$396,999"
4,Mazda MX-5 I Sport,2017,53960,Monterrey,"$310,999"
...,...,...,...,...,...
354,Hyundai Creta Limited,2019,109700,Monterrey,"$388,999"
355,Chevrolet Sonic LS (Línea anterior),2016,95100,Monterrey,"$174,999"
356,Dodge Journey SE,2015,116500,Monterrey,"$254,999"
357,Volkswagen Passat CC 2.0T,2016,99500,Monterrey,"$312,999"


In [6]:
# Reload the CSV that run() wrote, to inspect what was actually saved to disk.
df = pd.read_csv('./data/car_sales_kavak.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Car_Name,Years,Kilometers,Places,Prices
0,Kia Soul EX,2017,62170,Monterrey,"$249,999"
1,Chevrolet Cruze LS Turbo,2017,72910,Monterrey,"$194,999"
2,Honda Accord EXL,2015,105360,Monterrey,"$265,999"
3,Infiniti Q50 Híbrido,2017,83440,Monterrey,"$396,999"
4,Mazda MX-5 I Sport,2017,53960,Monterrey,"$310,999"
...,...,...,...,...,...
354,Hyundai Creta Limited,2019,109700,Monterrey,"$388,999"
355,Chevrolet Sonic LS (Línea anterior),2016,95100,Monterrey,"$174,999"
356,Dodge Journey SE,2015,116500,Monterrey,"$254,999"
357,Volkswagen Passat CC 2.0T,2016,99500,Monterrey,"$312,999"
