In [5]:
pip install lxml

Collecting lxml
  Using cached lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl (6.6 MB)
Installing collected packages: lxml
Successfully installed lxml-4.9.1
Note: you may need to restart the kernel to use updated packages.


In [104]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from binascii import hexlify
%matplotlib inline

from urllib.request import urlopen
from bs4 import BeautifulSoup

# How many cars (with pairwise distinct prices) getCars collects per query.
totalCount = 10
# Price bounds passed to getCars as the `price[from]` / `price[to]` filter.
# Kept as strings because they are only interpolated into the query string.
minPrice = '1000000'
maxPrice = '100000000'
# Listing page that every search URL is built on (plain literal: there are
# no placeholders, so no f-string prefix is needed).
baseUrl = 'http://kolesa.kz/cars/toyota/camry'

def parseCars(url):
    """Download the listing page at `url` and extract its car cards.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of dicts, one per parsed `div.a-card` element, with keys
        'title' (link text of the card title), 'price' (int built from the
        digits of the price text) and 'price_title' (the stripped price text
        as shown on the page). Cards that lack a title link, a price element
        or any digits in the price are skipped.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(url) as response:
        markup = response.read().decode('utf-8', 'ignore')

    # Name the parser explicitly so results do not depend on which parsers
    # happen to be installed; lxml is the one this notebook installs.
    soup = BeautifulSoup(markup, 'lxml')

    result = []
    for car in soup.find_all('div', {'class': 'a-card'}):
        titleTag = car.find('h5', {'class': 'a-card__title'})
        linkTag = None
        if titleTag is not None:
            linkTag = titleTag.find('a', {'class': 'a-card__link'})
        priceTag = car.find('span', {'class': 'a-card__price'})

        # A card without the expected markup should not abort the whole page.
        if linkTag is None or priceTag is None:
            continue

        title = linkTag.text.strip()
        priceTitle = priceTag.text.strip()

        # Keeping only digits also drops the non-breaking spaces used as
        # thousands separators and the currency sign.
        digits = ''.join(filter(str.isdigit, priceTitle))
        if not digits:
            continue

        result.append({'title': title, 'price': int(digits), 'price_title': priceTitle})

    return result

def getCars(sortType, limitType, limit):
    """Collect up to `totalCount` cars with pairwise distinct prices.

    Pages of the search results are requested one after another through
    parseCars until enough cars are gathered or a page comes back empty.

    Args:
        sortType: Suffix for the `sort_by=price-...` query parameter
            (the notebook passes 'asc' or 'desc').
        limitType: Key of the `price[...]` filter (the notebook passes
            'from' or 'to').
        limit: Value of that price filter.

    Returns:
        A list of car dicts as produced by parseCars, in the order they were
        encountered, keeping only the first car seen for each price. The list
        is shorter than `totalCount` if the results run out first.
    """
    url = f'{baseUrl}/?price[{limitType}]={limit}&sort_by=price-{sortType}'
    page = 1

    cars = []
    seenPrices = set()  # set membership is O(1); a list lookup was O(n)

    while len(cars) < totalCount:
        parsedCars = parseCars(f'{url}&page={page}')

        # An empty page means the listings are exhausted; without this check
        # the loop would keep requesting further pages forever.
        if not parsedCars:
            break

        for parsedCar in parsedCars:
            if parsedCar['price'] in seenPrices:
                continue
            seenPrices.add(parsedCar['price'])
            cars.append(parsedCar)

            if len(cars) == totalCount:
                break

        page += 1

    return cars

def getDifferences(prices):
    """Return the distinct gaps between neighbouring prices.

    The prices are sorted in ascending order and the difference between each
    adjacent pair is taken; every difference value is reported once, in the
    order it first occurs.

    Args:
        prices: Iterable of numeric prices. It is not modified.

    Returns:
        A list of unique differences; empty when fewer than two prices are
        given.
    """
    # sorted() works on a copy, so the caller's list keeps its order.
    ordered = sorted(prices)

    diffs = []
    seen = set()  # constant-time duplicate check instead of scanning diffs
    for lower, upper in zip(ordered, ordered[1:]):
        diff = upper - lower
        if diff in seen:
            continue
        seen.add(diff)
        diffs.append(diff)

    return diffs

def getStatistics(cheap, expensive):
    """Summarise the prices of the two car samples.

    Args:
        cheap: List of car dicts, each with a 'price' entry.
        expensive: List of car dicts, each with a 'price' entry.

    Returns:
        A dict with 'max_price', 'min_price' and 'avg_price' computed over
        both samples together, plus 'differences': the sorted concatenation
        of getDifferences applied to each sample separately.
    """
    cheapPrices = [car['price'] for car in cheap]
    expensivePrices = [car['price'] for car in expensive]

    # Ascending order over the combined samples.
    allPrices = sorted(cheapPrices + expensivePrices)

    return {
        'max_price': max(allPrices),
        'min_price': min(allPrices),
        'avg_price': sum(allPrices) / len(allPrices),
        # Gaps are taken within each sample, never across the two.
        'differences': sorted(
            getDifferences(cheapPrices) + getDifferences(expensivePrices)
        ),
    }


# Fetch both samples: ascending from the lower bound, descending from the upper.
mostCheap = getCars('asc', 'from', minPrice)
mostExpensive = getCars('desc', 'to', maxPrice)

statistics = getStatistics(mostCheap, mostExpensive)

print('Statistics of top cheap/expensive cars:')
print()

# Each summary line is a label followed by the matching statistics entry.
for label, key in (
    ('Max price: ', 'max_price'),
    ('Min price: ', 'min_price'),
    ('Average price: ', 'avg_price'),
    ('Differences between prices', 'differences'),
):
    print(label, statistics[key])

print()

# List every collected car: the cheap sample first, then the expensive one.
for car in mostCheap + mostExpensive:
    print(car['title'], car['price_title'])


Statistics of top cheap/expensive cars:

Max price:  32500000
Min price:  1000000
Average price:  15854000.1
Differences between prices [9998, 10000, 40000, 50000, 50000, 190000, 310000, 490002, 500000, 1000000]

Toyota Camry 1 000 000 ₸
Toyota Camry 1 050 000 ₸
Toyota Camry 1 100 000 ₸
Toyota Camry 1 150 000 ₸
Toyota Camry 1 190 000 ₸
Toyota Camry 1 200 000 ₸
Toyota Camry 1 250 000 ₸
Toyota Camry 1 300 000 ₸
Toyota Camry 1 350 000 ₸
Toyota Camry Lumiere 1 360 000 ₸
Toyota Camry 32 500 000 ₸
Toyota Camry 31 500 000 ₸
Toyota Camry 31 190 000 ₸
Toyota Camry 31 000 000 ₸
Toyota Camry 30 990 002 ₸
Toyota Camry 30 500 000 ₸
Toyota Camry 30 000 000 ₸
Toyota Camry 29 500 000 ₸
Toyota Camry 29 000 000 ₸
Toyota Camry 28 950 000 ₸
