In [1]:
from datetime import datetime
from os import path
from pathlib import Path
from re import sub
from typing import Callable

from lxml import html
from pandas import DataFrame
from requests import get

In [2]:
def clear_row(value: str) -> str:
    """Return ``value`` with every newline removed and outer whitespace trimmed."""
    without_newlines = value.replace('\n', '')
    return without_newlines.strip()


def clear_float(value: str) -> float:
    """Convert a number written with comma thousands separators into a float.

    Commas are dropped and surrounding whitespace is trimmed before the
    conversion; ``float`` raises ``ValueError`` if the remainder is not numeric.
    """
    digits = value.replace(',', '')
    digits = digits.strip()
    return float(digits)


def remove_currency(currency: str) -> Callable[[str], str]:
    """Build a cleaner that strips ``currency`` out of a string.

    Args:
        currency: The currency marker to remove (e.g. ``'EUR'``).

    Returns:
        A function that takes a string, removes every occurrence of
        ``currency`` from it and trims the surrounding whitespace.
    """
    def _strip_currency(v: str) -> str:
        return v.replace(currency, '').strip()

    return _strip_currency


def get_timestamp():
    """Return the current local time as a ``YYYY-MM-DDTHH:MM:SS`` string."""
    now = datetime.now()
    # datetime.__format__ delegates to strftime with the given pattern.
    return format(now, '%Y-%m-%dT%H:%M:%S')


def _parse_limits(text: str) -> tuple:
    """Split a ``"min - max"`` limits string into a ``(min, max)`` pair.

    A side that is missing or blank is reported as 0, the same placeholder
    used when the whole limits cell is empty.
    """
    bounds = text.split('-')
    lower = bounds[0].strip()
    upper = bounds[1].strip() if len(bounds) > 1 else ''
    return (
        clear_float(lower) if lower else 0,
        clear_float(upper) if upper else 0,
    )


def parse_response(response: str, timestamp: str) -> DataFrame:
    """Extract the offers table from a listing page into a DataFrame.

    Args:
        response: HTML text of the listing page.
        timestamp: Value stored in the ``timestamp`` column of every row.

    Returns:
        A DataFrame with one row per parsed table row and the columns
        ``timestamp``, ``user_name``, ``user_profile``, ``payment_method``,
        ``payment_method_url``, ``price``, ``limits_min``, ``limits_max`` and
        ``currency``. The columns are present even when no row was parsed.

    Raises:
        IndexError: If a data row lacks the user link or payment-method link.
        ValueError: If a price or limit is not numeric after cleaning.
    """
    columns = [
        'timestamp',
        'user_name',
        'user_profile',
        'payment_method',
        'payment_method_url',
        'price',
        'limits_min',
        'limits_max',
        'currency',
    ]

    tree = html.fromstring(response)

    rows = []

    # The first <tr> is skipped -- presumably the header row.
    for row in tree.xpath('//table/tr')[1:]:
        cols = row.xpath('td')

        # Rows without the four expected cells (user, payment method, price,
        # limits) cannot be an offer; skip them instead of failing on cols[i].
        if len(cols) < 4:
            continue

        user_name = cols[0].xpath('a/text()')[0]
        user_profile = cols[0].xpath('a/@href')[0]

        payment_method = clear_row(' '.join(cols[1].xpath('text()')))
        payment_method_url = cols[1].xpath('a/@href')[0]

        price = clear_row(' '.join(cols[2].xpath('text()')))

        # The currency is taken to be the last space-separated token of the
        # price text; the same token is then stripped from price and limits.
        currency = price.rsplit(' ', 1)[-1]
        currency_cleaner = remove_currency(currency)

        price = clear_float(currency_cleaner(price))

        limits = currency_cleaner(clear_row(' '.join(cols[3].xpath('text()'))))
        limits_min, limits_max = _parse_limits(limits)

        rows.append({
            'timestamp': timestamp,
            'user_name': user_name,
            'user_profile': user_profile,
            'payment_method': payment_method,
            'payment_method_url': payment_method_url,
            'price': price,
            'limits_min': limits_min,
            'limits_max': limits_max,
            'currency': currency
        })

    # Passing the columns explicitly keeps the schema (and the CSV header)
    # stable when the page yields no rows.
    return DataFrame(rows, columns=columns)


In [3]:
# Directory (relative to the working directory) that receives the CSV snapshots.
local_dir = 'data'
# Create it from the variable so the setting and the directory cannot drift apart.
Path(local_dir).mkdir(parents=True, exist_ok=True)

In [4]:
# Each entry is a (side label, listing URL) pair; the label becomes the prefix
# of the CSV filename written for that side.
sellers_url = (
    'sellers',
    'https://localbitcoins.com/buy-bitcoins-online/nl/netherlands/',
)
buyers_url = (
    'buyers',
    'https://localbitcoins.com/sell-bitcoins-online/nl/netherlands/',
)

In [5]:
# Download each listing page, parse its offers table and store a timestamped
# CSV snapshot in local_dir.
for side, url in [sellers_url, buyers_url]:
    timestamp = get_timestamp()
    # NOTE: the timestamp contains ':' characters, which are not allowed in
    # Windows filenames.
    filename = path.join(local_dir, f'{side}_{timestamp}.csv')
    # Without a timeout requests can wait indefinitely on a stalled server.
    response = get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page and saving an
    # empty or misleading snapshot.
    response.raise_for_status()
    df = parse_response(response.text, timestamp)
    df.to_csv(filename, index=False, sep=",", quotechar='"')