# Project web: How much do your groceries cost in Bitcoin?

Isaac Rodriguez

*Data Part Time Barcelona Dec 2019*

## Content
- [Project Description](#project)

<a name="project"></a>
## Project Description
The goal of this project is to choose an API to obtain data from and a web page to scrape, convert the results into pandas DataFrames, and export them as CSV files.

## Overview

In [57]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [77]:
def change_format_to_utf8(string):
    """Repair text whose UTF-8 bytes were mis-decoded as Latin-1.

    The text is re-encoded as Latin-1 and decoded as UTF-8, which turns
    mojibake such as "Ã©" back into "é".  Both steps are strict: if either
    fails, the input was not Latin-1 mojibake (for example it is already
    correct and contains accented characters, or it holds characters that
    Latin-1 cannot represent), so it is returned unchanged rather than having
    characters replaced by "?" or U+FFFD.

    Args:
        string: Text to repair.

    Returns:
        The repaired text, or ``string`` itself when no repair applies.
    """
    try:
        return string.encode('latin-1').decode('utf-8')
    except UnicodeError:
        # Covers both UnicodeEncodeError and UnicodeDecodeError.
        return string

## Get categories

In [78]:
# Download the landing page and locate the first category drop-down menu.
# NOTE(review): `ulabox_web` is not defined in any cell shown here; presumably
# it is the site's base URL. Make sure it is defined before running this cell.
ulabox_content = requests.get(ulabox_web).content
web_soup = BeautifulSoup(ulabox_content, "lxml")
table = web_soup.find_all("ul", {"class": "list-dropdown"})[0]

category_link_class = "list-dropdown-item__link list-dropdown-item-link | js-pjax js-track-ui js-updatable-track"

# Take the name and the link from the same <li> so the two columns cannot get
# out of step; entries without a category link are skipped.
rows = []
links = []
for entry in table.find_all("li"):
    anchor = entry.find("a", {"class": category_link_class})
    if anchor is None:
        continue
    rows.append(change_format_to_utf8(entry.text.strip()))
    # Drop the "?ula_src=" tracking suffix from the link.
    links.append(anchor.get("href").split("?ula_src=")[0])

df_categories = pd.DataFrame({"Categories": rows, "Links": links})

## Get subcategories from each category

In [79]:
# Visit every category page and record one row per subcategory tile.
subcategory_records = []

for _, category in df_categories.iterrows():
    ulabox_subcategory_web = ulabox_web + category['Links']
    ulabox_content = requests.get(ulabox_subcategory_web).content
    ulabox_soap = BeautifulSoup(ulabox_content, "lxml")

    for tile in ulabox_soap.find_all("div", {"class": "col-xs-12 col-sm-4"}):
        # The first matching heading and the first anchor of the tile give the
        # subcategory name and its link.
        heading = tile.find("h2", {"class": "category-item__name epsilon islet brand-face"})
        anchor = tile.find("a")
        # Skip tiles without a heading or a usable link instead of failing.
        if heading is None or anchor is None or not anchor.get("href"):
            continue
        subcategory_records.append({
            "Subcategory": change_format_to_utf8(heading.text),
            # Drop the "?ula_src=" tracking suffix from the link.
            "Links": anchor.get("href").split("?ula_src=")[0],
            "Category": category['Categories'],
        })

# Build the frame once from the collected records; DataFrame.append, which
# grew the frame row by row, was removed in pandas 2.0.
df_subcategories = pd.DataFrame(subcategory_records, columns=["Subcategory", "Links", "Category"])
# The category name doubles as the row index until it is reset.
df_subcategories.index = df_subcategories["Category"].tolist()

The index currently holds the category name; we reset it to a numeric index.

In [None]:
# reset_index returns a new frame unless inplace=True is given; modify
# df_subcategories itself so the numeric index actually sticks.
df_subcategories.reset_index(drop=True, inplace=True)

## Get items from each subcategory

In [62]:
# Visit every subcategory page and collect one frame of products per page.
product_frames = []

for _, subcategory in df_subcategories.iterrows():
    ulabox_products_web = ulabox_web + subcategory['Links']
    ulabox_content = requests.get(ulabox_products_web).content
    ulabox_soap = BeautifulSoup(ulabox_content, "lxml")

    # Look the product list up once; pages without one are skipped.
    product_list = ulabox_soap.find("section", {"class": "product-list"})
    if product_list is None:
        continue

    grid_cells = product_list.find_all("div", {"class": "grid__item m-one-whole t-one-third d-one-third dw-one-quarter | js-product-grid-grid"})
    # Use the first <article> of each grid cell; cells without one are ignored.
    articles = [cell.find("article") for cell in grid_cells]
    articles = [article for article in articles if article is not None]
    if not articles:
        continue

    product_frames.append(pd.DataFrame({
        "id": [article.get("data-product-id") for article in articles],
        # A missing name/brand attribute becomes "" so the text repair helper
        # always receives a string.
        "name": [change_format_to_utf8(article.get("data-product-name", "")) for article in articles],
        "category": subcategory['Category'],
        "subcategory": subcategory['Subcategory'],
        "price": [article.get("data-price") for article in articles],
        "currency": "EUR",
        "brand": [change_format_to_utf8(article.get("data-product-brand", "")) for article in articles],
    }))

# DataFrame.append was removed in pandas 2.0; concatenate the per-page frames
# in one step. Row labels are left as they are (they restart on every page).
if product_frames:
    df_products = pd.concat(product_frames)
else:
    df_products = pd.DataFrame(columns=["id", "name", "category", "subcategory", "price", "currency", "brand"])

We convert our price column to float.

In [64]:
# The scraped "data-price" attribute values are cast to float so that the
# BTC conversion later on can divide them.
df_products['price'] = df_products['price'].astype(float)

We reset our index.

In [65]:
# Replace the existing row labels with a fresh 0..n-1 index; drop=True
# discards the old labels and inplace=True modifies df_products itself.
df_products.reset_index(drop=True, inplace=True)

Our final dataset for the web scraping part: 

In [66]:
# Preview the first rows of the scraped product table.
df_products.head()

Unnamed: 0,id,name,category,subcategory,price,currency,brand
0,54521,Ensalada Mezclum Petit Plà 250g,Frescos,Mercado,2.98,EUR,Fruites i Verdures Lluís Macià
1,42519,Aguacate Maduro,Frescos,Mercado,3.99,EUR,Fruites i Verdures Lluís Macià
2,43876,Plátano de Canarias Verde,Frescos,Mercado,2.47,EUR,Fruites i Verdures Lluís Macià
3,42501,Plátano de Canarias Maduro,Frescos,Mercado,2.47,EUR,Fruites i Verdures Lluís Macià
4,42535,Fresas de Maresme 500g,Frescos,Mercado,3.98,EUR,Fruites i Verdures Lluís Macià


Then we export the final dataset to a CSV file in the outputs folder.

In [57]:
# Write the scraped products to CSV. The row index is written as well, as the
# first (unnamed) column.
# NOTE(review): assumes the ./outputs directory already exists.
df_products.to_csv("./outputs/ulabox_products.csv")

## Get current Bitcoin exchange rates

In [67]:
import requests
from pandas.io.json import json_normalize
import pandas as pd

In [68]:
# Ticker endpoint queried for the current BTC quotes.
blockchain_url = "https://blockchain.info/ticker"
# Sent with a desktop Safari User-Agent string.
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15"}

# The decoded response is treated as a mapping: currency code -> quote dict.
json = requests.get(blockchain_url, headers=headers).json()

# Tag each quote dict with its own currency code so the code survives as a
# regular column once the dicts become DataFrame rows.
currencies_array = []
for currency_code, quote in json.items():
    quote["Currency"] = currency_code
    currencies_array.append(quote)

df_btc_prices = pd.DataFrame(currencies_array)

In [69]:
# Preview the first rows of the BTC quote table (one row per currency).
df_btc_prices.head()

Unnamed: 0,15m,last,buy,sell,symbol,Currency
0,9567.27,9567.27,9567.27,9567.27,$,USD
1,14329.89,14329.89,14329.89,14329.89,$,AUD
2,41767.82,41767.82,41767.82,41767.82,R$,BRL
3,12642.67,12642.67,12642.67,12642.67,$,CAD
4,9400.18,9400.18,9400.18,9400.18,CHF,CHF


In [70]:
# Write the BTC quotes to CSV. The row index is written as well, as the first
# (unnamed) column.
# NOTE(review): assumes the ./outputs directory already exists.
df_btc_prices.to_csv("./outputs/btc_prices_currencies.csv")

## Merge

In [71]:
def get_value_from_currency(value):
    """Return the ``last`` BTC quote for the currency code ``value``.

    The code is looked up in the ``Currency`` column of the module-level
    ``df_btc_prices`` frame and the first matching row is used.

    Raises:
        IndexError: If no row of ``df_btc_prices`` has that currency code.
    """
    is_match = (df_btc_prices['Currency'] == value).to_numpy()
    first_label = df_btc_prices.index[is_match][0]
    return df_btc_prices.loc[first_label, "last"]

def price_to_btc(row):
    """Convert one product row's price into BTC.

    Args:
        row: Mapping-like row providing ``currency`` (a currency code) and
            ``price`` (the amount in that currency).

    Returns:
        ``price`` divided by the BTC quote that ``get_value_from_currency``
        returns for ``currency``.
    """
    currency, price = row['currency'], row["price"]
    return price / get_value_from_currency(currency)

In [72]:
# Row-wise conversion: price_to_btc receives each product row (axis=1) and
# its result becomes that row's price_btc value.
df_products['price_btc'] = df_products.apply(price_to_btc, axis=1)

In [73]:
# Preview the first rows again, now including the price_btc column.
df_products.head()

Unnamed: 0,id,name,category,subcategory,price,currency,brand,price_btc
0,54521,Ensalada Mezclum Petit Plà 250g,Frescos,Mercado,2.98,EUR,Fruites i Verdures Lluís Macià,0.000336
1,42519,Aguacate Maduro,Frescos,Mercado,3.99,EUR,Fruites i Verdures Lluís Macià,0.00045
2,43876,Plátano de Canarias Verde,Frescos,Mercado,2.47,EUR,Fruites i Verdures Lluís Macià,0.000278
3,42501,Plátano de Canarias Maduro,Frescos,Mercado,2.47,EUR,Fruites i Verdures Lluís Macià,0.000278
4,42535,Fresas de Maresme 500g,Frescos,Mercado,3.98,EUR,Fruites i Verdures Lluís Macià,0.000448


In [None]:
# Export the merged product table (prices in EUR plus price_btc). The file is
# named after the products, so it must be written from df_products, not from
# the BTC quote table.
df_products.to_csv("./outputs/final_ulabox_products.csv")