# Ristretto Index
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mygs/koffee/blob/master/nespresso/notebook/ristretto_index.ipynb)

In [None]:
import pandas as pd
import os
import json
import re
import time
from bs4 import BeautifulSoup
import requests
import pickle
import folium
from urllib.request import urlopen
from datetime import datetime
from IPython.display import Audio, display, HTML
try:
    import geopandas
except ImportError:
    !pip install geopandas

In [None]:
#@title Setting variables{display-mode: "form"}
# Hide the <audio> player element; ALARM is only meant to be heard.
display(HTML("<style>audio{display:none}</style>"))
# Autoplaying sound object; cells evaluate it last to signal completion.
ALARM=Audio(url='https://sound.peal.io/ps/audios/000/021/856/original/youtube_21856.mp3', autoplay=True)

# Colab is detected from the repr of the active IPython shell.
IN_COLAB = 'google.colab' in str(get_ipython())
if IN_COLAB:
    # On Colab the notebook only reads the cache files published on GitHub,
    # so no FX API endpoint is configured.
    print("I am running in Colab. Will get latest cache files in Github")
    GETGEOAPI_URL=None
    RISTRETTO_PRICE_FILE = "https://raw.githubusercontent.com/mygs/koffee/master/nespresso/notebook/ristretto.json"
    FX_CACHE_FILE = "https://raw.githubusercontent.com/mygs/koffee/master/nespresso/notebook/fx_cache.json"
    NESPRESSO = pd.read_csv("https://raw.githubusercontent.com/mygs/koffee/master/nespresso/nespresso_ristretto.csv")
else:
    print("Local run. Will update cache files if outdated")
    # Read the key through a context manager so the handle is closed, and
    # strip surrounding whitespace: a trailing newline in the config file
    # would otherwise be embedded in the request URL.
    with open("getgeoapi.cfg", "r") as cfg_file:
        API_KEY = cfg_file.read().strip()
    # `{}` is filled with the source currency code by the FX cell.
    GETGEOAPI_URL="https://api.getgeoapi.com/v2/currency/convert?api_key="+API_KEY+"&from={}&to=USD&amount=1&format=json"
    RISTRETTO_PRICE_FILE="ristretto.json"
    FX_CACHE_FILE="fx_cache.json"
    NESPRESSO = pd.read_csv("../nespresso_ristretto.csv")

NESPRESSO.head()

In [None]:
#@title Scraping Nespresso websites{display-mode: "form"}
def _extract_price(soup):
    """Return the price embedded in a parsed product page.

    The known page layouts are probed in a fixed order; the first marker
    found decides which parser is used.

    Args:
        soup: BeautifulSoup document of the product page.

    Returns:
        The price as a float, or the string "not found" when no known
        marker is present.

    Raises:
        Exception: any lookup/parsing error when a marker is present but its
            payload does not have the expected shape.
    """
    # Layout: JSON pushed through `window.ui.push(...)`.
    script = soup.find("script", string=re.compile(r".*internationalId.*"))
    if script:
        script_string = script.text.replace('\\n', ' ')
        payload = json.loads(re.findall(r'window.ui.push\((.*?)\);\n', script_string)[0])
        return float(payload["configuration"]["eCommerceData"]["product"]["price"])

    # Layout: Magento catalog provider JSON; the first listed item is used.
    script = soup.find("script", string=re.compile(r".*Magento_Catalog\/js\/product\/view\/provider.*"))
    if script:
        payload = json.loads(script.text.replace('\\n', ' '))
        data_items = payload["*"]["Magento_Catalog/js/product/view/provider"]["data"]["items"]
        first_item = next(iter(data_items.values()), None)
        if first_item is None:
            return "not found"
        return float(first_item["price_info"]["regular_price"])

    # Layout: `aepc_pixel_events` tracking variable.
    script = soup.find("script", string=re.compile(r".*aepc_pixel_events.*"))
    if script:
        payload = json.loads(re.findall(r'var aepc_pixel_events = (.*?);\n', script.text)[0])
        return float(payload["standard_events"]["ViewContent"][0]["params"]["value"])

    # Layout: JSON document with an '@graph' list carrying the offers.
    script = soup.find("script", string=re.compile(r".*priceSpecification.*"))
    if script:
        payload = json.loads(script.text.replace('\\n', ' '))
        return float(payload['@graph'][1]["offers"][0]["price"])

    # Layout: analytics call whose first argument is "Viewed Product".
    script = soup.find("script", string=re.compile(r".*Viewed Product.*"))
    if script:
        payload = json.loads(re.findall(r'"Viewed Product",(.*?)\);', script.text)[0])
        return float(payload["price"])

    # Layout: JSON document with a top-level "offers" object.
    script = soup.find("script", string=re.compile(r".*productID.*"))
    if script:
        payload = json.loads(script.text.replace('\n', ' '))
        return float(payload["offers"]["price"])

    # Layout: inline JavaScript variable holding the price.
    script = soup.find("script", string=re.compile(r".*productBasePriceTaxIncl.*"))
    if script:
        return float(re.findall(r'var productBasePriceTaxIncl = (.*?);\n', script.text)[0])

    # Layout: hidden <input id="productEntity"> whose value is JSON.
    product_entity = soup.find("input", {"id": "productEntity"})
    if product_entity:
        return float(json.loads(product_entity["value"])["priceFinal"])

    # Layouts: price rendered as text with a currency symbol to strip.
    text_layouts = (
        ("span", "nes-capsule-header-price", '€'),
        ("span", "product__info__price", '₱'),
        ("p", "product-price", '€'),
    )
    for tag_name, css_class, symbol in text_layouts:
        price_tag = soup.find(tag_name, {"class": css_class})
        if price_tag:
            return float(price_tag.text.replace(symbol, ''))

    return "not found"


def get_nespresso_price(url):
    """Download a Nespresso product page and extract its listed price.

    Args:
        url: Address of the product page.

    Returns:
        The price as a float in the site's local currency, the string
        "not found" when the page matches no known layout, or the string
        "parser error" when a layout matched but could not be parsed.
    """
    # NOTE(review): the download happens outside the try block, so network
    # errors propagate to the caller; no timeout is set on the request.
    html = requests.get(url).content
    soup = BeautifulSoup(html, 'html.parser')
    try:
        return _extract_price(soup)
    except Exception:
        # Best effort: one unparsable site must not abort the whole scrape.
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
        return "parser error"

def get_today_ristretto_prices():
    """Return the per-country Ristretto price list, using a one-day cache.

    On Colab (IN_COLAB) the list is downloaded from RISTRETTO_PRICE_FILE.
    Locally, RISTRETTO_PRICE_FILE is reused when it was last written today;
    otherwise every URL in NESPRESSO['ristretto_url'] is scraped again and
    the file is rewritten.

    Returns:
        A list of dicts with the keys "date" (YYYY-MM-DD), "country" (the
        row's flag_code) and "local_price" (whatever get_nespresso_price
        returned for that row).
    """
    if IN_COLAB:
        with urlopen(RISTRETTO_PRICE_FILE) as response:
            return json.loads(response.read())

    today = datetime.now().date()
    if os.path.exists(RISTRETTO_PRICE_FILE):
        # Freshness is judged by the last write (mtime): ctime can change
        # without the content being refreshed.
        written_on = datetime.fromtimestamp(os.path.getmtime(RISTRETTO_PRICE_FILE)).date()
        if written_on == today:
            with open(RISTRETTO_PRICE_FILE, 'r') as fp:
                return json.load(fp)

    # Cache is missing or stale: scrape every country again.
    stamp = today.strftime('%Y-%m-%d')
    ww_ristretto_prices = [
        {
            "date": stamp,
            "country": flag_code,
            "local_price": get_nespresso_price(url),
        }
        for flag_code, url in zip(NESPRESSO['flag_code'], NESPRESSO['ristretto_url'])
    ]
    # Persist the list that was just built (not the notebook-level
    # `ristretto` variable, which does not exist yet on a first run).
    # Opening with 'w' replaces any stale file only once scraping succeeded.
    with open(RISTRETTO_PRICE_FILE, 'w') as fp:
        json.dump(ww_ristretto_prices, fp)
    return ww_ristretto_prices

# Load today's price list (from cache or by scraping); `ristretto` is read
# later when prices are converted to USD.
ristretto=get_today_ristretto_prices()
# ALARM is the autoplay Audio object from the settings cell; evaluating it
# as the cell's last expression presumably plays it once the step finishes.
ALARM

In [None]:
#@title Getting FX Rates{display-mode: "form"}
def get_today_fx_rate():
    """Return the currency -> USD rate table, using a one-day cache.

    On Colab (IN_COLAB) the table is downloaded from FX_CACHE_FILE.
    Locally, FX_CACHE_FILE is reused when it was last written today;
    otherwise one request per distinct currency in NESPRESSO.fx is sent to
    GETGEOAPI_URL and the file is rewritten.

    Returns:
        A dict mapping each currency code to its USD rate as a float, or to
        -1 when the API response contained no "rates" entry.
    """
    if IN_COLAB:
        with urlopen(FX_CACHE_FILE) as response:
            return json.loads(response.read())

    if os.path.exists(FX_CACHE_FILE):
        # Freshness is judged by the last write (mtime): ctime can change
        # without the content being refreshed.
        written_on = datetime.fromtimestamp(os.path.getmtime(FX_CACHE_FILE)).date()
        if written_on == datetime.now().date():
            with open(FX_CACHE_FILE, 'r') as fp:
                return json.load(fp)

    # Cache is missing or stale: query the API once per distinct currency.
    fx_rates = {}
    for currency in set(NESPRESSO.fx.values):
        rate = -1  # sentinel kept when the API returns no rate
        req_resp = requests.get(url=GETGEOAPI_URL.format(currency))
        data = req_resp.json()
        if "rates" in data:
            rate = float(data["rates"]["USD"]["rate"])
        fx_rates[currency] = rate
    # Opening with 'w' replaces any stale file only once all lookups succeeded.
    with open(FX_CACHE_FILE, 'w') as fp:
        json.dump(fx_rates, fp)
    return fx_rates

# Load today's FX table (from cache or from the API); `fx_cache` is read
# later when prices are converted to USD.
fx_cache = get_today_fx_rate()
# ALARM is the autoplay Audio object from the settings cell; evaluating it
# as the cell's last expression presumably plays it once the step finishes.
ALARM

In [None]:
def calculate_price(fx_cache, ristretto):
    """Convert scraped local prices to a per-capsule USD price table.

    Args:
        fx_cache: Mapping of currency code to USD rate.
        ristretto: Iterable of dicts with the keys "country" (a flag_code
            present in NESPRESSO) and "local_price" (a number, or a string
            when scraping failed).

    Returns:
        A DataFrame with the columns country_code, country, local_currency,
        local_price and price (USD, rounded to 2 decimals). Countries whose
        price is not numeric, whose FX rate is unknown, or whose USD price
        falls outside the open interval (0.1, 4) are left out and listed in
        an "[IGNORED]" line printed to stdout.
    """
    columns = ["country_code", "country", "local_currency", "local_price", "price"]
    prices = []
    ignored_country = []
    for row in ristretto:
        # Look the country up once instead of re-filtering per attribute.
        match = NESPRESSO[NESPRESSO.flag_code == row["country"]]
        entry = {
            "country_code": match.alpha_3.values[0],
            "country": match.country.values[0],
            "local_currency": match.fx.values[0],
        }
        local_price = row["local_price"]
        rate = fx_cache.get(entry["local_currency"])
        # Failed scrapes arrive as strings ("not found" / "parser error");
        # skip them, and unknown currencies, instead of crashing.
        is_number = isinstance(local_price, (int, float)) and not isinstance(local_price, bool)
        if not is_number or rate is None:
            ignored_country.append(entry["country"])
            continue

        price = local_price * rate
        if price > 5:  # greater than $5? should be the price for 10 units, so ...
            entry["local_price"] = round(local_price / 10, 3)
            entry["price"] = round(price / 10, 2)
        else:
            entry["local_price"] = local_price
            entry["price"] = round(price, 2)

        # Plausibility window; this also drops the -1 "no rate" sentinel.
        if 0.1 < entry["price"] < 4:
            prices.append(entry)
        else:
            ignored_country.append(entry["country"])
    print("[IGNORED] %s" % ','.join(map(str, ignored_country)))
    # Explicit columns keep the schema stable even when nothing qualified.
    return pd.DataFrame(prices, columns=columns)
# Build the USD price table from the FX rates and scraped prices loaded by
# the previous cells.
prices = calculate_price(fx_cache, ristretto)
# ALARM is the autoplay Audio object from the settings cell; evaluating it
# as the cell's last expression presumably plays it once the step finishes.
ALARM

In [None]:
# Country outlines from the 'naturalearth_lowres' dataset shipped with geopandas.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
# Left-join the price table onto the outlines by ISO alpha-3 code, then keep
# only the countries that actually received a price.
prices = (
    world
    .merge(prices, how="left", left_on=['iso_a3'], right_on=['country_code'])
    .dropna(subset=['price'])
)

In [None]:
nespresso_map = folium.Map()
# Choropleth layer: `prices` supplies both the geometry and the values,
# matched on the country name.
price_layer = folium.Choropleth(
    data=prices,
    geo_data=prices,
    columns=['name', 'price'],
    key_on='feature.properties.name',
    name='choropleth',
    legend_name='Ristretto Index (USD/capsule)',
    fill_color='Accent',
    fill_opacity=0.7,
    line_opacity=0.2,
)
price_layer.add_to(nespresso_map)
# Last expression of the cell: render the map.
nespresso_map