# Ristretto Index
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mygs/koffee/blob/master/nespresso/notebooks/ristretto_index.ipynb)

In [1]:
import pandas as pd
import json
import re
import time
from bs4 import BeautifulSoup
import requests
import pickle
from datetime import datetime
# Per-country lookup table. The rendered preview shows the columns
# region, country, flag_code, fx and ristretto_url (plus an "Unnamed: 0"
# column, i.e. a previously saved index that is read back as data).
# NOTE(review): `fx` looks like an ISO currency code -- confirm.
ristretto = pd.read_csv("../nespresso_ristretto.csv")
# NOTE: a bare expression is only rendered when it is the last statement of a
# cell; in this position the preview is computed and then discarded.
ristretto.head()
# Reload a previously saved snapshot. Presumably this is the list of price
# records written by the commented-out `pickle.dump` in the scraping cell
# (same file name) -- confirm.
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load snapshot files that you created yourself or otherwise trust.
with open('20220207_ristretto.pkl', 'rb') as handle:
    ristretto_20220207 = pickle.load(handle)

Unnamed: 0,region,country,flag_code,fx,ristretto_url
0,Africa,Mauritius,im,MUR,https://www.buynespresso.com/mu_en/ispirazione...
1,Africa,Morocco,ma,MAD,https://ma.buynespresso.com/ma_en/ispirazione-...
2,Africa,South Africa,za,ZAR,https://za.buynespresso.com/za_en/coffee/origi...
3,Africa,Egypt,eg,EGP,https://www.buynespresso.com/eg_en/ispirazione...
4,Africa,Ivory Coast,ci,XOF,https://www.buynespresso.com/ci_en/ispirazione...


In [2]:
def _find_script(soup, pattern):
    """Return the first ``<script>`` tag whose text matches ``pattern``, or ``None``."""
    # `string=` is the current name of the argument formerly spelled `text=`.
    return soup.find("script", string=re.compile(pattern))


def _extract_price(soup):
    """Probe the known page layouts in order and return the price of the first hit.

    The first layout whose marker element is present decides the outcome:
    if that element cannot be parsed the exception propagates to the caller
    instead of falling through to the next layout.

    Args:
        soup: Parsed product page (a ``BeautifulSoup`` document).

    Returns:
        The price as ``float``, or the string ``"not found"`` when no known
        marker element is present (or the Magento item map is empty).
    """
    # Layout: `window.ui.push({...});` script carrying an eCommerceData payload.
    script = _find_script(soup, r".*internationalId.*")
    if script is not None:
        # Literal backslash-n sequences are replaced by spaces before parsing.
        script_string = script.text.replace('\\n', ' ')
        payload = re.findall(r'window.ui.push\((.*?)\);\n', script_string)[0]
        json_object = json.loads(payload)
        return float(json_object["configuration"]["eCommerceData"]["product"]["price"])

    # Layout: Magento catalog provider JSON; only the first listed item is used.
    script = _find_script(soup, r".*Magento_Catalog\/js\/product\/view\/provider.*")
    if script is not None:
        json_object = json.loads(script.text.replace('\\n', ' '))
        data_items = json_object["*"]["Magento_Catalog/js/product/view/provider"]["data"]["items"]
        first_item = next(iter(data_items.values()), None)
        if first_item is None:
            return "not found"
        return float(first_item["price_info"]["regular_price"])

    # Layout: `var aepc_pixel_events = {...};` tracking script.
    script = _find_script(soup, r".*aepc_pixel_events.*")
    if script is not None:
        payload = re.findall(r'var aepc_pixel_events = (.*?);\n', script.text)[0]
        json_object = json.loads(payload)
        return float(json_object["standard_events"]["ViewContent"][0]["params"]["value"])

    # Layout: JSON document with an `@graph` list; price read from its second entry.
    script = _find_script(soup, r".*priceSpecification.*")
    if script is not None:
        json_object = json.loads(script.text.replace('\\n', ' '))
        return float(json_object['@graph'][1]["offers"][0]["price"])

    # Layout: analytics call of the form `..."Viewed Product",{...});`.
    script = _find_script(soup, r".*Viewed Product.*")
    if script is not None:
        payload = re.findall(r'"Viewed Product",(.*?)\);', script.text)[0]
        json_object = json.loads(payload)
        return float(json_object["price"])

    # Layout: JSON document with a single `offers` object. Here real newline
    # characters (not literal backslash-n) are replaced before parsing.
    script = _find_script(soup, r".*productID.*")
    if script is not None:
        json_object = json.loads(script.text.replace('\n', ' '))
        return float(json_object["offers"]["price"])

    # Layout: price assigned to a JavaScript variable.
    script = _find_script(soup, r".*productBasePriceTaxIncl.*")
    if script is not None:
        price = re.findall(r'var productBasePriceTaxIncl = (.*?);\n', script.text)[0]
        return float(price)

    # Layout: hidden <input id="productEntity"> whose value is a JSON object.
    product_entity = soup.find("input", {"id": "productEntity"})
    if product_entity is not None:
        json_object = json.loads(product_entity["value"])
        return float(json_object["priceFinal"])

    # Layouts: price rendered as plain text next to a currency symbol.
    price_tag = soup.find("span", {"class": "nes-capsule-header-price"})
    if price_tag is not None:
        return float(price_tag.text.replace('€', ''))

    price_tag = soup.find("span", {"class": "product__info__price"})
    if price_tag is not None:
        return float(price_tag.text.replace('₱', ''))

    price_tag = soup.find("p", {"class": "product-price"})
    if price_tag is not None:
        return float(price_tag.text.replace('€', ''))

    return "not found"


def get_nespresso_price(url, timeout=30):
    """Download a product page and extract the capsule price from it.

    Args:
        url: Address of the product page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled server cannot block the notebook forever. Pass ``None``
            to wait indefinitely.

    Returns:
        The price as ``float`` in whatever unit the page reports;
        ``"not found"`` when none of the known page layouts is recognised;
        ``"parser error"`` when a layout is recognised but its content cannot
        be parsed.

    Raises:
        Any exception raised by ``requests.get`` (the download happens
        outside the parsing ``try`` block and is deliberately not masked).
    """
    html = requests.get(url, timeout=timeout).content
    soup = BeautifulSoup(html, 'html.parser')
    try:
        return _extract_price(soup)
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop a long scraping run.
        return "parser error"

In [3]:
# Scrape the current price for every country in the lookup table and collect
# one {"date", "country", "price"} record per row, all stamped with today's date.
today = datetime.today().strftime('%Y-%m-%d')
ww_ristreto_prices = []
for r in ristretto.index:
    flag_code = ristretto.loc[r, 'flag_code']
    product_url = ristretto.loc[r, 'ristretto_url']
    ww_ristreto_prices.append({
        "date": today,
        "country": flag_code,
        "price": get_nespresso_price(product_url),
    })
# Uncomment to persist this run as a snapshot file:
#with open('20220207_ristretto.pkl', 'wb') as handle:
#    pickle.dump(ww_ristreto_prices, handle, protocol=pickle.HIGHEST_PROTOCOL)

{'date': '2022-02-07', 'country': 'im', 'price': 250.0}
{'date': '2022-02-07', 'country': 'ma', 'price': 59.0}
{'date': '2022-02-07', 'country': 'za', 'price': 85.0}
{'date': '2022-02-07', 'country': 'eg', 'price': 140.0}
{'date': '2022-02-07', 'country': 'ci', 'price': 3200.0}
{'date': '2022-02-07', 'country': 'br', 'price': 2.8}
{'date': '2022-02-07', 'country': 'ca', 'price': 0.83}
{'date': '2022-02-07', 'country': 'co', 'price': 2350.0}
{'date': '2022-02-07', 'country': 'ar', 'price': 96.0}
{'date': '2022-02-07', 'country': 'cl', 'price': 610.0}
{'date': '2022-02-07', 'country': 'pa', 'price': 0.98}
{'date': '2022-02-07', 'country': 'mx', 'price': 14.5}
{'date': '2022-02-07', 'country': 'us', 'price': 0.75}
{'date': '2022-02-07', 'country': 'au', 'price': 0.77}
{'date': '2022-02-07', 'country': 'cn', 'price': 3.8}
{'date': '2022-02-07', 'country': 'ph', 'price': 32.0}
{'date': '2022-02-07', 'country': 'vn', 'price': 14700.0}
{'date': '2022-02-07', 'country': 'tw', 'price': 17.0}
{'