In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:75% !important; }</style>"))

In [2]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import re
import pprint

In [3]:
def get_brands_urls(main_url="https://ihero.dk/priser/"):
    html = urlopen(main_url)
    soup = BeautifulSoup(html, "html.parser")
    # we extract the urls corresponding to each of the phones on the page of the brand
    urls = {}
    brand_soups = soup.find_all("a", class_="col m:w1/1 brand-box")
    n=0
    while True:
        try:
            brand = brand_soups[n].find("span", class_="brand-box-title").contents[0].replace("Reparation af", "").strip()
            url = brand_soups[n].get("href")
            
            if brand in ["iPhones", "iPads", "Mac"]:
                brand="Apple" + " " + brand
            elif brand in ["PCer"]:
                brand="PC"
            
            urls[brand] = url
            n+=1

        except:
            break
    return urls

# get_brands_urls()

In [4]:
def get_series_urls(brand_name, brand_url):
    html = urlopen(brand_url)
    soup = BeautifulSoup(html, "html.parser")
    series_soups = soup.find_all("a", class_="col m:w1/1 brand-box")
    series_urls = {}
    for serie_soup in series_soups:
        serie_name = serie_soup.find("span", class_="brand-box-title").contents[0]
        url = serie_soup.get("href")
        serie_name = serie_name.replace("-Serien", "").strip()
        series_urls[serie_name] = url
    return series_urls

# get_series_urls('Samsung', 'https://ihero.dk/samsung-reparation/')

In [5]:
def get_phone_serie_reparations(brand_name, serie_name, serie_url):
    html = urlopen(serie_url)
    soup = BeautifulSoup(html, "html.parser")
    reparation_names = [rep.contents[0] for rep in soup.find_all("td", class_="rep-title")]
    prices =  [price.contents[0] for price in soup.find_all("td", class_="price-field tilbuds-pris")]
    
    serie_dict = {}    
    
    for reparation_name, price in zip(reparation_names, prices):
        
        # convert price to float
        price = float(price.replace(",-", "").strip())
        
        reparation_dict = {}
        
        # for each of the reparation names, we check if several models are described in one line
        if serie_name != "Diverse Modeller":
            reparation_name_split = reparation_name.split(serie_name)
        else:
            reparation_name_split = [""]
        
        if len(reparation_name_split[0]) > 0:
            # several models described in one line
            phone_names = [(serie_name + x.strip()).replace(serie_name[-1] + serie_name[-1], serie_name[-1]) for x in reparation_name_split[1].split(",")]
            
            for phone_name in phone_names:
                
                phone_name = (brand_name + " " + phone_name).strip()
                # print(phone_name, "\t", reparation_name, "\t", price)

        else:
            phone_name = reparation_name.replace("Komplet skærm udskiftning", "")\
                                        .replace("Komplet Skærm Udskiftning", "")\
                                        .replace("LCD Udskiftning", "")\
                                        .replace("Glas Udskiftning", "")
            # print(phone_name, "\t", reparation_name, "\t", price)
            # phone_name = re.search(serie_name + "(\d{0,2})?.?\s?(\((PLUS )?\d{4}\))?(lpha|Plus|PLUS|Mini|Edge|Active|Duos|Neo|Advance)?(\s?\+)?", reparation_name).group(0)
            phone_name = (brand_name + " " + phone_name).strip()
            reparation_name = reparation_name.replace(phone_name, "") 
        # print(serie_dict)
        if phone_name in serie_dict:
            serie_dict[phone_name][reparation_name] = price
        else:
            reparation_dict[reparation_name] = price
            serie_dict[phone_name] = reparation_dict

    # print(serie_dict)    
    return serie_dict

# for serie, url in get_series_urls('Samsung', 'https://ihero.dk/samsung-reparation/').items():
#     get_phone_serie_reparations('Samsung', serie, url)

In [6]:
def get_phones_urls(brand_name, brand_url):
    html = urlopen(brand_url)
    soup = BeautifulSoup(html, "html.parser")
    phones_soups = soup.find_all("a", class_="col m:w1/1 brand-box")
    phone_names_urls = {}
    for phone_soup in phones_soups:
        phone_name = phone_soup.find("span", class_="brand-box-title").contents[0]
        url = phone_soup.get("href")
        phone_name = brand_name + " " + phone_name
        phone_names_urls[phone_name] = url
    return phone_names_urls

# get_phones_urls("Apple iPhones", "https://ihero.dk/iphone-reparation-2/")

In [7]:
def get_phone_reparations(phone_url):
    html = urlopen(phone_url)
    soup = BeautifulSoup(html, "html.parser")
    reparation_names = [x.contents[0] for x in soup.find_all("td", class_="rep-title")]
    prices = [x.contents[0] for x in soup.find_all("td", class_="price-field tilbuds-pris")]
    phone_dict = {}
    for (reparation_name, price) in zip(reparation_names, prices):
        # convert price to float
        price = float(price.replace(",-", "").replace('IKKE PÅ LAGER', "NaN").replace("GRATIS", "0").strip())
        phone_dict[reparation_name] = price
    return phone_dict

# get_phone_reparations("https://ihero.dk/iphone-11-pro-max-reparation/")

In [8]:
brand_urls_dict = get_brands_urls()

repair_cost_dict = {}
for brand, brand_url in brand_urls_dict.items():
    if brand == "Samsung":
        print(brand, brand_url)
        for serie, url in get_series_urls('Samsung', 'https://ihero.dk/samsung-reparation/').items():
            serie_dict = get_phone_serie_reparations('Samsung', serie, url)
            for phone_item in serie_dict.items():
                phone_name = phone_name = phone_item[0]
                reparations_dict = phone_item[1]
                repair_cost_dict[phone_name] = get_phone_reparations(phone_url)
    else:
        
        print(brand, brand_url)
        phone_urls_dict = get_phones_urls(brand, brand_url)
        for phone_item in phone_urls_dict.items():
            phone_name = phone_item[0]
            phone_url = phone_item[1]
            repair_cost_dict[phone_name] = get_phone_reparations(phone_url)
print("done")

Apple iPhones https://ihero.dk/iphone-reparation-2/
Apple iPads https://ihero.dk/ipad-reparation/
Apple Mac https://ihero.dk/mac-reparation/
Huawei https://ihero.dk/huawei/
Sony https://ihero.dk/sony-reparation/
One Plus https://ihero.dk/one-plus/
LG https://ihero.dk/lg-reparation/
Nokia https://ihero.dk/nokia-reparation/
PC https://ihero.dk/pc-reparation/
done


{'Apple iPhones IPhone 11 Pro Max': {'Skærm udskiftning': 2399.0,
  'Skærm beskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri udskiftning': nan},
 'Apple iPhones iPhone 11 Pro': {'Skærm Udskiftning': 1999.0,
  'Skærmbeskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri Udskiftning': nan},
 'Apple iPhones iPhone 11': {'Skærm udskiftning (OEM)': 1199.0,
  'Skærm beskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri Udskiftning': nan},
 'Apple iPhones iPhone XS Max': {'Skærm udskiftning (Grade A+++)': 1499.0,
  'Skærm udskiftning (OEM)': 2999.0,
  'Skærmbeskyttelse': 200.0,
  'Udskiftning af Bagglas': 1000.0,
  'Batteri udskiftning': 999.0,
  'Ladestik udskiftning': 999.0,
  'Mikrofon udskiftning': 999.0,
  'Ørehøjtaler udskiftning': 799.0,
  'Bundhøjtaler udskiftning': 799.0,
  'Front kamera udskiftning': 999.0,
  'Bagkamera udskiftning': 1200.0,
  'Kameralinse udskiftning': 799.0,
  'Tænd/Sluk Funktion (Power knap) udskiftning': 999.0,
  'Vol

In [9]:
repair_cost_dict

{'Apple iPhones IPhone 11 Pro Max': {'Skærm udskiftning': 2399.0,
  'Skærm beskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri udskiftning': nan},
 'Apple iPhones iPhone 11 Pro': {'Skærm Udskiftning': 1999.0,
  'Skærmbeskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri Udskiftning': nan},
 'Apple iPhones iPhone 11': {'Skærm udskiftning (OEM)': 1199.0,
  'Skærm beskyttelse': 200.0,
  'Udskiftning af Bagglas': 1500.0,
  'Batteri Udskiftning': nan},
 'Apple iPhones iPhone XS Max': {'Skærm udskiftning (Grade A+++)': 1499.0,
  'Skærm udskiftning (OEM)': 2999.0,
  'Skærmbeskyttelse': 200.0,
  'Udskiftning af Bagglas': 1000.0,
  'Batteri udskiftning': 999.0,
  'Ladestik udskiftning': 999.0,
  'Mikrofon udskiftning': 999.0,
  'Ørehøjtaler udskiftning': 799.0,
  'Bundhøjtaler udskiftning': 799.0,
  'Front kamera udskiftning': 999.0,
  'Bagkamera udskiftning': 1200.0,
  'Kameralinse udskiftning': 799.0,
  'Tænd/Sluk Funktion (Power knap) udskiftning': 999.0,
  'Vol

In [10]:
reparations_names = []
for item in repair_cost_dict.items():
    phone_name = item[0]
    reparations = item[1]
    for reparation in reparations:
        reparations_names.append(reparation)

In [11]:
def convert_reparation_name(reparation_name):
    reparation_name_transformed = reparation_name
    if re.match("(.*Komplet (S|s)kærm.*)|(.*Glas/ LCD.*)|(.*Glas/LCD.*)|(.*LCD.*)|(.*(S|s)kærm.*)", reparation_name) is not None:
        reparation_name_transformed = "screen"
    if re.match("(.*3G.*)|(.*Antenne.*)", reparation_name) is not None:
        reparation_name_transformed = "3G_4G"
    if re.match(".*Batteri.*", reparation_name) is not None:
        reparation_name_transformed = "battery"
    if re.match("(.*BAGSIDE.*)|(.*Bagside.*)|(.*Bagcover.*)|(.*Bagglas.*)", reparation_name) is not None:
        reparation_name_transformed = "back_frame"
    if re.match("(.*GLAS.*)|(.*Glas.*)", reparation_name) is not None:
        reparation_name_transformed = "glass"
    if re.match(".*Bluetooth.*", reparation_name) is not None:
        reparation_name_transformed = "bluetooth"
    if re.match(".*WIFI.*", reparation_name) is not None:
        reparation_name_transformed = "wifi"
    if re.match("(.*BagKamera.*)|(.*Bagkamera.*)", reparation_name) is not None:
        reparation_name_transformed = "back_camera"
    if re.match(".*Front (K|k)amera.*", reparation_name) is not None:
        reparation_name_transformed = "back_camera"
    if re.match(".*Diagnose.*", reparation_name) is not None:
        reparation_name_transformed = "diagnose"
    if re.match(".*Ladestik.*", reparation_name) is not None:
        reparation_name_transformed = "charging_socket"
    if re.match("(.*Vibrator.*)", reparation_name) is not None:
        reparation_name_transformed = "vibrator"
    if re.match("(.*Tænd/(S|s)luk.*)", reparation_name) is not None:
        reparation_name_transformed = "power_cable"
    if re.match("(.*Volume/(L|l)ydløs.*)|(.*Lydløs.*)", reparation_name) is not None:
        reparation_name_transformed = "volume_button"
    if re.match("(.*Bundhøjtaler.*)|(.*Højtaler.*)|(.*Ørehøjtaler.*)", reparation_name) is not None:
        reparation_name_transformed = "speaker"
    if re.match("(.*Ramme.*)", reparation_name) is not None:
        reparation_name_transformed = "full_frame"
    if re.match("(.*Mikrofon.*)", reparation_name) is not None:
        reparation_name_transformed = "microphone"
    if re.match("(.*Kameralinse.*)", reparation_name) is not None:
        reparation_name_transformed = "camera_lens"
    if re.match("(.*audio.*)|(.*Jackstik.*)", reparation_name) is not None:
        reparation_name_transformed = "audio_port"
    if re.match("(.*(H|h)ome (K|k)nap.*)", reparation_name) is not None:
        reparation_name_transformed = "home_button"
    if re.match("(.*Andre knapper.*)", reparation_name) is not None:
        reparation_name_transformed = "other_button"      
    return reparation_name_transformed

In [12]:
for phone_name, reparations in repair_cost_dict.items():
    for reparation_name, reparation_cost in reparations.copy().items():
        reparations[convert_reparation_name(reparation_name)] = reparations.pop(reparation_name)

In [15]:
repair_cost_dict

{'Apple iPhones IPhone 11 Pro Max': {'screen': 200.0,
  'back_frame': 1500.0,
  'battery': nan},
 'Apple iPhones iPhone 11 Pro': {'screen': 200.0,
  'back_frame': 1500.0,
  'battery': nan},
 'Apple iPhones iPhone 11': {'screen': 200.0,
  'back_frame': 1500.0,
  'battery': nan},
 'Apple iPhones iPhone XS Max': {'screen': 200.0,
  'back_frame': 1000.0,
  'battery': 999.0,
  'charging_socket': 999.0,
  'microphone': 999.0,
  'speaker': 799.0,
  'back_camera': 1200.0,
  'camera_lens': 799.0,
  'power_cable': 999.0,
  'volume_button': 999.0,
  'vibrator': 799.0,
  'diagnose': 0.0},
 'Apple iPhones iPhone XS': {'screen': 200.0,
  'back_frame': 900.0,
  'battery': 799.0,
  'charging_socket': 999.0,
  'microphone': 999.0,
  'speaker': 799.0,
  'back_camera': 1200.0,
  'camera_lens': 799.0,
  'power_cable': 999.0,
  'volume_button': 999.0,
  'vibrator': 799.0,
  'diagnose': 0.0},
 'Apple iPhones iPhone XR': {'screen': 200.0,
  'back_frame': 900.0,
  'battery': 799.0,
  'charging_socket': 999.0,