### Testing scraper with Amsterdam

In [1]:
import os
# Move the working directory one level up; presumably this is what makes the
# `scraper` package imported in a later cell resolvable — confirm against the
# repository layout.
# NOTE: this call is not idempotent. Re-running the cell climbs one more
# directory each time, so restart the kernel before re-executing it.
os.chdir("..")

In [2]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and runtime warnings
# raised by the scraper's dependencies; consider narrowing it to a category.
warnings.filterwarnings("ignore")

In [8]:
import json
import re

from scraper.scrape import API, URL

In [4]:
# Display the URL constant imported from scraper.scrape.
URL

'https://www.numbeo.com/cost-of-living/'

In [5]:
# Build the scraper client from the URL constant.
# NOTE(review): the second argument is presumably the country the client is
# scoped to — confirm against scraper.scrape.API.
api = API(URL, "Netherlands")

In [7]:
# Retrieve the data for a single city. Judging by the printed result, this is a
# dict mapping category headings to lists of [item, price, range] string triples.
# NOTE(review): presumably this issues a live request to the site — confirm
# before re-running repeatedly.
amsterdam = api.get_single_city("Amsterdam")

In [12]:
# Pretty-print the raw result. json.dumps escapes non-ASCII characters by
# default, which is why the non-breaking space and the euro sign appear as
# \u00a0 and \u20ac in the output.
print(json.dumps(amsterdam, indent=2))

{
  "\n\nRestaurants": [
    [
      "Meal, Inexpensive Restaurant ",
      " 16.00\u00a0\u20ac",
      "\n\n11.00-30.00"
    ],
    [
      "Meal for 2 People, Mid-range Restaurant, Three-course ",
      " 70.00\u00a0\u20ac",
      "\n\n50.00-90.00"
    ],
    [
      "McMeal at McDonalds (or Equivalent Combo Meal) ",
      " 9.00\u00a0\u20ac",
      "\n\n8.00-10.00"
    ],
    [
      "Domestic Beer (0.5 liter draught) ",
      " 5.00\u00a0\u20ac",
      "\n\n4.00-8.00"
    ],
    [
      "Imported Beer (0.33 liter bottle) ",
      " 4.50\u00a0\u20ac",
      "\n\n3.10-6.00"
    ],
    [
      "Cappuccino (regular) ",
      " 3.46\u00a0\u20ac",
      "\n\n2.00-5.00"
    ],
    [
      "Coke/Pepsi (0.33 liter bottle) ",
      " 2.67\u00a0\u20ac",
      "\n\n2.00-4.00"
    ],
    [
      "Water (0.33 liter bottle)  ",
      " 2.29\u00a0\u20ac",
      "\n\n2.00-3.00"
    ]
  ],
  "\n\nMarkets": [
    [
      "Milk (regular), (1 liter) ",
      " 1.06\u00a0\u20ac",
      "\n\n0.90-1.49"
 

In [13]:
# The raw category keys still carry a leading "\n\n", so it has to be spelled
# out when indexing the unprocessed dict.
amsterdam["\n\nRent Per Month"]

[['Apartment (1 bedroom) in City Centre ',
  ' 1,536.90\xa0€',
  '\n\n1,300.00-1,900.00'],
 ['Apartment (1 bedroom) Outside of Centre ',
  ' 1,230.00\xa0€',
  '\n\n1,000.00-1,500.00'],
 ['Apartment (3 bedrooms) in City Centre ',
  ' 2,622.11\xa0€',
  '\n\n2,100.00-4,000.00'],
 ['Apartment (3 bedrooms) Outside of Centre ',
  ' 1,975.14\xa0€',
  '\n\n1,660.00-2,500.00']]

### Processing keys

In [14]:
# Iterating a dict yields its keys, so list() is enough to show them all.
print(list(amsterdam))

['\n\nRestaurants', '\n\nMarkets', '\n\nTransportation', '\n\nUtilities (Monthly)', '\n\nSports And Leisure', '\n\nChildcare', '\n\nClothing And Shoes', '\n\nRent Per Month', '\n\nBuy Apartment Price', '\n\nSalaries And Financing']


In [24]:
# Normalise the category names by trimming surrounding whitespace (the raw
# headings start with "\n\n"). strip() is used instead of a fixed [2:] slice so
# that a heading with no leading whitespace, or a different amount of it, is
# not truncated.
ams_clean = {category.strip(): rows for category, rows in amsterdam.items()}
ams_clean.keys()

dict_keys(['Restaurants', 'Markets', 'Transportation', 'Utilities (Monthly)', 'Sports And Leisure', 'Childcare', 'Clothing And Shoes', 'Rent Per Month', 'Buy Apartment Price', 'Salaries And Financing'])

#### Processing rent

In [None]:
# Address of one price cell as (category key, row index, column index).
# Row 0 is 'Apartment (1 bedroom) in City Centre'; column 1 holds the price
# string (the mean value), while column 2 holds the min-max range.
# Later cells read this name, so this cell must be executed before them.
rent_idx = ('Rent Per Month', 0, 1)

In [42]:
# Drill down category -> row -> column to pull out the raw price string.
rent = ams_clean[rent_idx[0]][rent_idx[1]][rent_idx[2]]
rent

' 1,536.90\xa0€'

In [45]:
# Remove the non-breaking space and surrounding whitespace, then drop the
# trailing euro sign. rstrip('€') is used rather than slicing off the last
# character so the cell stays idempotent: re-running it on the already-cleaned
# value no longer eats a digit.
rent = rent.replace('\xa0', '').strip().rstrip('€')
rent

'1,536.90'

In [47]:
# Drop the thousands separator so that float() can parse the string.
float(rent.replace(',', ''))

1536.9

### Generalizing the price parsing into a function

In [49]:
def get_formatted_price(
    city_info: dict, 
    group_key: str = "", 
    price_idx: tuple = (0, 1)) -> float:
    """Return one scraped price as a float.

    The price string is looked up as
    ``city_info[group_key][price_idx[0]][price_idx[1]]`` and is expected to
    contain exactly one number, optionally surrounded by whitespace and a
    currency marker of any length on either side (e.g. ``' 1,536.90\\xa0€'``,
    ``'120.00 kr'``, ``'R$ 25.50'``). Commas are treated as thousands
    separators and a dot as the decimal point.

    Args:
        city_info: Mapping of category name to a list of rows, each row being
            a sequence of strings.
        group_key: Category to read from.
        price_idx: ``(row, column)`` position of the price inside the category.

    Returns:
        The numeric value of the price.

    Raises:
        KeyError: If ``group_key`` is not present in ``city_info``.
        IndexError: If ``price_idx`` points outside the category's rows.
        ValueError: If the cell does not contain exactly one number (for
            example a missing value such as ``'?'`` or a ``min-max`` range).
    """
    raw_price = city_info[group_key][price_idx[0]][price_idx[1]]

    # Pick out the number itself instead of chopping off the last character:
    # the currency marker may be longer than one character, may precede the
    # amount, or may be absent altogether. fullmatch() guarantees nothing
    # numeric is left over, so ranges like '11.00-30.00' are rejected.
    match = re.fullmatch(r"[^\d-]*(-?\d[\d,]*(?:\.\d+)?)\D*", raw_price.strip())
    if match is None:
        raise ValueError(f"no single numeric price found in {raw_price!r}")

    return float(match.group(1).replace(",", ""))

In [50]:
# Sanity check: this should reproduce the 1536.9 obtained step by step above.
get_formatted_price(ams_clean, "Rent Per Month", (0, 1))

1536.9

In [52]:
# Prices (column 1) of the first three "Restaurants" rows, as a tuple.
tuple(
    get_formatted_price(ams_clean, "Restaurants", (row, 1))
    for row in range(3)
)

(16.0, 70.0, 9.0)

In [55]:
# Price (column 1) of every row in the "Restaurants" category.
[
    get_formatted_price(ams_clean, "Restaurants", (row, 1))
    for row, _ in enumerate(ams_clean["Restaurants"])
]

[16.0, 70.0, 9.0, 5.0, 4.5, 3.46, 2.67, 2.29]

### Sum of full categories

In [61]:
# Categories whose item prices are all added together.
features_to_sum = ('Restaurants', 'Markets', 'Utilities (Monthly)')
# Category from which only a single price is taken (added in a later cell).
rent_feature = 'Rent Per Month'

In [62]:
# For each selected category, add up the price (column 1) of every row.
expenses = {
    feature: sum(
        get_formatted_price(ams_clean, feature, (row, 1))
        for row in range(len(ams_clean[feature]))
    )
    for feature in features_to_sum
}
expenses

{'Restaurants': 112.92, 'Markets': 74.4, 'Utilities (Monthly)': 215.34}

In [63]:
# Rent contributes a single figure (row 0, 'Apartment (1 bedroom) in City
# Centre') rather than the sum of every row in its category.
expenses[rent_feature] = get_formatted_price(ams_clean, rent_feature, (0, 1))
expenses

{'Restaurants': 112.92,
 'Markets': 74.4,
 'Utilities (Monthly)': 215.34,
 'Rent Per Month': 1536.9}

In [65]:
# Grand total: the summed categories plus the single rent figure.
sum(expenses.values())

1939.56