In [None]:
import requests
import pandas as pd
import time

# Seminar - APIs and real-life coding

## Task 1: Requesting API
### 1a. Create a function requesting data from sreality

```python
base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page=60&page={}'.format(i)

r = requests.get(base_url)
d = r.json()
```

* function should parametrize: 
    * `category_main_cb` - `{'flat':1, 'house':2, 'land':3 }`
    * `category_type_cb` - `{'sell':1,'rent':2}`
    * `locality_region_id` - `{'Praha':10,'Brno':14}`
    * `page` parameter
* use string inputs for `category_main_cb` and `category_type_cb`
* test the validity of inputs
* include try/except clause to handle errors
* function should return JSON data in python types
* do not forget to sleep for at least 0.5 s between requests

In [None]:
def getting_sreality(category_main_cb, category_type_cb, locality_region_id, page, pause=0.5):
    """Request one page of estate listings from the sreality.cz API.

    Parameters
    ----------
    category_main_cb, category_type_cb, locality_region_id
        Values sent as the query parameters of the same names.
    page
        Value sent as the ``page`` query parameter.
    pause : float, optional
        Seconds to sleep before the request is sent (default 0.5), so that
        calling this function in a loop throttles the requests.

    Returns
    -------
    The decoded JSON body of the response. If the request fails, the server
    answers with an error status, or the body is not valid JSON, the string
    ``"the request was unsuccessful"`` is returned instead.
    """
    # Throttle every call, including the first one, to stay polite to the API.
    time.sleep(pause)
    try:
        # A single request serves both the status check and the payload.
        response = requests.get(
            'https://www.sreality.cz/api/cs/v2/estates',
            params={
                'category_main_cb': category_main_cb,
                'category_type_cb': category_type_cb,
                'locality_region_id': locality_region_id,
                'per_page': 60,
                'page': page,
            },
            timeout=30,
        )
        # Turn 4xx/5xx answers into an exception so they share the error path.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decoding errors raised by older requests versions.
        return "the request was unsuccessful"

# Lookup tables that translate the human-readable choices typed by the user
# into the numeric codes passed on as request parameters.
main_dict = dict(flat=1, house=2, land=3)
type_dict = dict(sell=1, rent=2)
locality_dict = dict(Praha=10, Brno=14)

def getting_value(inp, dict, max_tries=1000):
    """Translate a user-supplied key into its value, re-prompting until it is valid.

    Parameters
    ----------
    inp
        The key entered by the user.
    dict
        Mapping of accepted keys to the values to return. The name shadows the
        builtin; it is kept so existing calls keep working.
    max_tries : int, optional
        Maximum number of times the user is asked again (default 1000).

    Returns
    -------
    ``dict[inp]`` for the first accepted key, or the string
    ``"you have exceeded the number of tries"`` when no valid key was entered
    within ``max_tries`` prompts or no further input can be read.
    """
    attempts = 0
    # Loop instead of recursing so the accepted value is actually returned.
    while inp not in dict:
        if attempts >= max_tries:
            return "you have exceeded the number of tries"
        try:
            inp = input("wrong input, try again: ")
        except EOFError:
            # No more input available, so further retries are impossible.
            return "you have exceeded the number of tries"
        attempts += 1
    return dict[inp]
            

# Ask the user for each request parameter and translate the answers into the
# numeric codes of the lookup tables.
category_main_cb = getting_value(input("category_main_cb input: "), main_dict)
category_type_cb = getting_value(input("category_type_cb input: "), type_dict)
# This prompt asks for the locality, so it is labelled with the locality parameter name.
locality_region_id = getting_value(input("locality_region_id input: "), locality_dict)
# `page` is read by the single-page request in a later cell, so it must be defined here.
page = int(input("page input: "))

### 1b. Create a function converting sreality json data into pandas dataframe

In [None]:
def sreality_to_pd(json_data):
    """Try to load ``json_data`` with ``pd.read_json``; fall back to the input.

    Parameters
    ----------
    json_data
        Whatever the request function returned.

    Returns
    -------
    The result of ``pd.read_json(json_data)`` when pandas can read the input.
    When ``pd.read_json`` raises, which is the case for an already-decoded
    ``dict`` in the pandas versions this was checked against, the input
    object itself is returned unchanged.
    """
    try:
        return pd.read_json(json_data)
    except Exception:
        # Only ordinary errors trigger the fallback; interrupts are not swallowed.
        return json_data

### 1c. link function `1b` into function `1a`

In [None]:
# Request a single page with the parameters chosen above and pass the answer
# through the converter; as the last expression, the result is the cell output.
sreality_to_pd(
    getting_sreality(
        category_main_cb,
        category_type_cb,
        locality_region_id,
        page,
    )
)


{'meta_description': '5905 realit v nabídce prodej bytů Praha. Vyberte si novou nemovitost na sreality.cz s hledáním na mapě a velkými náhledy fotografií nabízených bytů.',
 'result_size': 5905,
 '_embedded': {'estates': [{'labelsReleased': [['new_building',
      'not_furnished'],
     []],
    'has_panorama': 0,
    'labels': ['Novostavba', 'Nevybavený'],
    'is_auction': False,
    'labelsAll': [['new_building',
      'personal',
      'balcony',
      'cellar',
      'elevator',
      'parking_lots',
      'garage',
      'not_furnished'],
     ['playground',
      'small_shop',
      'candy_shop',
      'tavern',
      'theater',
      'vet',
      'movies',
      'sightseeing',
      'tram',
      'train',
      'post_office',
      'kindergarten',
      'drugstore',
      'bus_public_transport',
      'school',
      'metro',
      'shop',
      'medic',
      'restaurant',
      'atm',
      'sports']],
    'seo': {'category_main_cb': 1,
     'category_sub_cb': 6,
     'catego

### 1d. Combining multiple requests into a single df

* Function should parametrize:
    * `start_page` and `end_page`
    * request parameters
* construct a list of individual request dfs
* then feed it into `pd.concat` function

In [None]:
# Read the first and the last page to download; both answers must parse as integers.
start_page, end_page = [
    int(input(prompt)) for prompt in ("start page input: ", "end page input: ")
]

In [None]:
def single_df(start_page, end_page, category_main_cb, category_type_cb, locality_region_id):
    """Download several result pages and stack their estates into one DataFrame.

    Parameters
    ----------
    start_page, end_page : int
        First and last page to request (both inclusive).
    category_main_cb, category_type_cb, locality_region_id
        Request parameters forwarded to ``getting_sreality``.

    Returns
    -------
    pandas.DataFrame
        One row per estate, with only the top level of each estate record
        expanded into columns and a fresh 0..n-1 index. An empty DataFrame is
        returned when the page range is empty or no page yielded usable data.
    """
    import warnings

    page_frames = []
    for page_number in range(start_page, end_page + 1):
        payload = sreality_to_pd(
            getting_sreality(category_main_cb, category_type_cb, locality_region_id, page_number)
        )
        try:
            estates = payload["_embedded"]["estates"]
        except (KeyError, TypeError, IndexError):
            # A failed request yields a non-dict payload; skip that page but
            # tell the user so the gap in the data does not go unnoticed.
            warnings.warn("page {} returned no usable data and was skipped".format(page_number))
            continue
        page_frames.append(pd.json_normalize(estates, max_level=0))

    # pd.concat raises on an empty list, so handle "nothing downloaded" explicitly.
    if not page_frames:
        return pd.DataFrame()
    return pd.concat(page_frames, ignore_index=True)



## Task 2: Cleaning data

### 2a. Filter columns
* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']`
* use `.copy()` to avoid `SettingWithCopyWarning` later


In [None]:
# Download the requested pages and keep an independent copy that holds only
# the columns used by the cleaning steps.
df = single_df(
    start_page, end_page, category_main_cb, category_type_cb, locality_region_id
)[
    ['locality', 'price', 'name', 'gps', 'hash_id', 'labelsAll', 'exclusively_at_rk']
].copy(deep=True)

### 2b: GPS
* Convert dictionary in `gps` column into two columns - `lat` and `lon`
* use apply function on gps column
* Note: `apply` can return multiple columns

In [None]:
# Expand the dictionaries stored in `gps` into one column per key and append
# those columns to df. `lat`/`lon` columns left over from an earlier run of this
# cell are dropped first, so re-running the cell does not duplicate them.
df = pd.concat(
    [df.drop(columns=["lat", "lon"], errors="ignore"), df["gps"].apply(pd.Series)],
    axis=1,
)

### 2b. Get flat type from name
* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`
* try picking third word in string
* check meaningfulness using `.value_counts()`

In [None]:
# The flat type is taken to be the third whitespace-separated word of the name.
df["type_of_flat"] = df["name"].map(lambda name: name.split()[2])

### 2c. Get area from name
* Naive: select the word before last word
* Then try navigating using the index of `'m²'`
* if this also fails, then you will need to use regex

In [None]:
# Take the number written directly in front of "m²" rather than a fixed word
# position, so names whose flat type spans several words are parsed as well.
# astype(int) raises when a name contains no such number, keeping bad rows visible.
df["area"] = df["name"].str.extract(r"(\d+)\s*m²", expand=False).astype(int)

## Task 3 (Homework): Convert column `labelsAll` into boolean variables

### Task 3a. Get all possible label names
* deal with nested-list structure
* Hint: try summing the whole column to get a nested list of lists.
* Then flatten the nested list (2D to 1D)
* Finally keep only unique elements


In [None]:
# Flatten each row's list of label groups into a single flat list of labels.
# Entries that are not lists are kept as they are, so running this cell again on
# already-flattened rows leaves them unchanged instead of splitting each label
# into single characters.
df["labelsAll"] = df["labelsAll"].apply(
    lambda groups: [
        label
        for group in groups
        for label in (group if isinstance(group, list) else [group])
    ]
)

In [None]:
# Collect the labels of all rows into one Series and show its distinct values.
pd.Series([label for row_labels in df["labelsAll"] for label in row_labels]).unique()

array(['new_building', 'personal', 'loggia', 'terrace', 'cellar',
       'elevator', 'not_furnished', 'playground', 'tavern', 'small_shop',
       'movies', 'theater', 'candy_shop', 'vet', 'natural_attraction',
       'kindergarten', 'tram', 'sports', 'school', 'bus_public_transport',
       'metro', 'restaurant', 'drugstore', 'medic', 'atm', 'post_office',
       'train', 'shop', 'after_reconstruction', 'brick', 'sightseeing',
       'balcony', 'parking_lots', 'garage', 'partly_furnished', 'panel',
       'collective', 'furnished', 'in_construction'], dtype=object)

### 3b. Test existence of label `cellar` for offers
* again deal with nested list of list structure
* write generic function `test_existence_of_label(offer_labels,label)`

In [None]:
# Show at most the first 50 rows of the cleaned frame as the cell output.
df.head(n=50)

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk,lat,lon,type_of_flat,area
0,Praha 5 - Hlubočepy,13642000,Prodej bytu 3+kk 88 m²,"{'lat': 50.02206976811756, 'lon': 14.389917231...",834861916,"[new_building, personal, loggia, terrace, cell...",1,50.02207,14.389917,3+kk,88
1,Praha 3 - Žižkov,15504000,Prodej bytu atypické 146 m²,"{'lat': 50.07649476811756, 'lon': 14.459372231...",3750241356,"[personal, after_reconstruction, terrace, bric...",1,50.076495,14.459372,atypické,146
2,Praha 4 - Záběhlice,4809000,Prodej bytu 2+1 57 m²,"{'lat': 50.03731776811756, 'lon': 14.504512231...",1759475532,"[personal, balcony, brick, elevator, parking_l...",1,50.037318,14.504512,2+1,57
3,Praha 9 - Vysočany,6195000,Prodej bytu 1+kk 36 m²,"{'lat': 50.10492576811756, 'lon': 14.508641231...",1321411660,"[new_building, personal, terrace, cellar, elev...",0,50.104926,14.508641,1+kk,36
4,Praha 9 - Vysočany,16538000,Prodej bytu 4+1 181 m²,"{'lat': 50.093629768117566, 'lon': 14.51848423...",2930648140,"[new_building, personal, balcony, terrace, bri...",0,50.09363,14.518484,4+1,181
5,Praha 6 - Veleslavín,5352000,Prodej bytu 2+kk 46 m²,"{'lat': 50.08246076811756, 'lon': 14.341159231...",464053324,"[personal, brick, cellar, not_furnished, playg...",1,50.082461,14.341159,2+kk,46
6,Praha 9 - Kyje,10332000,Prodej bytu 4+kk 91 m²,"{'lat': 50.104856768117564, 'lon': 14.57564323...",2118333516,"[new_building, personal, balcony, brick, eleva...",1,50.104857,14.575643,4+kk,91
7,Praha 8 - Troja,9257000,Prodej bytu 2+kk 79 m²,"{'lat': 50.109792768117565, 'lon': 14.45155123...",2879833164,"[personal, terrace, cellar, elevator, garage, ...",1,50.109793,14.451551,2+kk,79
8,Praha 3 - Žižkov,5688000,Prodej bytu 2+kk 48 m²,"{'lat': 50.08008376811756, 'lon': 14.494732231...",2361599052,"[personal, loggia, panel, cellar, elevator, ca...",0,50.080084,14.494732,2+kk,48
9,Praha 2 - Nové Město,5781000,Prodej bytu 1+1 36 m²,"{'lat': 50.06598076811756, 'lon': 14.438227231...",3266631500,"[personal, brick, playground, small_shop, vet,...",0,50.065981,14.438227,1+1,36


### 3c. Test existence of all possible labels
* use apply returning series with all labels