In [1]:
import json
import locale
import requests
import unicodedata

from datetime import datetime
import pandas as pd
from bs4 import BeautifulSoup

---- using HTML response ----

In [2]:
# Search page for sold listings on Hemnet.
url = "https://www.hemnet.se/salda/bostader"

# Query-string filters sent with the search request.
params = {
    #'housing_form_groups':'apartments',
    'location_ids': '898472',
    'item_types': 'bostadsratt',
    'sold_age': '3d',  # 1d,4d,1w,1m and so on...
}
payload = {}

headers = {
    'User-Agent': 'Mozilla/5.0'
}

# A timeout keeps the cell from hanging forever on a stalled connection
# (requests has no default timeout).
response = requests.get(
    url,
    headers=headers,
    data=payload,
    params=params,
    timeout=30,
)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")

# Show the final URL so the encoded query string can be checked.
response.url

'https://www.hemnet.se/salda/bostader?location_ids=898472&item_types=bostadsratt&sold_age=3d'

In [3]:
# Switch LC_TIME to Swedish so that datetime.strptime's %B directive matches
# the month names in the scraped "Såld <day> <month> <year>" strings.
# The former locale.getdefaultlocale() call was dropped: its result was
# discarded and the function is deprecated since Python 3.11.
# NOTE: setlocale changes process-wide state and raises locale.Error when the
# "sv_SE" locale is not installed on the machine running the notebook.
locale.setlocale(locale.LC_TIME, "sv_SE")

'sv_SE'

In [4]:
def _element_text(element):
    """Return the whitespace-stripped text content of a BeautifulSoup tag.

    ``get_text()`` is used instead of ``.string`` because ``.string`` is
    ``None`` as soon as the tag has more than one child node, which would
    make a following ``.strip()`` call raise ``AttributeError``.
    """
    return element.get_text().strip()


def _first_int(text):
    """Return the first integer in ``text``, ignoring whitespace between digits.

    Examples: ``"2 261 kr/mån" -> 2261``, ``"Slutpris 4 100 000 kr" -> 4100000``,
    ``"25 000 kr/m²" -> 25000``.

    This replaces the former ``str.strip('kr/m2')`` approach: ``strip`` removes
    a *set of characters* from both ends, so a value starting with the digit 2
    (e.g. "25 000 kr/m2") lost its leading digit.

    Raises:
        ValueError: if ``text`` contains no decimal digit.
    """
    digits = []
    # NFKD turns non-breaking spaces into plain spaces before scanning.
    for char in unicodedata.normalize("NFKD", text):
        if char.isdecimal():
            digits.append(char)
        elif digits and not char.isspace():
            # First non-space character after the number: the unit starts here.
            break
    return int("".join(digits))


def _to_number(token):
    """Convert a numeric token to ``int`` when integral, otherwise ``float``.

    A decimal comma is accepted (``"42,5" -> 42.5``).
    """
    value = float(token.replace(',', '.'))
    return int(value) if value.is_integer() else value


def _parse_area(area_text, has_extra_area):
    """Split the area/rooms text into ``(living_area, rooms)``.

    Args:
        area_text: text of the ``sold-property-listing__area`` element.
        has_extra_area: True when the listing card contains the
            ``listing-card__attribute--normal-weight`` span. In that case
            tokens 0 and 2 are summed and the room count is token 3
            (presumably a "<living> + <extra> m² <rooms> rum" layout; verify
            against the live markup). Otherwise token 0 is the area and
            token 1 the room count.

    Returns:
        ``(living_area, rooms)``; ``living_area`` is numeric in both layouts
        (it used to be an int in one branch and a str in the other), ``rooms``
        is kept as the raw token. Either value is ``None`` when its token is
        missing.
    """
    tokens = area_text.replace('m²', '').split()
    if not tokens:
        return None, None

    if has_extra_area:
        living_area = _to_number(tokens[0]) + _to_number(tokens[2])
        rooms_index = 3
    else:
        living_area = _to_number(tokens[0])
        rooms_index = 1

    rooms = tokens[rooms_index] if len(tokens) > rooms_index else None
    return living_area, rooms


def _parse_price_change(text):
    """Parse a price-change label such as ``"+4 %"`` into ``(sign, change, rate)``.

    ``rate`` is the multiplier relative to the asking price: ``"+4 %" -> 1.04``,
    ``"-5 %" -> 0.95``, and ``1.0`` whenever the change is zero (whatever the
    sign character is). The rate is computed arithmetically; the previous
    implementation passed the scraped text to ``eval``.

    Raises:
        ValueError: if the change is non-zero and the sign is not a plus or a
            minus (ASCII "-" or the Unicode minus sign U+2212).
    """
    compact = "".join(text.strip(' %').split())
    sign = compact[0]
    change = int(compact[1:])

    if change == 0:
        rate = 100 / 100
    elif sign == '+':
        rate = (100 + change) / 100
    elif sign in ('-', '\u2212'):
        rate = (100 - change) / 100
    else:
        raise ValueError(f"unexpected price-change sign {sign!r} in {text!r}")
    return sign, change, rate


def parse_sold_listing(item):
    """Extract the fields of one ``sold-results__normal-hit`` list item.

    Args:
        item: the ``<li>`` tag of a single sold listing.

    Returns:
        dict with ``listingId``, ``saleId``, ``url`` and ``address``, plus
        whichever of ``fee``, ``living_area``, ``rooms``, ``end_date``,
        ``end_price``, ``end_price_area``, ``end_price_change_rate``,
        ``end_price_change`` and ``end_price_change_sign`` are present in
        the markup.
    """
    listing = {}

    # The ids are stored as JSON in the data-tracking-data attribute.
    tracking = json.loads(item.attrs['data-tracking-data'])
    listing['listingId'] = tracking['listingId']
    listing['saleId'] = tracking['saleId']

    # The anchor wraps the whole card and carries the listing url.
    link = item.find('a', 'sold-property-link')
    listing['url'] = link['href']

    listing['address'] = _element_text(
        link.find('h2', 'sold-property-listing__heading')
    )

    # Fee: guarded like every other optional field below.
    fee_el = link.find('div', 'sold-property-listing__fee')
    if fee_el is not None:
        listing['fee'] = _first_int(_element_text(fee_el))

    # Living area and number of rooms share one element.
    area_el = link.find('div', 'sold-property-listing__area')
    if area_el is not None:
        extra_area_el = link.find('span', 'listing-card__attribute--normal-weight')
        living_area, rooms = _parse_area(
            _element_text(area_el), extra_area_el is not None
        )
        if living_area is not None:
            listing['living_area'] = living_area
        if rooms is not None:
            listing['rooms'] = rooms

    # Everything below lives inside the price-info container.
    price_info = link.find('div', 'sold-property-listing__price-info')
    if price_info is None:
        return listing

    # Sale date; %B relies on LC_TIME being a Swedish locale (set in an
    # earlier cell) to match the month name.
    end_date_el = price_info.find('div', class_="sold-property-listing__sold-date")
    if end_date_el is not None:
        end_date_str = _element_text(end_date_el).replace('Såld ', '')
        listing['end_date'] = datetime.strptime(end_date_str, '%d %B %Y')

    # Final price.
    end_price_el = price_info.find('div', class_="sold-property-listing__subheading")
    if end_price_el is not None:
        listing['end_price'] = _first_int(_element_text(end_price_el))

    # Final price per square metre.
    end_price_area_el = price_info.find('div', class_="sold-property-listing__price-per-m2")
    if end_price_area_el is not None:
        listing['end_price_area'] = _first_int(_element_text(end_price_area_el))

    # Price change relative to the asking price.
    end_price_change_el = price_info.find('div', class_="sold-property-listing__price-change")
    if end_price_change_el is not None:
        sign, change, rate = _parse_price_change(_element_text(end_price_change_el))
        listing['end_price_change_rate'] = rate
        listing['end_price_change'] = change
        listing['end_price_change_sign'] = sign

    return listing


#find the ul with the results list
result_ul = soup.find('ul', 'sold-results')

#get the list items with our data (filter out the ads);
#an absent results list is treated as "no hits" instead of raising TypeError
result_li = result_ul('li', 'sold-results__normal-hit') if result_ul is not None else []

#one dict per listing, in page order
item_list = [parse_sold_listing(item) for item in result_li]

#print(item_list)

In [5]:
# One row per scraped listing; the keys of each dict become the columns.
df = pd.json_normalize(data=item_list)

# Preview the first three rows.
df.iloc[:3]

Unnamed: 0,listingId,saleId,url,address,fee,living_area,rooms,end_date,end_price,end_price_area,end_price_change_rate,end_price_change,end_price_change_sign
0,19487181,3712367622981951270,https://www.hemnet.se/salda/lagenhet-2rum-mari...,Tavastgatan 28,2261,42,2,2023-01-27,4100000,97619,1.04,4.0,+
1,19505936,7650211919900293632,https://www.hemnet.se/salda/lagenhet-3rum-sode...,"Repslagargatan 23, vån 4",2969,77,3,2023-01-27,6870000,89221,1.1,10.0,+
2,19535502,2905477317003764455,https://www.hemnet.se/salda/lagenhet-1rum-sode...,Heleneborgsgatan 10B,2344,34,1,2023-01-27,3580000,105294,1.09,9.0,+


In [8]:
# Inspect the raw markup of the first hit only; displaying the whole result
# set floods the notebook output with HTML.
result_li[:1]

[<li class="sold-results__normal-hit" data-tracking-data='{"listingCardContext":"result_list","listingStatus":"sale","listingPromoType":"basic","listingId":19487181,"listingType":"common_listing","listingPosition":1,"saleId":3712367622981951270}' data-tracking-index="1">
 <a class="sold-property-link js-sold-property-card-link" data-tracking-sold-object-card-type="basic" href="https://www.hemnet.se/salda/lagenhet-2rum-mariaberget-stockholms-kommun-tavastgatan-28-3712367622981951270">
 <div class="sold-property-listing qa-sale-card">
 <div class="sold-property-listing__info">
 <div class="sold-property-listing__location">
 <h2 class="sold-property-listing__heading qa-selling-price-title">
           Tavastgatan 28
         </h2>
 <div>
 <span class="property-icon property-icon--result"><svg height="16" viewbox="0 0 14 16" width="14" xmlns="http://www.w3.org/2000/svg"><title>Lägenhet</title><desc><span class="svg-icon__fallback-text">Lägenhet</span></desc><path class="svg-icon__shape" d=

import json
import unicodedata
from datetime import datetime


#find the ul with the results list
result_ul = soup.find('ul', 'sold-results')

#get the list items with our data (filter out the ads);
#an absent results list is treated as "no hits"
result_li = result_ul('li', 'sold-results__normal-hit') if result_ul is not None else []

print(len(result_li))

#start loop...
for item in result_li:

    #the anchor wraps the whole listing card
    result_li_a = item.find('a', 'sold-property-link')

    #living area and rooms share one element; it may be missing, and it may
    #contain a nested span, in which case .string would be None - so the
    #text is read with get_text() behind a guard, as in the main parsing cell
    area_el = result_li_a.find('div', 'sold-property-listing__area')
    if area_el is None:
        rooms = living_area = None
    else:
        area = area_el.get_text().strip()
        area_arr = area.replace('m²', '').split()

        if result_li_a.find('span', 'listing-card__attribute--normal-weight'):
            #extra-area layout: tokens 0 and 2 are areas, token 3 is the room count
            living_area = int(area_arr[0]) + int(area_arr[2])
            rooms = area_arr[3]
        else:
            living_area = area_arr[0]
            rooms = area_arr[1]
    #area=int("".join(unicodedata.normalize("NFKD",area).strip('kr/mån').split()))
    #address
    #address_str=result_price_info.find('div',class_='sold-property-listing__location').string.strip()
    #print(address_str)