# Store results of auction of Dienst Roerende Zaken
Monthly auction results are published on http://www.domeinenrz.nl/catalogus. This notebook scrapes the results from the DRZ website, parses the text and stores it in a dataframe.  
- - - 

### User variables
- `Date`: Month of the auction results to fetch, formatted as `yyyy-mm`. It is needed to build the correct url.  
- `Verbose`: Debug level. Higher values print more intermediate output. 

In [1]:
Date = "2019-05"  # auction month as yyyy-mm; the page urls are derived from it
Verbose = 0  # debug level; the cells below print more when this is raised

### Import modules

In [2]:
import pandas as pd
import urllib
from lxml import html, etree
import requests
import codecs 
import re
import time

In [3]:
# needed for new (as of feb '18) url format
import locale
locale.setlocale(locale.LC_TIME,'nl_NL')
pd.to_datetime('now').strftime('%A %d %B')
pd.to_datetime(Date,format='%Y-%m').strftime('%A %d %B')

'woensdag 01 mei'

### Internal variables

In [4]:
# website with results
url = 'http://www.domeinenrz.nl/catalogus'

# IRS (belastingdienst) auction was added. Naming of url changed.
if pd.to_datetime(Date,format='%Y-%m').strftime('%A %d %B') >= pd.to_datetime('2019-05',format='%Y-%m').strftime('%A %d %B'):
    url = 'http://www.domeinenrz.nl/catalogi'

### Read external data
These files are used to recognize text fragments. Regex patterns are mapped to field names.

In [5]:
def _read_patterns(file_name, column_names):
    "Read a regex pattern file and name its leading columns after column_names."
    table = pd.read_csv(file_name,
                        comment='#',
                        header=None,
                        quotechar='"',
                        delimiter=",",
                        skipinitialspace=True)
    # columns are numbered 0, 1, .. because the files have no header row
    return table.rename(columns=dict(enumerate(column_names)))


# field name + pattern whose 'val' group holds the value of that field
tags = _read_patterns('./regex-patterns/drz-re-patt-tag.txt', ('Field', 'Pattern'))

# field name + pattern; the field is True when the pattern occurs in the text
flagtags = _read_patterns('./regex-patterns/drz-re-patt-hastag.txt', ('Field', 'Pattern'))

# pattern + replacement text; a missing replacement becomes an empty string
repfragments = _read_patterns('./regex-patterns/drz-re-patt-replace.txt', ('Pattern', 'Replace')).fillna('')

### Functions

In [6]:
def _response_charset(page, default='utf-8'):
    '''
    Return the charset named in the Content-type header of a response.
    `default` is returned when the header is missing or names no charset.
    '''
    content_type = page.headers.get('Content-type', '')
    M = re.search(r'charset\s*=\s*["\']?([^\s;"\']+)', content_type, flags=re.IGNORECASE)
    return M.group(1) if M else default


def _build_url(baseurl, urldata):
    "Return baseurl followed by '?' and the url-encoded content of urldata."
    # import the submodule explicitly: a bare 'import urllib' does not load it
    import urllib.parse
    return baseurl + '?' + urllib.parse.urlencode(urldata)


def _fetch_tree(KavelUrl, max_retries=10, timeout=30):
    '''
    Download KavelUrl and return its content as an lxml html tree.

    A failing request is retried after a pause of 1 second. After
    `max_retries` retries an Exception is raised, chained to the last
    request error. `timeout` (seconds) is passed to requests.get so a
    hanging connection counts as a failed request instead of blocking.
    '''
    for attempt in range(max_retries + 1):
        try:
            page = requests.get(KavelUrl, timeout=timeout)
            break
        except requests.exceptions.RequestException as err:
            # only request errors are retried; ctrl-c still interrupts
            if attempt >= max_retries:
                raise Exception('Retried, but failed') from err
            print('pause 1 sec and try again!')
            time.sleep(1)

    # convert to unicode, using the encoding the server reports
    htmlstring = codecs.decode(page.content, _response_charset(page))
    # convert string to tree object
    return html.fromstring(htmlstring)


def gettree(baseurl,Lotid,Date=Date,disp=False):
    
    '''
    tree from url. Version where urls are formatted as 
    http://www.domeinenrz.nl/catalogus/verkoop_bij_inschrijving_2018-0009?=&meerfotos=K1800091800"
    later it even changed to:
    .  .  .  .  .  .  . klik_hier_voor_verkoop_bij_inschrijving_2018-0011

    baseurl : url of the catalogue, without page name and query
    Lotid   : lot id as used in the url, e.g. 'K1800091800'
    Date    : auction month as 'yyyy-mm'. The default is the value the
              notebook variable Date had when this function was defined.
    disp    : print the generated url when True

    Returns the tuple (tree, KavelUrl): the parsed page and the url it was
    read from. Raises Exception when the page could not be downloaded.
    '''
    
    # Change date format ('2018-09' becomes '2018-0009') and parse it once
    Datestr = Date.replace('-','-00')
    Auction = pd.to_datetime(Datestr,format='%Y-00%m')

    # The name of the page changed a few times
    if Auction < pd.Timestamp(2018, 11, 1):
        pagename = 'verkoop_bij_inschrijving_{:s}'.format(Datestr)
    elif Auction < pd.Timestamp(2019, 2, 1):
        pagename = 'klik_hier_voor_verkoop_bij_inschrijving_{:s}'.format(Datestr)
    elif Auction < pd.Timestamp(2019, 5, 1):
        # name of the month is appended; its language follows the LC_TIME locale
        pagename = 'verkoop_bij_inschrijving_{:s}_{:s}'.format(Datestr,Auction.strftime('%B'))
    else:
        pagename = 'verkoop_bij_inschrijving_{:s}'.format(Datestr)
    baseurl += '/' + pagename
        
    # create url
    urldata = {}
    urldata[''] = '' # to create '=&'. This might be a bug in the site 
    
    # Add auction id
    urldata['veilingen'] = Datestr

    # Add lot number
    urldata['meerfotos'] = Lotid
    # generate url with urldata
    KavelUrl = _build_url(baseurl, urldata)
    if disp: print(KavelUrl)
    
    # get page and convert to tree object
    tree = _fetch_tree(KavelUrl)
    
    return tree,KavelUrl

def gettree_v1(baseurl,Lot,Date=None,disp=False):
    
    '''
    tree from url. Version where urls are formatted as 
    http://www.domeinenrz.nl/catalogus?=&meerfotos=1799&veilingen=2018-09"

    baseurl : url of the catalogue, without query
    Lot     : lot number as used in the url, e.g. '1799'
    Date    : auction month as 'yyyy-mm'; left out of the url when None or empty
    disp    : print the generated url when True

    Returns the tuple (tree, KavelUrl): the parsed page and the url it was
    read from. Raises Exception when the page could not be downloaded.
    '''
    
    # create url
    urldata = {}
    urldata[''] = '' # to create '=&'. This might be a bug in the site 
    # was date in input?
    if Date:
        urldata['veilingen'] = Date
    # Add lot number
    urldata['meerfotos'] = Lot
    # generate url with urldata
    KavelUrl = _build_url(baseurl, urldata)
    if disp: print(KavelUrl)
    
    # get page and convert to tree object
    tree = _fetch_tree(KavelUrl)
    
    return tree,KavelUrl


def extractitem(tree,name,disp=False):
    '''
    Extract one item from the tree of a lot page.

    tree : parsed page; only its xpath() method is used
    name : item to extract
           "title"   : stripped text of the first h4 of class 'title'
           "images"  : list with the src of every img directly inside a div of class 'photo'
           "text"    : list of text nodes directly inside the div of class
                       'catalogusdetailitem split-item-first'
           "date"    : date string taken from the title of the page
           "nextlot" : lot number (int) the link to the next lot points to
           "price"   : tuple (Price, Draw); Price is 0 when the lot was not
                       sold or is part of a combination lot
    disp : print intermediate results when True

    Any other name returns None.
    Raises Exception when the title of the page (date) is not recognized or
    no price text is found, and ValueError when the price text is not
    recognized.
    '''
    
    if name == "title":
        
        # Title of this page. This can be found in a H4 with class name 'title'.
        path = '//h4[@class="title"]/text()'
        return tree.xpath(path)[0].strip()

    elif name == "images":
        
        # urls (src) of images. These are inside divs of class 'photo'
        lines = [item.get('src') for item in tree.xpath('//div[@class="photo"]/img')]
        
        if disp:
            print(lines)
        
        return lines
    
    elif name == "text":
        
        # All relevant text, which is in class 'catalogusdetailitem split-item-first'.
        lines=tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/text()')
        
        if disp:
            print(len(lines))
        
        return lines
    
    elif name == "date":
        
        # Date of this auction by taking the title of the page.
        # This is pretty obsolete, because date is given at start of this notebook.
        lines = tree.xpath('//title/text()')
        Date = lines[0]
        
        if 'Verkoop catalogus ' in Date:
            # title like "Verkoop catalogus 2017-12"
            Date = re.match(r'Verkoop catalogus (.*)',Date).group(1)

        elif 'Verkoop bij inschrijving ' in Date:
            # title like "Verkoop bij inschrijving 2019-0001 januari"
            M = re.match(r'Verkoop bij inschrijving (20[0-9]{2})-00([0-9]{2}).*',Date)
            if disp:
                print(M.group(2))
            Date = '-'.join([M.group(1),M.group(2)])

        else:
            raise Exception('TODO: implement')
        
        return Date
    
    elif name == "nextlot":
        
        # Number of next lot by checking out the link to the next lot in the current page.
        # 'K1900011801' will become 1801
        
        # link to next lot
        Link = tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/div[4]/div[3]/a')
        Tar = Link[0].get("href")
        
        # extract lot name: value of 'meerfotos', up to the next '&' (if any)
        nextLot = re.search(r'[?&]meerfotos=([^&]*)',Tar).group(1)
            
        # convert to integer
        nextLot = int(nextLot[-4:])

        if disp:
            print(nextLot,Tar,etree.tostring(Link[0]))
                
        return nextLot
    
    elif name == "price":
        
        # Price as float (or 0) and a flag that tells if the lot was sold after a draw
        
        # price can be bold or strong
        Price = tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/strong/text()')
        if len(Price) == 0:
            Price = tree.xpath('//b/text()')

        if disp: print(Price)
        
        if len(Price) == 0:
            # show the text of the lot to help finding out where the price is
            print('No Price found! use 0 for now')
            print(*tree.xpath('//*[@class="catalogusdetailitem split-item-first"]/text()'))
            raise Exception('Fix this')
        
        # select first in list (xpath returns lists)
        Price = Price[0]
            
        if Price == 'Na loting':
            Price = tree.xpath('//strong/text()')[0]
            Draw = True
        else:
            Draw = False        
 
        Tags = ['Zie kavel','Zie massakavel']# part of combination lot
        if any(Tag in Price for Tag in Tags) :
            Price = 0
        elif Price == 'Niet gegund':
            Price = 0
        else:
            M = re.match('Gegund voor: \u20ac'
                         r' *([0-9,.]*,[0-9]{2}) *\(excl. alle eventuele bijkomende kosten en belastingen\)',
                         Price)
            if M is None:
                raise ValueError('Price text not recognized: {!r}'.format(Price))
            if disp:print(M.group(0))
            # '1.117,00' -> 1117.00 : drop thousands separator, use decimal point
            Price = float(M.group(1).replace('.','').replace(',','.'))
            
        return Price,Draw
    

### First: Get all results from all pages
This will read all pages; the raw text is stored for later use.  
The "**next lot**" is linked in the current result. The function looks for this link and follows it. Because it is not known what the first lot will be, the starting point is hard-coded at `Lot = 1799` and incremented in steps of `+1` until the first lot is found. While the first lot has not (yet) been found, a period (`.`) is printed; otherwise the lot number is printed. The console output should start with "`.`" (a period).  
Searching for next lots continues until the next lot has a **smaller** value than the current one. This causes the routine to stop when the last lot points back to the first lot.

In [7]:
# empty lists
AllLot = []
AllTree = []
AllKavelUrl = []
doLoop = True # set to false at the end.
Lot = 1799 # first
while doLoop:
    # Lot id
    # 'K1800091800'
    Lotid = 'K{:s}00{:s}{:.0f}'.format(Date[2:4],Date[5:8],Lot)
    
    # read page
    read_success = False
    c=0
    while read_success == False:
        c+=1
        if pd.to_datetime(Date, format = '%Y-%m') < pd.to_datetime('2018-9-1'):
            [Tree,KavelUrl]=gettree_v1(url,Lot=str(Lot),Date=Date,disp=Verbose>1)
        else:
            [Tree,KavelUrl]=gettree(url,Lotid=Lotid,Date=Date,disp=Verbose>1)
        Content = Tree.xpath('//*[@id="content"]/div[1]/b/text()')
        if Content == 'failed': # future
            if c > 10:
                raise Exception('Retried, but failed')
            else:
                print('pause 1 sec and try again!')
                time.sleep(1)
                read_success = False
        else:
            read_success = True
    
    if Content and Content[0] == 'Niets gevonden.':
        # Lot number does not exist
        NextLot = Lot + 1
        print('.',end='-')
    else :
        # find next number
        try:
            NextLot=extractitem(Tree,'nextlot')
        except:
            print(KavelUrl)
            print('try again',end='>')
            NextLot = Lot
#             continue
#             print (etree.tostring(Tree,pretty_print=True).decode('utf8'))
            raise 
                   
        # add current results to list
        AllLot.append(Lot)
        AllTree.append(Tree)
        AllKavelUrl.append(KavelUrl)
        print(str(Lot),end='>')
    if NextLot < Lot :
        # First Lot again. Break loop
        doLoop = False
    else :
        Lot = NextLot

print('.',end='X')


.-1800>1801>1802>1803>1804>1805>1806>1807>1808>1809>1810>1811>1812>1814>1815>1816>1818>1819>1820>1821>1822>1823>1824>1825>1826>1827>1828>1829>1830>1831>1832>1833>1834>1835>1837>1838>1839>1840>1841>1842>1843>1844>1845>1846>1847>1848>1849>1850>1851>1852>1853>1854>1855>1856>1857>1858>1859>1860>1861>1862>1863>1864>1865>1866>1867>1868>1869>1870>1871>1872>1873>1874>1875>1876>1877>1878>1879>1880>1881>2200>2201>2202>2203>2204>2205>2206>2207>2208>2209>2211>2213>2214>2216>2217>2218>2219>2220>2221>2222>2400>2401>2402>2403>2404>2405>2406>2407>2408>2409>2410>2411>2412>2413>2414>2415>2416>2417>2418>2419>2420>2422>2423>2424>2425>2600>2601>2602>2603>2604>2605>2606>2607>2608>2609>2610>2611>2612>2613>2614>2615>2616>2617>2619>2620>3000>3001>3002>3003>3004>3005>3006>3007>3008>3009>3010>3011>3012>3013>3014>3015>3016>3017>3018>3019>3020>3021>3022>3023>3024>3025>3026>3027>3028>3029>3030>3031>3032>3033>3034>8100>8101>8102>8103>8104>8105>8106>8107>8109>8110>8111>8112>8113>8114>8115>8116>8117>8119>8121>8122>812

### Basic parsing
Raw text is parsed for the first time. Some basics are stored in a pandas.DataFrame:  
- price
- image urls
- title
- ..

In [8]:
Verbose = 0
# One dict per page. The data frame is created once after the loop, which
# avoids copying the whole frame for every page.
rows = []
index = []
# Prefix for the image urls: url without the '/catalogus' or '/catalogi' part
site_root = re.sub(r'/catalog((us)|(i))','',url)
# loop over all pages
for iK, tree in enumerate(AllTree):
    
    #
    # create an index
    #

    #   date
    if "Date" not in locals() or not Date:
        Date = extractitem(tree,'date',disp=Verbose>2)

    DT = pd.to_datetime(Date,format="%Y-%m")
    
    #   title and lot number
    title = extractitem(tree,'title')
    Lotid = re.match('Kavel (.*)',title).group(1)
    if Lotid.startswith('K'):
        # 'K1900059601' : lot number is in the last four digits
        Lot = int(Lotid[-4:])
    else:
        Lot = int(Lotid)

    #   index, like '2019-5-9601'
    IX = "-".join([str(DT.year),str(DT.month),str(Lot)])

    if Verbose>0: print(IX)
    
    
    #
    # extract images
    #
    
    image_urls = [site_root + item for item in extractitem(tree,'images',disp=Verbose>2)]
            
     
    #
    # Price
    #
    
    [Price,Draw] = extractitem(tree,'price',disp=Verbose>2)
        
        
    #    
    # add to rows of data frame
    #
    
    rows.append({'Source' : AllKavelUrl[iK],
                 'Title' : title,
                 'Price' : Price,
                 'Draw' : Draw,
                 'Raw_text' : extractitem(tree,'text'),
                 'N_images' : len(image_urls),
                 'Images' : image_urls})
    index.append(IX)

# Naming the columns keeps them in place when there are no pages at all
out = pd.DataFrame(rows,
                   index = index,
                   columns = ['Source','Title','Price','Draw','Raw_text','N_images','Images'])
out.tail()

Unnamed: 0,Source,Title,Price,Draw,Raw_text,N_images,Images
2019-5-9601,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059601,1117.0,False,"[K1900059601\r, Demontage fietsen\r, Partij ca...",1,[http://www.domeinenrz.nl/ufc/static/155661817...
2019-5-9602,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059602,0.0,False,"[K1900059602\r, LUCHTBEHANDELINGSKAST\r, AIRBO...",3,[http://www.domeinenrz.nl/ufc/static/155661777...
2019-5-9700,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059700,3000.0,False,"[K1900059700\r, Partij isolatieplaten\r, w.o. ...",3,[http://www.domeinenrz.nl/ufc/static/155661777...
2019-5-9701,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059701,0.0,False,"[K1900059701\r, Generator/ warmtekrachtmodule\...",5,[http://www.domeinenrz.nl/ufc/static/155661777...
2019-5-9702,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059702,655.0,False,"[K1900059702\r, Watergekoelde airco\r, OPTICLI...",2,[http://www.domeinenrz.nl/ufc/static/155661776...


### In depth parsing
Do some more sophisticated parsing. Use `Raw_text` as input.  
When a line is not recognized, it is printed to the console. If a fragment or tag occurs often, one might choose to add it to the external text files.

In [9]:
Verbose = 0
# parse raw text
for IX in out.index :
    
    # find info
    # NOTE: rt is taken from the frame as is; the pops and the escaping below
    # work on that list in place, so they show up in Raw_text afterwards.
    rt = out.loc[IX,"Raw_text"]
    
    # first line:
    
    # Is it a draw?
    Val = rt.pop(0) 
    if Val == 'Na loting':
        Val = rt.pop(0) # val is now kavelnr
        out.loc[IX,"Draw"] = True
    else:
        out.loc[IX,"Draw"] = False
    
    # when lot number is followed by an asterisk there is a note
    if Val.endswith('*\r'):
        Val = Val[0:-2]
        out.loc[IX,"Note"] = True
    else :
        Val = Val.strip()
        out.loc[IX,'Note'] = False
        
    if Verbose>0:
        print(Val)

    # store lot nr        
    out.loc[IX,"LotNr"]=Val
    
    
    # second line
    out.loc[IX,"LotType"]=rt.pop(0).strip()

    # third line
    Val = rt.pop(0).strip()
    # This line is brand or optional line with type of lot
    # All caps is brand
    if Val in ['Quad','Kampeerwagen/ camper','Pleziervaart motorvaartuig met opbouw en open kuip','Rubberboot'] or not Val.isupper():
        out.loc[IX,"LotType"] += ' (' + Val + ')'
        if Verbose>0:print(Val)
        Val = rt.pop(0).strip() # now it is brand
    out.loc[IX,"ItemBrand"]=Val

    
    
    # escape characters, repair typos and translate 
    for i, text in enumerate(rt):
        
        # encode string as bytes; non-ascii characters become xml character references
        raw = text.encode('ascii',errors='xmlcharrefreplace')
        
        # replace text
        for pat,sub in zip(repfragments.Pattern,repfragments.Replace):
            raw = re.sub(pat.encode('ascii',errors='xmlcharrefreplace'),sub.encode('ascii',errors='xmlcharrefreplace'),raw)
        
        # decode back to string, but special characters escaped to xml
        rt[i] = raw.decode('ascii')

    # Pull value after trailing or leading pattern (bgntag/endtag)
    for Tag,Field in zip(tags.Pattern,tags.Field):
        M = re.search(Tag,'\n'.join(rt))
        if M:
            # the pattern must have a group named 'val'
            Val = M.group('val')
            if Verbose>2:
                print(str(Field) + ' : ' + M.group(0).replace('\n','[newline]') + '\n\t' + '|' + Val + '|')
            # remove pattern and make rt a list again.
            rt = '\n'.join(rt).replace(M.group(0),'').split('\n')
        else:
            Val = ''
        out.loc[IX,Field] = Val        

    # Pattern in full text? (flagtag)
    for Tag,Field in zip(flagtags.Pattern,flagtags.Field):
        # flagtags might occur more than once, hence a list of finditer results
        Ms = list(re.finditer(Tag,'\n'.join(rt)))
        if Ms:
            Val = True
            for M in Ms:
                if Verbose>2:
                    print(str(Field) + ' : ' + M.group(0).replace('\n','[newline]') + '\n\t' + '|' + str(Val) + '|')
                # remove pattern and make rt a list again.
                rt = '\n'.join(rt).replace(M.group(0),'').split('\n')
        else:
            Val = False
        out.loc[IX,Field] = Val

        
        
    # loop through remaining lines

    for line in rt:
               
        # do comparison in bytes
        line = line.encode('ascii',errors='xmlcharrefreplace')
        if Verbose>2:
            print(line)
         
        # line is empty.. skip .. next
        if not line :# empty
            continue
            
        # line starting with '*' is a note
        if out.loc[IX,'Note'] and line.startswith(b'*'):
            if Verbose>2:
                print('\tNote:',end='')
                print(out.loc[IX,'Note'],end='')
                print(line)
            Val = line[1:].decode('ascii')
            out.loc[IX,'Note'] = Val
            continue
                
        # line is not recognized: collect it in SupInfo and show it
        line = line.decode('ascii')
        
        # create empty string if not exist
        # (pd.isna instead of pd.np.isnan: pandas.np is gone in recent pandas)
        if (
            'SupInfo' not in out.loc[IX].index
        ) or (
            (
                not isinstance(out.loc[IX,'SupInfo'], str)
            ) and (
                pd.isna(out.loc[IX,'SupInfo'])
            )
        ):
            out.loc[IX,'SupInfo'] = ''
        out.loc[IX,"SupInfo"] = '\n'.join([out.loc[IX,'SupInfo'] , str(line)])
        print(str(IX) + '[' + str(line) + ']')
  

2019-5-1818[Stuurslot kapot]
2019-5-1838[Scooter heeft voorschade]
2019-5-1853[Rem en gaspedal omgekeerd.]
2019-5-1860[De steun voor de uitlaat is afgebroken van het motorblok]
2019-5-1877[Acculader aanwezig]
2019-5-1878[kenteken KV-86-34]
2019-5-1879[kenteken KV-86-32]
2019-5-1880[kenteken KV-86-35]
2019-5-1881[kenteken KV-86-33]
2019-5-2401[open wagen]
2019-5-2406[Linker portier gaat niet open]
2019-5-2412[Buitenboordmotor]
2019-5-2412[MERCURY]
2019-5-2412[Motornummer 0T553296]
2019-5-2412[Motorvermogen 115PK]
2019-5-2412[Aanhangwagen]
2019-5-2412[Enkelasser]
2019-5-2412[Merk onbekend]
2019-5-2412[Type onbekend]
2019-5-2412[Bouwjaar onbekend]
2019-5-2413[Datum eerste registratie 23-08-2010]
2019-5-2413[Motor]
2019-5-2413[MERCRUISER]
2019-5-2413[Motornummer 0F1551173]
2019-5-2413[Motorvermogen 150 kW / 204 pk]
2019-5-2413[Aanhangwagen]
2019-5-2413[Dubbelasser]
2019-5-2413[RIBA]
2019-5-2413[Type bvt 2000]
2019-5-2413[Kenteken 58-WG-LS]
2019-5-2414[Motor]
2019-5-2414[JOHNSON]
2019-5-241

In [10]:
# Show the last rows of the result to check the parsed data
out.tail()

Unnamed: 0,Source,Title,Price,Draw,Raw_text,N_images,Images,Note,LotNr,LotType,...,rhd,no_odo,no_road,disclaim_cr6,crewcab,carwrap,no_vin,d_lic,btw21,SupInfo
2019-5-9601,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059601,1117.0,False,"[, Het kan zijn er dat er belangrijke onderdel...",1,[http://www.domeinenrz.nl/ufc/static/155661817...,False,K1900059601,Demontage fietsen (Partij ca. 50 stuks),...,False,False,False,False,False,False,False,False,False,\nHet kan zijn er dat er belangrijke onderdele...
2019-5-9602,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059602,0.0,False,"[Type S40/16Q, Uitvoering 21.675 m3/HR @ 500pa...",3,[http://www.domeinenrz.nl/ufc/static/155661777...,False,K1900059602,LUCHTBEHANDELINGSKAST,...,False,False,False,False,False,False,False,False,False,\nUitvoering 21.675 m3/HR @ 500pa\nDubbelwandi...
2019-5-9700,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059700,3000.0,False,"[Dikte 80 mm, Lengtes van 200 tot 290 cm*, Lic...",3,[http://www.domeinenrz.nl/ufc/static/155661777...,False,K1900059700,Partij isolatieplaten (w.o. type sandwich),...,False,False,False,False,False,False,False,False,False,\nDikte 80 mm\nLengtes van 200 tot 290 cm*\nLi...
2019-5-9701,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059701,0.0,False,"[Type GG140SoE, Bouwjaar 2009, Aardgas, 140 kW...",5,[http://www.domeinenrz.nl/ufc/static/155661777...,False,K1900059701,Generator/ warmtekrachtmodule,...,False,False,False,False,False,False,False,False,False,\nAardgas\n140 kW\nDeze kavel dient op afspraa...
2019-5-9702,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K1900059702,655.0,False,"[Type J15000 PRO 3, 14,7 kW, Bouwjaar 2017, Af...",2,[http://www.domeinenrz.nl/ufc/static/155661776...,False,K1900059702,Watergekoelde airco,...,False,False,False,False,False,False,False,False,False,"\n14,7 kW\nAfmetingen ca. 127 x 83 x 54,5 cm\n..."


### Save results to disk

In [11]:
# Store the data frame as a pickle; the auction month (Date) is part of the file name
file_name = '../data/drz-data-{}.pkl'.format(Date)
# show where the results are written
print(file_name)
out.to_pickle(file_name)

../data/drz-data-2019-05.pkl


# Next: add rdw data

Because rdw data changes constantly, it is advisable to run the notebook that adds rdw data to the above results soon after running this one.