# Store results of auction of Dienst Roerende Zaken
Monthy results of auction are publicized on http://www.domeinenrz.nl/catalogus. This Notebook scrapes the result from the drz website and parses the text and stores it in a dataframe.  
- - - 

### User variables
- `Date`: Date of current results. This is needed to create proper url  
- `Verbose`: Debug variable. 

In [None]:
Date = '2020-01' # yyyy-mm
Verbose = 0 # debug level

toggle_to_opbod = True

### Import modules

In [2]:
import pandas as pd
import urllib
from lxml import html, etree
import requests
import codecs 
import re
import time

In [3]:
# needed for new (as of feb '18) url format
import locale
locale.setlocale(locale.LC_TIME,'nl_NL')
pd.to_datetime('now').strftime('%A %d %B')
pd.to_datetime(Date,format='%Y-%m').strftime('%A %d %B')

'woensdag 01 januari'

### Internal variables

In [4]:
# website with results
url = 'http://www.domeinenrz.nl/catalogus'

# IRS (belastingdienst) auction was added. Naming of url changed.
if pd.to_datetime(Date,format='%Y-%m') >= pd.to_datetime('2019-05',format='%Y-%m'):
    url = 'http://www.domeinenrz.nl/catalogi'

### Read external data
These files are used to recognize text fragments. Regex patterns are mapped to field names.

In [5]:
tags=pd.read_csv('./regex-patterns/drz-re-patt-tag.txt',
                    comment='#',
                    header=None,
                    quotechar='"',
                    delimiter=",",
                    skipinitialspace=True).rename(columns={0 : 'Field',1 : 'Pattern'})
                    
flagtags=pd.read_csv('./regex-patterns/drz-re-patt-hastag.txt',
                     comment='#',
                     header=None,
                     quotechar='"',
                     delimiter=",",
                     skipinitialspace=True).rename(columns={0 : 'Field',1 : 'Pattern'})

repfragments=pd.read_csv('./regex-patterns/drz-re-patt-replace.txt',
                      comment='#',
                      header=None,
                      quotechar='"',
                      delimiter=",",
                      skipinitialspace=True).rename(columns={0 : 'Pattern',1 : 'Replace'}).fillna('')

### Functions

In [6]:
def gettree(baseurl,Lotid,Date=Date,disp=False):
    
    '''
    tree from url. Version where urls are formatted as 
    http://www.domeinenrz.nl/catalogus/verkoop_bij_inschrijving_2018-0009?=&meerfotos=K1800091800
    later it even changed to:
    .  .  .  .  .  .  . klik_hier_voor_verkoop_bij_inschrijving_2018-0011
    Then IRS was added:
    .  .  .  .  .  .  .  .  .catalogi/verkoop_bij_inschrijving_2020-0001?=&meerfotos=K2000011800
    Which includes:
    .  .  .  .  .  .  .  .  .  .  .  .verkoop_bij_opbod_2020-0101?veilingen=2020-0101&meerfotos=K2001011977&status=both
    '''
    
    # Change date format and extend to url
    if toggle_to_opbod:
        Datestr = Date + '01'
        baseurl += '/verkoop_bij_opbod_{:s}'.format(Datestr)
    else:
        Datestr = Date.replace('-','-00')
        if pd.to_datetime(Datestr,format='%Y-00%m') < pd.to_datetime('2018-0011',format='%Y-00%m'):
            baseurl += '/verkoop_bij_inschrijving_{:s}'.format(Datestr)
        elif pd.to_datetime(Datestr,format='%Y-00%m') < pd.to_datetime('2019-0002',format='%Y-00%m'):
            baseurl += '/klik_hier_voor_verkoop_bij_inschrijving_{:s}'.format(Datestr)
        elif pd.to_datetime(Datestr,format='%Y-00%m') < pd.to_datetime('2019-0005',format='%Y-00%m'):
            baseurl += '/verkoop_bij_inschrijving_{:s}_{:s}'.format(Datestr,pd.to_datetime(Datestr,format='%Y-00%m').strftime('%B'))
        else:
            baseurl += '/verkoop_bij_inschrijving_{:s}'.format(Datestr)
    

    
    # create url
    urldata = {}
    if toggle_to_opbod == False:
        urldata[''] = '' # to create '=&'. This might be a bug in the site 
    
    # Add auction id
    urldata['veilingen'] = Datestr

    if toggle_to_opbod:
        urldata['status'] = 'both' # or "closed"        

    
    # Add lot number
    urldata['meerfotos'] = Lotid
    # generate url with urldata
    KavelUrl = baseurl + '?' + urllib.parse.urlencode(urldata)
    if disp: print(KavelUrl)
    
    # get html string
    req_success = False
    c=0
    while req_success == False:
        c+=1
        try:
            page = requests.get(KavelUrl)
            req_success = True
        except:
            if c > 10:
                raise Exception('Retried, but failed')
            else:
                print('pause 1 sec and try again!')
                time.sleep(1)
                req_success = False

    # find encoding
    DecodeType = page.headers["Content-type"]
    T = 'charset='
    DecodeType = DecodeType[DecodeType.find(T)+len(T):]
    # convert to unicode
    htmlstring = codecs.decode(page.content, DecodeType)
    # convert string to tree object
    tree = html.fromstring(htmlstring)
    
    return tree,KavelUrl

def gettree_v1(baseurl,Lot,Date=None,disp=False):
    
    '''
    tree from url. Version where urls are formatted as 
    http://www.domeinenrz.nl/catalogus?=&meerfotos=1799&veilingen=2018-09"
    '''
    
    # create url
    urldata = {}
    urldata[''] = '' # to create '=&'. This might be a bug in the site 
    # was date in input?
    if Date != None and len(Date) != 0:
        urldata['veilingen'] = Date
    # Add lot number
    urldata['meerfotos'] = Lot
    # generate url with urldata
    KavelUrl = baseurl + '?' + urllib.parse.urlencode(urldata)
    if disp: print(KavelUrl)
    
    # get html string
    page = requests.get(KavelUrl)
    # find encoding
    DecodeType = page.headers["Content-type"]
    T = 'charset='
    DecodeType = DecodeType[DecodeType.find(T)+len(T):]
    # convert to unicode
    htmlstring = codecs.decode(page.content, DecodeType)
    # convert string to tree object
    tree = html.fromstring(htmlstring)
    
    return tree,KavelUrl


def extractitem(tree,name,disp=False):
    "extract lines from tree"
    
    if name == "title":
        
        '''
        Return title of this page. This can be found in a H4 with class name 'title'.
        '''
            
        # path = '//body/div[@id="mainwrapper"]/div[@id="main"]/div[@class="wrapper"]/div[@class="article"]/div[@class="catalogus"]/div[@class="catalogusdetailitem split-item-first"]/a/h4[@class="title"]//text()'
        path = '//h4[@class="title"]/text()'
        return tree.xpath(path)[0].strip()

    elif name == "images":
        
        '''
        Return urls (src) of images. These are inside divs of class 'photo'
        '''
        
        lines = [item.get('src') for item in tree.xpath('//div[@class="photo"]/img')]
        
        if disp:
            print(lines)
        
        return lines
    
    elif name == "text":
        
        '''
        Just return all relevant text, which is in class 'catalogusdetailitem split-item-first'.
        '''
        
        lines=tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/text()')
        
        if disp:
            print(len(lines))
        
        return lines
    
    elif name == "date":
        
        '''
        Return date of this auction by taking the title of the page.
        This is pretty obsolete, because date is given at start of this notebook.
        '''
        
        lines = tree.xpath('//title/text()')
        Date = lines[0]
        
        if 'Verkoop catalogus ' in Date:
            # title like "Verkoop catalogus 2017-12"
            Date = re.match('Verkoop catalogus (.*)',Date)[1]

        elif 'Verkoop bij inschrijving ' in Date:
            # title like "Verkoop bij inschrijving 2019-0001 januari"
            M = re.match('Verkoop bij inschrijving (20[0-9]{2})-00([0-9]{2}).*',Date)
            print(M.group(2))
            Date = '-'.join([M.group(1),M.group(2)])

        else:
            raise Exception('TODO: implement')

        # Date = Date.strip()
        # T = 'Verkoop catalogus '
        # Date = Date[Date.index(T)+len(T):]
        
        return Date
    
    elif name == "nextlot":
        
        '''
        Return number of next lot by checking out the link to the next lot in the current page.
        'K1900011801' will become 1801
        '''
        
        
        # link to next lot
        Link = tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/div[4]/div[3]/a')
        Tar = Link[0].get("href")
        
        # extract lot name
        #nextLot = re.match('\?meerfotos=(.*)',Tar).group(1)
        nextLot = re.match('.*[\?,\&]meerfotos=(.*)(\&.*)?',Tar).group(1)

        if "&veilingen=" in nextLot:
            nextLot = re.match('(.*)&',nextLot).group(1)
            
        # convert to integer
        nextLot = int(nextLot[-4:])

        if disp:
            print(nextLot,Tar,etree.tostring(Link[0]))
                
        return nextLot
    
    elif name == "price":
        
        '''
        Return price as float
        '''
        
        
        # price can be bold or strong
        Price = tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/strong/text()')

        if toggle_to_opbod:
            # Starts with status
            if len(Price) < 2:
                # fall back: no bold
                Price = [tree.xpath('//div[@class="catalogusdetailitem split-item-first"]/text()')[0]]
        else:
            if len(Price) == 0:
                Price = tree.xpath('//b/text()')

        if disp: print(Price)
        
        if len(Price) == 0:
            print('No Price found! use 0 for now')
            print(*tree.xpath('//*[@class="catalogusdetailitem split-item-first"]/text()'))
            Price = ['Niet gegund']
            raise Exception('Fix this')
        
        # select first in list (xpath returns lists)
        if toggle_to_opbod:
            Price = Price[-1]
        else:
            Price = Price[0]
            
        if Price == 'Na loting':
            Price = tree.xpath('//strong/text()')[0]
            Draw = True
        else:
            Draw = False        
 
        Tags = ['Zie kavel','Zie massakavel', 'Zie Kavel']# part of combination lot
        if any([Tag in Price for Tag in Tags]) :
            Price = 0
        elif Price == 'Niet gegund':
            Price = 0
        else:
            M = re.match(u'Gegund voor: \u20ac *([0-9,.]*,[0-9]{2}) *\(excl. alle eventuele bijkomende kosten en belastingen\)',Price)
            if disp:print(M.group(0))
            Price = float(M.group(1).replace('.','').replace(',','.'))
            # Tag1 = u'Gegund voor: \u20ac'
            # Tag2 = u'(excl. alle eventuele bijkomende kosten en belastingen)'
            # Price = float(Price[Price.index(Tag1)+len(Tag1):Price.index(Tag2)].strip().replace('.','').replace(',','.'))
            
        return Price,Draw
    

### First: Get all results from all pages
This will read all pages and the raw text is stored for later use.  
The "**next lot**" is linked in the current result. The function will look for this link and proceed. Because it is not know what the first lot will be, it is hard coded at `Lot = 1799`. It will increment with a step of `+1` to find the first lot. If the first lot is not (yet) found a period (`.`) is printed, otherwise the lot nummer will be printed. The console output should start with "`.`" (a period).  
Searching for next lots will continue untill the next lot has a **smaller** value that the current. This will cause the routine to stop when the last lot points back to the first lot.

In [7]:
# empty lists
AllLot = []
AllTree = []
AllKavelUrl = []
doLoop = True # set to false at the end.
# first lot
if toggle_to_opbod:
    Lot = 1000
else:
    Lot = 1799
    
while doLoop:
    # Lot id
    # 'K1800091800'
    if toggle_to_opbod:
        Lotid = 'K{:s}{:s}01{:.0f}'.format(Date[2:4],Date[5:8],Lot)
    else:
        Lotid = 'K{:s}00{:s}{:.0f}'.format(Date[2:4],Date[5:8],Lot)
    
    # read page
    read_success = False
    c=0
    while read_success == False:
        c+=1
        if pd.to_datetime(Date, format = '%Y-%m') < pd.to_datetime('2018-9-1'):
            [Tree,KavelUrl]=gettree_v1(url,Lot=str(Lot),Date=Date,disp=Verbose>1)
        else:
            [Tree,KavelUrl]=gettree(url,Lotid=Lotid,Date=Date,disp=Verbose>1)
        Content = Tree.xpath('//*[@id="content"]/div[1]/b/text()')
        if Content == 'failed': # future
            if c > 10:
                raise Exception('Retried, but failed')
            else:
                print('pause 1 sec and try again!')
                time.sleep(1)
                read_success = False
        else:
            read_success = True
    
    if Content and Content[0] == 'Niets gevonden.':
        # Lot number does not exist
        NextLot = Lot + 1
        print('.',end='-')
    else :
        # find next number
        try:
            NextLot=extractitem(Tree,'nextlot')
        except:
            print(KavelUrl)
            print('try again',end='>')
            NextLot = Lot
#             continue
#             print (etree.tostring(Tree,pretty_print=True).decode('utf8'))
            raise 
                   
        # add current results to list
        AllLot.append(Lot)
        AllTree.append(Tree)
        AllKavelUrl.append(KavelUrl)
        print(str(Lot),end='>')
    if NextLot < Lot :
        # First Lot again. Break loop
        doLoop = False
    else :
        Lot = NextLot

print('.',end='X')


.-.-.-.-1004>1007>1008>1010>1011>1012>1013>1016>1017>1018>1020>1021>1022>1025>1027>1028>1029>1031>1032>1034>1036>1037>1038>1039>1041>1042>1044>1053>1056>1057>1058>1061>1062>1064>1066>1067>1068>1069>1071>1072>1076>1079>1081>1083>1084>1087>1088>1089>1091>1092>1093>1095>1096>1097>1100>1101>1103>1104>1105>1106>1108>1112>1113>1114>1115>1116>1117>1118>1119>1121>1122>1123>1124>1125>1127>1129>1130>1132>1133>1134>1135>1136>1137>1139>1140>1143>1146>1147>1148>1149>1151>1152>1153>1155>1157>1158>1159>1160>1161>1163>1164>1165>1169>1170>1174>1178>1180>1181>1183>1185>1186>1188>1189>1190>1191>1192>1193>1194>1196>1198>1200>1201>1203>1204>1206>1207>1208>1209>1210>1211>1212>1213>1214>1218>1220>1222>1225>1227>1228>1229>1234>1235>1236>1237>1239>1240>1241>1242>1243>1244>1245>1246>1247>1248>1249>1250>1252>1253>1254>1256>1258>1260>1261>1262>1264>1265>1266>1267>1271>1273>1275>1276>1277>1278>1279>1280>1282>1283>1284>1286>1290>1291>1293>1294>1295>1297>1298>1299>1301>1304>1305>1308>1315>1316>1317>1318>1319>1320>13

### Basic parsing
Raw text is parsed for the first time. Some basics are stored in a pandas.DataFrame:  
- price
- image urls
- title
- ..

In [8]:
Verbose = 0
# empty list
out = None
# loop over all pages
for iK, tree in enumerate(AllTree):
    
    #
    # create an index
    #

    #   date
    if "Date" not in locals() or not Date:
        Date = extractitem(tree,'date',disp=Verbose>2)

    DT = pd.to_datetime(Date,format="%Y-%m")
    
    #   title and lot number
    title = extractitem(tree,'title')
    Lotid = re.match('Kavel (.*)',title).group(1)
    #Lotid = title[len('Kavel '):]
    if Lotid.startswith('K'):
        Lot = int(Lotid[-4:])
    else:
        Lot = int(Lotid)

    #   index
    IX = "-".join([str(DT.year),str(DT.month),str(Lot)])

    if Verbose>0: print(IX)
    
    
    #
    # extract images
    #
    
    
    image_urls = [re.sub('\/catalog((us)|(i))','',url) + item for item in extractitem(tree,'images',disp=Verbose>2)]


    
    #
    # Status
    #
    
    if toggle_to_opbod:
        # FUTURE
        pass
        #extractitem(tree, 'status', disp=Verbose>2)
        
     
    #
    # Price
    #
    
    [Price,Draw] = extractitem(tree,'price',disp=Verbose>2)
        
        
        
    #    
    # add to data frame
    #
    
    out = pd.concat([out,pd.DataFrame({'Source' : AllKavelUrl[iK],
                                      'Title' : title,
                                      'Price' : Price,
                                      'Draw' : Draw,
                                      'Raw_text' : [extractitem(tree,'text')],
                                      'N_images' : len(image_urls),
                                      'Images' : [image_urls]},
                                      index = [IX])])
out.tail()

Unnamed: 0,Source,Title,Price,Draw,Raw_text,N_images,Images
2020-1-7303,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017303,1038.0,False,"[K2001017303, Bedrijfswagen, OPEL, Type vivaro...",3,[http://www.domeinenrz.nl/ufc/static/157778557...
2020-1-7304,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017304,0.0,False,"[Niet gegund, K2001017304, Personenauto, VOLKS...",3,[http://www.domeinenrz.nl/ufc/static/157778558...
2020-1-7310,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017310,420.0,False,"[K2001017310, Personenauto, RENAULT, Type mega...",2,[http://www.domeinenrz.nl/ufc/static/157778558...
2020-1-7311,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017311,184.0,False,"[K2001017311, Personenauto, PEUGEOT, Type 307;...",3,[http://www.domeinenrz.nl/ufc/static/157778558...
2020-1-7312,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017312,184.0,False,"[K2001017312, Personenauto, PEUGEOT, Type 206;...",2,[http://www.domeinenrz.nl/ufc/static/157778557...


### In depth parsing
Do some more sofisticated parsing. Use `Raw_text` as input.  
When a line is not recognized. It will be printed to console. One might choose to add to the external text files if a fragment of tag occurs often.

In [9]:
Verbose = 0
# parse raw text
for IX in out.index :
    
    # find info
    
    rt = out.loc[IX,"Raw_text"]
    
    # first line:
    
    # Is it a draw?
    Val = rt.pop(0) 
    if Val == 'Na loting':
        Val = rt.pop(0) # val is now kavelnr
        out.loc[IX,"Draw"] = True
    else:
        out.loc[IX,"Draw"] = False
    
    # when lot number is followed by an asteriks there is a note
    if Val.endswith('*\r'):
        Val = Val[0:-2]
        out.loc[IX,"Note"] = True
    else :
        Val = Val.strip()
        out.loc[IX,'Note'] = False
        
    if Verbose>0:
        print(Val)

    # store lot nr        
    out.loc[IX,"LotNr"]=Val
    
    
    # second line
    out.loc[IX,"LotType"]=rt.pop(0).strip()

    # third line
    Val = rt.pop(0).strip()
    # This line is brand or optional line with type of lot
    # All caps is brand
    if Val in ['Quad','Kampeerwagen/ camper','Pleziervaart motorvaartuig met opbouw en open kuip','Rubberboot'] or not Val.isupper():
        out.loc[IX,"LotType"] += ''.join([' (' + Val + ')'])
        if Verbose>0:print(Val)
        Val = rt.pop(0).strip() # now it is brand
    out.loc[IX,"ItemBrand"]=Val

    
    
    # escape characters, repair typos and translate 
    for i in range(len(rt)):
        
        # encode string as bytes
        rt[i] = rt[i].encode('ascii',errors='xmlcharrefreplace')
        
        # replace text
        for pat,sub in zip(repfragments.Pattern,repfragments.Replace):
            rt[i] = re.sub(pat.encode('ascii',errors='xmlcharrefreplace'),sub.encode('ascii',errors='xmlcharrefreplace'),rt[i])
        
        # decode back to string, but special characters escaped to xml
        rt[i]=rt[i].decode('ascii')

    # Pull value after trailing or leading pattern (bgntag/endtag)
    for Tag,Field in zip(tags.Pattern,tags.Field):
        M = re.search(Tag,'\n'.join(rt))
        if M:
            Val = M.group('val')
            if Verbose>2:
                print(str(Field) + ' : ' + M.group(0).replace('\n','[newline]') + '\n\t' + '|' + Val + '|')
            # remove pattern and make rt a list again.
            rt = '\n'.join(rt).replace(M.group(0),'').split('\n')
        else:
            Val = ''
        out.loc[IX,Field] = Val        

    # Pattern in full text? (flagtag)
    for Tag,Field in zip(flagtags.Pattern,flagtags.Field):
        # flagtags might occur more than once, hence a list of finditer results
        Ms = list(re.finditer(Tag,'\n'.join(rt)))
        if Ms:
            Val = True
            for M in Ms:
                if Verbose>2:
                    print(str(Field) + ' : ' + M.group(0).replace('\n','[newline]') + '\n\t' + '|' + str(Val) + '|')
                # remove pattern and make rt a list again.
                rt = '\n'.join(rt).replace(M.group(0),'').split('\n')
        else:
            Val = False
        out.loc[IX,Field] = Val

        
        
    # loop trough remaining lines

    for line in rt:
               
        # do comparison in bytes
        line = line.encode('ascii',errors='xmlcharrefreplace')
        if Verbose>2:
            print(line)
            
        # parsing
        isParsed = False # some accounting: in the end this line should be parsed
         
        # line is empty.. skip .. next
        if not line :# empty
            isParsed = True
            continue
            
        # line starting with '*' is a note
        if out.loc[IX,'Note'] and line.startswith(bytes('*','ascii')):
            if Verbose>2:
                print('\tNote:',end='')
                print(out.loc[IX,'Note'],end='')
                print(line)
            Val = line[1:].decode('ascii')
            out.loc[IX,'Note'] = Val
            isParsed = True
            continue
                
        if isParsed == False:
            line = line.decode('ascii')
            
            # create empty string if not exist
            if (
                'SupInfo' not in out.loc[IX].index
            ) or (
                (
                    type(out.loc[IX,'SupInfo']) != str
                ) and (
                    pd.np.isnan(out.loc[IX,'SupInfo'])
                )
            ):
                out.loc[IX,'SupInfo'] = ''
            out.loc[IX,"SupInfo"] = '\n'.join([out.loc[IX,'SupInfo'] , str(line)])
            print(str(IX) + '[' + str(line) + ']')
  

2020-1-1100[Rechter voorportier kan niet open.]
2020-1-1105[Achterbank is manco]
2020-1-1129[*Kavel is vervallen. ]
2020-1-1159[*Kavel is vervallen. ]
2020-1-1209[In de laadruimte bevindt zich een waterreservoir.]
2020-1-1236[*Kavel is vervallen. ]
2020-1-1250[*Kavel is vervallen. ]
2020-1-1305[*Kavel is vervallen. ]
2020-1-1319[*Kavel is vervallen. ]
2020-1-1926[*Kavel is vervallen. ]
2020-1-1957[*Kavel is vervallen. ]
2020-1-1961[Kilometerteller gaat tot 99.999 km en begint dan opnieuw.]
2020-1-1979[Deels gedemonteerd]


In [10]:
out.tail()

Unnamed: 0,Source,Title,Price,Draw,Raw_text,N_images,Images,Note,LotNr,LotType,...,no_odo,no_road,disclaim_cr6,crewcab,carwrap,no_vin,d_lic,btw21,legguard,SupInfo
2020-1-7303,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017303,1038.0,False,"[Type vivaro 2.5dti 2.7t l1h1, Kenteken 07-BR-...",3,[http://www.domeinenrz.nl/ufc/static/157778557...,False,K2001017303,Bedrijfswagen,...,False,False,False,False,False,False,False,False,False,
2020-1-7304,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017304,0.0,False,"[Type golf, Kenteken 4-XSL-33, Afgelezen km-st...",3,[http://www.domeinenrz.nl/ufc/static/157778558...,False,Niet gegund,K2001017304 (Personenauto),...,False,False,False,False,False,False,False,False,False,
2020-1-7310,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017310,420.0,False,"[Type megane scenic; 1.6 16v (83kw), Kenteken ...",2,[http://www.domeinenrz.nl/ufc/static/157778558...,False,K2001017310,Personenauto,...,False,False,False,False,False,False,False,False,False,
2020-1-7311,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017311,184.0,False,"[Type 307; 2.0hdi 66kw 5drs, Kenteken 05-JT-VJ...",3,[http://www.domeinenrz.nl/ufc/static/157778558...,False,K2001017311,Personenauto,...,False,False,False,False,False,False,False,False,False,
2020-1-7312,http://www.domeinenrz.nl/catalogi/verkoop_bij_...,Kavel K2001017312,184.0,False,"[Type 206; sw 1.4, Kenteken 56-LL-HB, Afgeleze...",2,[http://www.domeinenrz.nl/ufc/static/157778557...,False,K2001017312,Personenauto,...,False,False,False,False,False,False,False,False,False,


### Save results to disk

In [11]:
if toggle_to_opbod:
    file_name = r'../../../python-nb/data/drz-data-opbod-{}.pkl'.format(Date)
else:
    file_name = '../data/drz-data-{}.pkl'.format(Date)
print(file_name)
out.to_pickle(file_name)

../../../python-nb/data/drz-data-opbod-2020-01.pkl


# Next: add rdw data

Because rdw data changes constantly it is advisable to run the notebook that adds rdw data to the above results soon.