# Add extra information to DRZ auction results

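Query the RDW open data set (opendata.rdw.nl) for every auction lot that has a registration, and store the response alongside the auction result.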

Querying all auction results may take a while (about 10 minutes).
- - - - 

### User variables


In [1]:
# Auction month to process, formatted as yyyy-mm. It is substituted into the
# input (drz-data-<Date>.pkl) and output (rdw-data-<Date>.pkl) file names.
Date = '2019-03' # yyyy-mm

### Modules and functions

In [2]:
import pandas as pd

# the API key is kept out of this notebook; it is imported from a module in a sub-directory
import assets.hidden_api_keys as hidden_api_keys

# Base url of the RDW open-data resource. The Socrata app token is passed as a
# query parameter; the trailing '&' lets callers append further parameters.
apiurl = ''.join([
    'https://opendata.rdw.nl/resource/m9d7-ebf2.json?$$app_token=',
    hidden_api_keys.socrata_apptoken,
    '&',
])

def get_json_from_api(url,reg,c=0):
    '''Query the API for one registration and return the response as a dict.

    The registration is normalised (dashes removed, upper-cased) and appended
    to ``url`` as the ``kenteken=`` query parameter. The response is parsed
    with ``pd.read_json`` and converted with ``DataFrame.to_dict()``, so the
    result maps column name -> {row index -> value}.

    A failed request is retried after a pause of 2 seconds. Only ``Exception``
    subclasses trigger a retry, so ``KeyboardInterrupt`` / ``SystemExit`` stop
    a long-running query immediately instead of being swallowed.

    Parameters
    ----------
    url : str
        Base url to which ``kenteken=<registration>`` is appended directly.
    reg : str
        Registration (license plate); dashes and letter case are ignored.
    c : int, optional
        Number of attempts already made. Attempts continue until this counter
        exceeds 10.

    Returns
    -------
    dict
        The parsed response.

    Raises
    ------
    Exception
        The error of the last attempt, re-raised once the attempt counter
        exceeds 10 (``url`` and ``reg`` are printed first).
    '''

    import time

    # Build the query outside the try block: a malformed `reg` (e.g. not a
    # string) is a caller error and should fail at once, not be retried.
    query = url + 'kenteken=' + reg.replace('-','').upper()

    while True:
        c += 1
        try:
            return pd.read_json(query).to_dict()
        except Exception:
            if c > 10:
                print(url,reg)
                raise
            print('pause 2 sec and try again!')
            time.sleep(2)
    
# get_json_from_api(apiurl,'61-sf-FG')

### Load auction results

In [3]:
# Build the path of the pickled auction results for the selected month,
# echo it, and load the table.
drz_pickle = '../data/drz-data-{}.pkl'
file_name = drz_pickle.format(Date)
print(file_name)
drz = pd.read_pickle(file_name)

../data/drz-data-2019-03.pkl


### Query RDW

In [4]:
# Select the lots that have a Dutch registration (license number): Reg must be
# present and not 'onbekend' (Dutch for 'unknown'), and the lot type must not
# be one of the vessel / trailer categories listed below.
hasReg = (~drz.Reg.isnull()) & (drz.Reg != 'onbekend') & (~drz.LotType.isin([
    'Vaartuig',
    'Jetski',
    'Sloep',
    'Speedboot',
    'Vaartuig (Type onbekend)',
    'Motorvaartuig met opbouw (Pleziervaartuig)',
    'Aanhangwagen',
]))

# make a copy and add an empty column that will hold the api responses
rdw = drz.copy()
rdw["rdwinfo"]=None

# Defaults for the progress indicator, so the closing print below also works
# when not a single lot has a registration.
prog = 0.0
s = '-' * 20
n_lots = len(hasReg.index)

# Loop over lots with a registration. `pos` is the row position within the
# full table; enumerating avoids searching the index on every iteration (and
# the removed `pd.np` alias that search relied on).
for pos, (idx, has_reg) in enumerate(hasReg.items()):

    if not has_reg:
        continue


    # progress indicator: 20-character bar plus percentage of all lots

    prog = pos/n_lots
    nprog = int(prog*20)
    s = ':' * nprog + '-' * (20-nprog)

    print('{:s} {:.1f}%'.format(s, prog*100), end = '', flush = True)


    # Registration  (License plate)

    Reg = rdw.loc[idx,'Reg']


    # response of the main api for this registration

    dd = get_json_from_api(apiurl,Reg)


    # Deeper apis could be listed in the result (keys starting with 'api',
    # holding a url in row 0). Collect the urls first, then query those too.

    deeper_urls = [dd[k][0] for k in dd if k.startswith('api')]


    # merge every deeper response into the same dict; on duplicate keys the
    # later response wins

    for deeper_url in deeper_urls:
        apiurl2 = deeper_url + '?$$app_token=' + hidden_api_keys.socrata_apptoken + '&'
        dd = {**dd, **get_json_from_api(apiurl2,Reg)}


    # add time stamp, when rdw was queried

    dd['TimeStamp']=pd.to_datetime('now').strftime('%Y%m%d')

    # add dict to df
    # NOTE(review): the value is wrapped in a one-element list as in the
    # original; confirm how pandas stores it before changing this line.

    rdw.loc[idx,'rdwinfo'] = [dd]

    # reset prog. indicator: return the cursor to the start of the line
    print('', end='\r')

# final state of the indicator (position of the last queried lot)
print('{:s} {:.1f}% <done>'.format(s, prog*100))

:::::::::::::::::::- 99.6% <done>


### Saving

In [5]:
# Build the output path for the selected month, echo it, and pickle the
# auction table that now carries the rdwinfo column.
rdw_pickle = '../data/rdw-data-{}.pkl'
file_name = rdw_pickle.format(Date)
print(file_name)
rdw.to_pickle(file_name)

../data/rdw-data-2019-03.pkl
