# Add extra information to DRZ auction results

Queries the RDW open-data datasets for every auction lot that has a Dutch registration.

It may take a while (10 min) to query all auction results.
- - - - 

### User variables


In [1]:
# Auction month to process; it is substituted into the input and output file names.
Date = '2019-05' # yyyy-mm

### Modules and functions

In [2]:
import pandas as pd
import re 
# to keep api key hidden import this from sub dir
import assets.hidden_api_keys as hidden_api_keys

# Base url of the RDW open-data resource with the app token attached; the
# trailing '&' lets callers append further query parameters directly.
apiurl = ''.join([
    'https://opendata.rdw.nl/resource/m9d7-ebf2.json?$$app_token=',
    hidden_api_keys.socrata_apptoken,
    '&',
])

def get_json_from_api(url,reg,c=0):
    '''Fetch the record(s) for one registration and return them as a dict.

    The registration is normalised (dashes removed, upper-cased) and appended
    to ``url`` as a ``kenteken=`` query parameter. A failed request is retried
    after a 2 second pause.

    Parameters
    ----------
    url : str
        Base query url; the ``kenteken=`` parameter is appended verbatim, so
        it must already end in ``?`` or ``&``.
    reg : str
        Registration, with or without dashes, in any case.
    c : int, optional
        Number of attempts already made (default 0). The error is re-raised
        once this counter exceeds 10, i.e. after 11 attempts from the default.

    Returns
    -------
    dict
        ``DataFrame.to_dict()`` of the parsed JSON response.

    Raises
    ------
    Exception
        Whatever the last failed attempt raised. KeyboardInterrupt and
        SystemExit are never retried, so the run can still be interrupted.
    '''

    import time

    # Built once, outside the retried region: a malformed ``reg`` is a
    # programming error and retrying would not help.
    query = url + 'kenteken=' + reg.replace('-','').upper()

    while True:
        c += 1
        try:
            return pd.read_json(query).to_dict()
        except Exception:
            if c > 10:
                # Show which request kept failing before giving up.
                print(url,reg)
                raise
            print('pause 2 sec and try again!')
            time.sleep(2)
    
# get_json_from_api(apiurl,'61-sf-FG')

### Load auction results

In [3]:
# Pickled auction results for the selected month.
file_name = '../data/drz-data-{month}.pkl'.format(month=Date)
print(file_name)
# NOTE: unpickling can execute arbitrary code; only load files you trust.
drz = pd.read_pickle(file_name)

../data/drz-data-2019-05.pkl


### query rdw

In [4]:
# Flag the lots to look up: Reg must be filled in, must not be the
# placeholder 'onbekend' or an empty string, and the LotType must not be one
# of the excluded types below.
hasReg = (
    drz.Reg.notnull()
    & drz.Reg.ne('onbekend')
    & drz.Reg.ne('')
    & ~drz.LotType.isin([
        'Vaartuig',
        'Jetski',
        'Sloep',
        'Speedboot',
        'Vaartuig (Type onbekend)',
        'Motorvaartuig met opbouw (Pleziervaartuig)',
        'Aanhangwagen',
    ])
)

# Work on a copy so the loaded auction data stays untouched.
rdw = drz.copy()


In [5]:
def get_query_url(api_url, regs, token=None):
    '''Build a query url that selects a set of registrations.

    Parameters
    ----------
    api_url : str
        Resource url without a query string.
    regs : iterable of str
        Registrations, with or without dashes, in any case; each one is
        normalised by removing dashes and upper-casing.
    token : str, optional
        App token. When omitted, ``hidden_api_keys.socrata_apptoken`` is
        looked up at call time.

    Returns
    -------
    str
        ``api_url`` followed by the ``$$app_token`` parameter and a
        percent-encoded ``$where=kenteken in(...)`` clause.
    '''

    # Imported here so the function does not depend on a later cell having
    # imported urllib (and its ``parse`` submodule) first.
    import urllib.parse

    if token is None:
        token = hidden_api_keys.socrata_apptoken

    # Quoted, comma separated list: 'AA11BB', 'CC22DD'
    reg_list = ', '.join("'{}'".format(r.replace('-','').upper()) for r in regs)

    q = api_url + '?$$app_token=' + token + '&$where='
    # add escaped soql
    q += urllib.parse.quote('kenteken in(' + reg_list + ')')
    return q

Create a list of dataframes, one for each API result.

In [6]:
import urllib

# One dataframe per data source is collected here.
dfs = list()
# First element: every lot that has a registration, indexed by the normalised
# registration ('kenteken') with the original lot index kept as a column.
# Built as one chain of new frames, so no column is assigned on a selection
# of rdw (which can raise pandas' SettingWithCopyWarning).
dfs.append(
    rdw.loc[hasReg,['Reg']]
    .rename_axis('lot_index')
    .reset_index()
    .assign(kenteken=lambda lots: lots.Reg.apply(lambda r: r.replace('-','').upper()))
    .set_index('kenteken')
)
display(dfs[0].tail())
print(dfs[0].shape)


# The main dataset is queried first: its 'api...' columns are used afterwards
# to find the other datasets to query.
api_name = 'api_gekentekende_voertuigen'
api_url = 'https://opendata.rdw.nl/resource/m9d7-ebf2.json'
regs = dfs[0]['Reg'].values
print(len(regs),'registrations in this set')

# One request covering all registrations; the result is indexed by 'kenteken'.
q = get_query_url(api_url,regs)
dfs.append(pd.read_json(q).set_index('kenteken'))
display(dfs[1].tail())
print(dfs[1].shape)

# Query the other datasets that the main result points to.
# Some of them return several rows per registration; those are pivoted around
# the column that numbers the rows, giving one row per registration.
pivot_columns = {
    'api_gekentekende_voertuigen_assen': 'as_nummer',
    'api_gekentekende_voertuigen_brandstof': 'brandstof_volgnummer',
    'api_gekentekende_voertuigen_carrosserie': 'carrosserie_volgnummer',
    'api_gekentekende_voertuigen_carrosserie_specifiek': 'carrosserie_volgnummer',
}

# NOTE(review): the endpoint may cap the number of rows returned per request;
# confirm whether a $limit is needed for months with many lots.
for api_name in [c for c in dfs[1].columns if c.startswith('api')]:
    print(api_name)
    # Short dataset name used as prefix for the resulting column names.
    prefix = re.sub('^api_gekentekende_voertuigen_','',api_name)

    # Skip missing urls: a NaN cannot be turned into a query.
    for api_url in dfs[1][api_name].dropna().unique():
        print(api_url)
        # query the web
        q = get_query_url(api_url,regs)
        df0 = pd.read_json(q)

        # An empty answer has no 'kenteken' column and nothing to pivot.
        if df0.shape[0] != 0:
            df0 = df0.set_index('kenteken')
            pivot_col = pivot_columns.get(api_name)
            if pivot_col is not None:
                df0 = pd.pivot(df0, columns=pivot_col)

        # Flatten to single-level names: <dataset>_<column>[_<number>].
        # A plain (non-pivoted) column name is a string, not a tuple; wrap it
        # so it is not split into separate characters.
        flat_names = []
        for col in df0.columns:
            parts = col if isinstance(col, tuple) else (col,)
            flat_names.append(prefix + '_' + '_'.join(str(p) for p in parts))
        df0.columns = flat_names

        # add to list
        display(df0.tail())
        print(df0.shape)
        dfs.append(df0)

Unnamed: 0_level_0,lot_index,Reg
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1
84BXJK,2019-5-8349,84-BX-JK
WB29DB,2019-5-8351,WB-29-DB
94TDGP,2019-5-8352,94-TD-GP
WXXH06,2019-5-8353,WX-XH-06
VB016D,2019-5-8355,VB-016-D


(293, 2)
293 registrations in this set


Unnamed: 0_level_0,aanhangwagen_autonoom_geremd,aanhangwagen_middenas_geremd,aantal_cilinders,aantal_deuren,aantal_rolstoelplaatsen,aantal_wielen,aantal_zitplaatsen,afstand_hart_koppeling_tot_achterzijde_voertuig,afstand_voorzijde_voertuig_tot_hart_koppeling,api_gekentekende_voertuigen_assen,...,variant,vermogen_brom_snorfiets,vermogen_massarijklaar,vervaldatum_apk,voertuigsoort,volgnummer_wijziging_eu_typegoedkeuring,wacht_op_keuren,wam_verzekerd,wielbasis,zuinigheidslabel
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VJ894G,2800.0,2800.0,4.0,0.0,0.0,4.0,3.0,0.0,0.0,https://opendata.rdw.nl/resource/3huj-srit.json,...,CVFF1ABX,,0.06,20190604.0,Bedrijfsauto,0.0,Geen verstrekking in Open Data,Nee,330.0,
VXSX82,,,5.0,0.0,,4.0,,0.0,0.0,https://opendata.rdw.nl/resource/3huj-srit.json,...,,,0.04,20181204.0,Bedrijfsauto,0.0,Geen verstrekking in Open Data,Nee,355.0,
WB29DB,,,,,,,,419.0,,https://opendata.rdw.nl/resource/3huj-srit.json,...,,,,,Aanhangwagen,,Geen verstrekking in Open Data,N.v.t.,,
WXXH06,,,,0.0,,0.0,,481.0,0.0,https://opendata.rdw.nl/resource/3huj-srit.json,...,,,,,Aanhangwagen,0.0,Geen verstrekking in Open Data,N.v.t.,92.0,
ZBPB92,,,4.0,4.0,0.0,4.0,5.0,0.0,0.0,https://opendata.rdw.nl/resource/3huj-srit.json,...,STAKLX01,,0.06,20180119.0,Personenauto,1.0,Geen verstrekking in Open Data,Nee,250.0,


(249, 59)
api_gekentekende_voertuigen_assen
https://opendata.rdw.nl/resource/3huj-srit.json


Unnamed: 0_level_0,assen_aangedreven_as_1,assen_aangedreven_as_2,assen_aantal_assen_1,assen_aantal_assen_2,assen_hefas_1,assen_hefas_2,assen_plaatscode_as_1,assen_plaatscode_as_2,assen_spoorbreedte_1,assen_spoorbreedte_2,assen_technisch_toegestane_maximum_aslast_1,assen_technisch_toegestane_maximum_aslast_2,assen_wettelijk_toegestane_maximum_aslast_1,assen_wettelijk_toegestane_maximum_aslast_2
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
VB624N,J,N,2,2,N,N,V,A,154.0,155.0,,,1140.0,1020.0
VJ894G,J,N,2,2,N,N,V,A,174.0,172.0,,,1650.0,1575.0
VXSX82,,,2,2,,,V,A,164.0,165.0,,,1460.0,1500.0
WXXH06,,,2,2,N,N,A,A,,,700.0,700.0,700.0,700.0
ZBPB92,,,2,2,,,,,152.0,149.0,890.0,950.0,890.0,950.0


(179, 14)
api_gekentekende_voertuigen_brandstof
https://opendata.rdw.nl/resource/8ys7-d773.json


Unnamed: 0_level_0,brandstof_brandstof_omschrijving_1,brandstof_brandstof_omschrijving_2,brandstof_brandstofverbruik_buiten_1,brandstof_brandstofverbruik_buiten_2,brandstof_brandstofverbruik_gecombineerd_1,brandstof_brandstofverbruik_gecombineerd_2,brandstof_brandstofverbruik_stad_1,brandstof_brandstofverbruik_stad_2,brandstof_co2_uitstoot_gecombineerd_1,brandstof_co2_uitstoot_gecombineerd_2,...,brandstof_nominaal_continu_maximumvermogen_1,brandstof_nominaal_continu_maximumvermogen_2,brandstof_roetuitstoot_1,brandstof_roetuitstoot_2,brandstof_toerental_geluidsniveau_1,brandstof_toerental_geluidsniveau_2,brandstof_uitstoot_deeltjes_licht_1,brandstof_uitstoot_deeltjes_licht_2,brandstof_uitstoot_deeltjes_zwaar_1,brandstof_uitstoot_deeltjes_zwaar_2
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VB340X,Diesel,,6.6,,6.9,,7.3,,183.0,,...,,,0.5,,2625.0,,0.001,,,
VB624N,Diesel,,3.6,,4.0,,4.6,,105.0,,...,,,0.5,,3000.0,,0.00068,,,
VJ894G,Diesel,,6.6,,6.9,,7.3,,183.0,,...,,,0.5,,2625.0,,0.00056,,,
VXSX82,Diesel,,,,,,,,,,...,,,,,,,0.0186,,,
ZBPB92,Benzine,,6.1,,7.8,,10.7,,187.0,,...,,,,,4200.0,,,,,


(246, 32)
api_gekentekende_voertuigen_carrosserie
https://opendata.rdw.nl/resource/vezc-m2t6.json


Unnamed: 0_level_0,carrosserie_carrosserietype_1,carrosserie_type_carrosserie_europese_omschrijving_1
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1
VJ894G,BB,Bestelwagen
VXSX82,BB,Bestelwagen
WB29DB,DC,Middenasaanhangwagen
WXXH06,DC,Middenasaanhangwagen
ZBPB92,AA,Sedan


(182, 2)
api_gekentekende_voertuigen_carrosserie_specifiek
https://opendata.rdw.nl/resource/jhie-znh9.json


Unnamed: 0_level_0,carrosserie_specifiek_carrosserie_voertuig_nummer_code_volgnummer_1,carrosserie_specifiek_carrosserie_voertuig_nummer_europese_omschrijving_1,carrosserie_specifiek_carrosseriecode_1
kenteken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9VLZ75,1,Gesloten opbouw,3
VB016D,1,Gesloten opbouw,3
VB340X,1,Gesloten opbouw,3
VB624N,1,Gesloten opbouw,3
VJ894G,1,Gesloten opbouw,3


(12, 3)
api_gekentekende_voertuigen_voertuigklasse
https://opendata.rdw.nl/resource/kmfi-hrps.json


(0, 0)


In [7]:
# Put the frames of the different apis side by side (aligned on their index),
# stamp the rows with today's date, and switch the index from the
# registration to the lot id.
df = (
    pd.concat(dfs, axis='columns', sort=False)
    .assign(TimeStamp=pd.to_datetime('now').strftime('%Y%m%d'))
    .rename_axis('kenteken')
    .reset_index()
    .set_index('lot_index')
)
display(df.tail())
print(df.shape)

Unnamed: 0_level_0,kenteken,Reg,aanhangwagen_autonoom_geremd,aanhangwagen_middenas_geremd,aantal_cilinders,aantal_deuren,aantal_rolstoelplaatsen,aantal_wielen,aantal_zitplaatsen,afstand_hart_koppeling_tot_achterzijde_voertuig,...,brandstof_uitstoot_deeltjes_licht_1,brandstof_uitstoot_deeltjes_licht_2,brandstof_uitstoot_deeltjes_zwaar_1,brandstof_uitstoot_deeltjes_zwaar_2,carrosserie_carrosserietype_1,carrosserie_type_carrosserie_europese_omschrijving_1,carrosserie_specifiek_carrosserie_voertuig_nummer_code_volgnummer_1,carrosserie_specifiek_carrosserie_voertuig_nummer_europese_omschrijving_1,carrosserie_specifiek_carrosseriecode_1,TimeStamp
lot_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-5-8349,84BXJK,84-BX-JK,2500.0,2500.0,5.0,0.0,,4.0,3.0,0.0,...,0.051,,,,BB,Bestelwagen,,,,20190511
2019-5-8351,WB29DB,WB-29-DB,,,,,,,,419.0,...,,,,,DC,Middenasaanhangwagen,,,,20190511
2019-5-8352,94TDGP,94-TD-GP,,,4.0,3.0,0.0,4.0,9.0,0.0,...,0.079,,,,AF,MPV,,,,20190511
2019-5-8353,WXXH06,WX-XH-06,,,,0.0,,0.0,,481.0,...,,,,,DC,Middenasaanhangwagen,,,,20190511
2019-5-8355,VB016D,VB-016-D,2500.0,2500.0,4.0,0.0,0.0,4.0,3.0,0.0,...,0.00039,,,,BB,Bestelwagen,1.0,Gesloten opbouw,3.0,20190511


(293, 113)


# Merge rdw and drz

In [8]:
# Start from the untouched auction frame (rdw is a plain copy of drz) instead
# of from rdw itself, so re-running this cell does not add a second set of
# 'rdw_' columns.
rdw = pd.concat([drz, df.add_prefix('rdw_')], axis='columns', sort=False)

### Saving

In [9]:
# Write the combined result for the selected month next to the input data.
file_name = '../data/rdw-data-{month}.pkl'.format(month=Date)
print(file_name)
rdw.to_pickle(file_name)

../data/rdw-data-2019-05.pkl
