# Merge all auctions into one

Combines the datasets of several auctions into one and does some simple analyses.  


### User variables

In [1]:
import os

# Move the working directory one level up; this happens before the project import below.
# NOTE(review): the path is relative, so re-running this cell moves up one more level.
os.chdir(r'..')

import drz_config

# Read the settings and expose the ones used in this notebook as constants.
cfg = drz_config.read_config()
DATE, VERBOSE, OPBOD, SKIPSAVE = (
    cfg[key] for key in ('DATE', 'VERBOSE', 'OPBOD', 'SKIPSAVE')
)

# Tag that is attached as metadata to some display() calls further down.
TAG_SINGLE = "nbconvert_instruction:remove_single_output"

# Echo the settings unless output is silenced.
if VERBOSE > 0:
    display(cfg)

{'settings_fn': '../code/assets/drz-auction-settings.ini',
 'DATE': '2022-09',
 'VERBOSE': 1,
 'OPBOD': True,
 'URL': 'http://verkoop.domeinenrz.nl/verkoop_bij_opbod_2022-0901',
 'EXTEND_URL': False,
 'CLOSEDDATA': True,
 'closed_data_fields': '*',
 'SKIPSAVE': False}

In [2]:
# Keys of the auctions that are concatenated in this notebook.
# format: yyyy-mm, or yyyy-mm-xx when a third part is needed to tell auctions
#         within one month apart (yyyy: year, mm: month)
if OPBOD:
    AuctionDates = [
        '2019-11', '2019-12',
        '2020-01', '2020-02', '2020-03',
        '2021-09-09', '2021-10-10', '2021-11-11', '2021-12-12',
        '2022-01-01', '2022-02-02', '2022-04-04', '2022-05-05', '2022-06-06', '2022-08-08', '2022-09-09',
    ]
else:
    AuctionDates = [
        '2014-10', '2014-11', '2014-12',  # old!
        '2015-01', '2015-02', '2015-03',  # old!
        '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
        '2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
        '2018-07', '2018-08', '2018-09', '2018-10', '2018-11', '2018-12',
        '2019-01', '2019-02', '2019-03', '2019-04', '2019-05', '2019-06',
        '2019-07', '2019-08', '2019-09', '2019-10', '2019-11', '2019-12',
        '2020-01', '2020-02', '2020-03', '2020-06', '2020-07', '2020-08', '2020-09', '2020-10', '2020-11', '2020-12',
        '2021-01', '2021-02', '2021-03', '2021-04', '2021-05',
        '2021-06', '2021-06-16',
        '2021-07-07', '2021-07-17', '2021-08-08', '2021-08-18', '2021-09-09', '2021-09-19',
        '2021-10-10', '2021-10-20', '2021-11-11', '2021-11-21', '2021-12-12', '2021-12-22',
        '2022-01-01', '2022-01-21', '2022-02-02', '2022-02-22', '2022-03-03', '2022-03-23',
        '2022-04-04', '2022-04-24', '2022-05-05', '2022-05-25', '2022-06-06',
        '2022-07-07', '2022-07-27', '2022-08-08', '2022-08-28',
    ]

# DATE from the settings must equal the first two parts (yyyy-mm) of at least one key.
auction_months = ['-'.join(d.split('-')[:2]) for d in AuctionDates]
if DATE not in auction_months:
    raise ValueError(f'{DATE} not in list that will be concatenated. Add to list.')

### Import modules 

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
# Select matplotlib's 'ggplot' style sheet.
plt.style.use('ggplot')

### Load all data

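Each auction is saved in its own .pkl file. Older auctions have one file per month; from 2021-06 onwards there can be more than one auction (and file) per month.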

In [4]:
print('load data')

# File name template for the selected auction type; '{:s}' takes the auction key.
# NOTE(review): read_pickle can execute arbitrary code; only load files you created yourself.
fn_template = '../data/rdw-data-{:s}-opbod.pkl' if OPBOD else '../data/rdw-data-{:s}.pkl'

# One data frame per auction, keyed by auction key.
data = {}
for AuctionDate in AuctionDates:
    fn = fn_template.format(AuctionDate)
    df = pd.read_pickle(fn)
    data[AuctionDate] = df


if VERBOSE > 0:
    print('fix indices')

# matrix of auction date: unique [year, month] rows taken from the keys
key_parts = [key.split('-') for key in data.keys()]
auct_month = np.array([[int(parts[0]), int(parts[1])] for parts in key_parts])
auct_month = np.unique(auct_month, axis=0)

# Offset that is added to the month number to get the third part of the key
# of the second auction in a month.
#   2021: 10
#   2022: 20
year_offsets = {2021: 10, 2022: 20}

# As of 2021-07 there is a systematic index change
#    this is caused by multiple auctions per month
in_2021_from_july = (auct_month[:, 0] == 2021) & (auct_month[:, 1] >= 7)
in_2022_or_later = auct_month[:, 0] >= 2022
for m in auct_month[in_2021_from_july | in_2022_or_later]:

    # Auctions per month
    for c in range(2):

        # years without a known offset are not supported
        if int(m[0]) not in year_offsets:
            raise NotImplementedError
        multiplier = year_offsets[int(m[0])]

        # data key
        #    yyyy-mm-xx xx: mm + multiplier
        k = f'{m[0]}-{m[1]:02.0f}-{m[1]+(c*multiplier):02.0f}'

        # only keys that were actually loaded are translated
        if k in data.keys():

            # translation
            #    yyyy-mm-llllxx: llll: lot number
            # (second and third part of the label swap places, month becomes the second part)
            pat = '{0}-' + f'{m[1]:02.0f}' + '-{2}{1}'
            data[k].index = data[k].index.map(lambda label: pat.format(*label.split('-')))

            if VERBOSE > 0:
                print(f'{k} > {data[k].index[0]} .. {data[k].index[-1]}')



load data
fix indices
2021-09-09 > 2021-09-100009 .. 2021-09-111909
2021-10-10 > 2021-10-100010 .. 2021-10-101310
2021-11-11 > 2021-11-100211 .. 2021-11-106011
2021-12-12 > 2021-12-100012 .. 2021-12-105512
2022-01-01 > 2022-01-100001 .. 2022-01-106101
2022-02-02 > 2022-02-100002 .. 2022-02-105202
2022-04-04 > 2022-04-100104 .. 2022-04-102204
2022-05-05 > 2022-05-100005 .. 2022-05-101605
2022-06-06 > 2022-06-100106 .. 2022-06-101906
2022-08-08 > 2022-08-100108 .. 2022-08-100108
2022-09-09 > 2022-09-100009 .. 2022-09-101209


In [5]:
print('load data')

# NOTE(review): this cell reads all files again and so replaces the `data` dict
# (including any index changes) built by the preceding cell. Below, indices are only
# rewritten when OPBOD is false. Confirm that this is intended.

# File name template for the selected auction type; '{:s}' takes the auction key.
# NOTE(review): read_pickle can execute arbitrary code; only load files you created yourself.
fn_template = '../data/rdw-data-{:s}-opbod.pkl' if OPBOD else '../data/rdw-data-{:s}.pkl'

# One data frame per auction, keyed by auction key.
data = {}
for AuctionDate in AuctionDates:
    fn = fn_template.format(AuctionDate)
    if VERBOSE > 0:
        print(fn)
    df = pd.read_pickle(fn)
    data[AuctionDate] = df


# When there are two auctions in a single month
if not OPBOD:
    # HACK ALERT!!
    # Both June 2021 files get the fixed month part '06'; the other two label parts swap places.
    for june_key in ('2021-06-16', '2021-06'):
        data[june_key].index = data[june_key].index.map(
            lambda label: '{0}-06-{2}{1}'.format(*label.split('-'))
        )

    # matrix of auction date: unique [year, month] rows taken from the keys
    key_parts = [key.split('-') for key in data.keys()]
    auct_month = np.array([[int(parts[0]), int(parts[1])] for parts in key_parts])
    auct_month = np.unique(auct_month, axis=0)

    # Offset that is added to the month number to get the third part of the key
    # of the second auction in a month.
    #   2021: 10
    #   2022: 20
    year_offsets = {2021: 10, 2022: 20}

    # 2021-07 there is a systematic index change
    in_2021_from_july = (auct_month[:, 0] == 2021) & (auct_month[:, 1] >= 7)
    in_2022_or_later = auct_month[:, 0] >= 2022
    for m in auct_month[in_2021_from_july | in_2022_or_later]:
        for c in range(2):

            # years without a known offset are not supported
            if int(m[0]) not in year_offsets:
                raise NotImplementedError
            multiplier = year_offsets[int(m[0])]

            # data key
            k = f'{m[0]}-{m[1]:02.0f}-{m[1]+(c*multiplier):02.0f}'

            # only keys that were actually loaded are translated
            if k in data.keys():

                # translation
                pat = '{0}-' + f'{m[1]:02.0f}' + '-{2}{1}'
                data[k].index = data[k].index.map(lambda label: pat.format(*label.split('-')))

                if VERBOSE > 0:
                    print(f'{k} > {data[k].index[0]} .. {data[k].index[-1]}')

# Show the tail of the last file; both outputs carry the TAG_SINGLE tag as metadata.
if VERBOSE > 0:
    single_output_meta = {"tags": (TAG_SINGLE, )}
    display({'text/plain': '\nlast rows of last file'}, raw=True, metadata=single_output_meta)
    display(data[AuctionDates[-1]].tail(), metadata=single_output_meta)

load data
../data/rdw-data-2019-11-opbod.pkl
../data/rdw-data-2019-12-opbod.pkl
../data/rdw-data-2020-01-opbod.pkl
../data/rdw-data-2020-02-opbod.pkl
../data/rdw-data-2020-03-opbod.pkl
../data/rdw-data-2021-09-09-opbod.pkl
../data/rdw-data-2021-10-10-opbod.pkl
../data/rdw-data-2021-11-11-opbod.pkl
../data/rdw-data-2021-12-12-opbod.pkl
../data/rdw-data-2022-01-01-opbod.pkl
../data/rdw-data-2022-02-02-opbod.pkl
../data/rdw-data-2022-04-04-opbod.pkl
../data/rdw-data-2022-05-05-opbod.pkl
../data/rdw-data-2022-06-06-opbod.pkl
../data/rdw-data-2022-08-08-opbod.pkl
../data/rdw-data-2022-09-09-opbod.pkl


### Consolidate format
Older files differ slightly from newer ones, for example in column naming conventions and in how the list of images is stored. Here all files are consolidated into one format.

In [6]:
# Change type of early auctions for images
if VERBOSE > 0:
    # Overview per auction: name of the image column, type of its first value and
    # either the value itself (string) or its length (anything else).
    print('Fields with images look like this:\n')
    df_ = pd.DataFrame(index=data.keys(), columns=['column', 'type', 'first row'])
    for k, frame in data.items():
        is_image_col = frame.columns.str.lower().isin(['image', 'images'])
        im_cols = frame.columns[is_image_col]
        example = frame[im_cols[0]].values[0]
        df_.loc[k, 'type'] = str(type(example))
        df_.loc[k, 'column'] = im_cols[0]
        df_.loc[k, 'first row'] = example if isinstance(example, str) else len(example)
    with pd.option_context('display.max_row', 999):
        display(df_)


# Auctions up to and including 2017-10: these use the old column names.
keys_old_format = ['2017-03',
                   '2017-04',
                   '2017-05',
                   '2017-06',
                   '2017-07',
                   '2017-08',
                   '2017-09',
                   '2017-10']

# reformatting did not happen in IRS auctions (OPBOD), so only the other auctions are touched
if not OPBOD:

    # convert string representation of list to real list
    # "['image1', 'image2']"
    # NOTE(review): eval executes whatever is in the string; consider ast.literal_eval
    #               if the stored values are plain list literals.
    for k in keys_old_format:
        data[k]['Images'] = data[k]['Image'].apply(eval)
        data[k] = data[k].drop(columns=['Image'])

    def _unquoted_list_to_list(s):
        """Turn "[image1, image2]" into a list by quoting the items and evaluating the result."""
        quoted_items = re.sub(' *, *', "', '", s)
        # NOTE(review): eval on stored data, see the remark above.
        return eval(re.sub(r"^\[(.*)\]$", r"['\1']", quoted_items))

    # convert string representation of list without quotes to real list
    # "[image1, image2]"
    for k in ['2017-11',
              '2017-12',
              '2018-01',
              '2018-02',
              '2018-03',
              '2018-04']:
        data[k]['Images'] = data[k]['Images'].apply(_unquoted_list_to_list)


# Other inconsistencies
if not OPBOD:
    # Other column name that changed in November 2017
    old_to_new_names = {
        'draw':'Draw',
        'nr':'LotNr',
        'raw':'Raw_text',
        'misc':'SupInfo',
        'footnote':'Note',
        'jfq':'jfc',
    }
    for k in keys_old_format:
        data[k] = data[k].rename(columns=old_to_new_names)

Fields with images look like this:



Unnamed: 0,column,type,first row
2019-11,Images,<class 'list'>,3
2019-12,Images,<class 'list'>,3
2020-01,Images,<class 'list'>,2
2020-02,Images,<class 'list'>,3
2020-03,Images,<class 'list'>,2
2021-09-09,Images,<class 'list'>,4
2021-10-10,Images,<class 'list'>,9
2021-11-11,Images,<class 'list'>,11
2021-12-12,Images,<class 'list'>,9
2022-01-01,Images,<class 'list'>,12


# Merge all auctions into one data frame

In [7]:
# Stack all auctions; the auction key becomes the outer index level.
all_data = pd.concat(list(data.values()), keys=list(data.keys()))
# Same rows without the auction key level.
df = all_data.droplevel(level=0)

In [8]:
if VERBOSE > 0:
    # Show columns added
    # col_exist: one row per auction file, one column per field of the merged data;
    #            True when that file has the column.
    col_exist = pd.DataFrame.from_dict({k:all_data.columns.isin(d.columns) for k,d in data.items()}, orient='index', columns=all_data.columns)
    # col_filled: per auction (outer index level) and field; True when at least one row has a value.
    # groupby(level=0, sort=False).all() replaces DataFrame.all(level=0): the `level`
    # keyword of aggregations is deprecated (FutureWarning) and no longer exists in pandas 2.
    # sort=False keeps the auctions in order of appearance, like the old call did.
    col_filled = ~all_data.isna().groupby(level=0, sort=False).all()

    # per field: row positions (in col_exist) of the files that contain the column
    idx = col_exist.apply(lambda x:np.where(x)[0])

    first_occurence = col_exist.index[idx.apply(lambda x: x[0])]
    last_occurence = col_exist.index[idx.apply(lambda x: x[-1])]
    nr_occurence = idx.apply(len)
    # share of the files, counted from the first file with the column, that contain it
    pct_occurence = idx.apply(lambda x: 100*(len(x)/(col_exist.shape[0]-x[0])))

    # per field: row positions (in col_filled) of the auctions where the column has values
    idx = col_filled.apply(lambda x:np.where(x)[0])
    # a field that is never filled gets the last row position as fallback
    last_filled = col_filled.index[idx.apply(lambda x: x[-1] if len(x)>0 else col_exist.shape[0]-1)]
    nr_filled = idx.apply(len)
    pct_filled = idx.apply(lambda x: 100*(len(x)/(col_exist.shape[0]-x[0])) if len(x)>0 else 0)


    # summary table: one row per field
    df_ = pd.DataFrame(zip(first_occurence, last_occurence, nr_occurence, last_filled, nr_filled, pct_occurence, pct_filled), 
                       index=col_exist.columns, 
                       columns=['first', 'last', 'n', 'last_filled', 'n_not_empty', 'pct_since_first', 'pct_filled'])



    # report per origin (field name starts with 'rdw' or not) and per first auction
    for _by_rdw, df__ in df_.groupby(df_.index.str.startswith('rdw')):
        for g,df___ in df__.groupby('first'):
            # fields that are already in the first file are not "added"
            if g == col_exist.index[0]:
                continue
            print(f'Fields added in {g}', end= ' ')
            if _by_rdw:
                print('by rdw api')
            else:
                print('through regex parsing')
            with pd.option_context("display.max_rows", 999):
                display(df___.sort_index().sort_values(by='last_filled', ascending=False))

Fields added in 2021-09-09 through regex parsing


  col_filled = all_data.isna().all(level=0, axis=0) == False


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
disclaim5,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
disclaim6,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
lot_counter,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
no_nlreg19,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0


Fields added in 2022-01-01 through regex parsing


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
first_reg_abroad,2022-01-01,2022-09-09,7,2022-09-09,7,100.0,100.0
key_fixed,2022-01-01,2022-09-09,7,2022-09-09,7,100.0,100.0
no_nap,2022-01-01,2022-09-09,7,2022-09-09,7,100.0,100.0


Fields added in 2022-02-02 through regex parsing


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
Vin,2022-02-02,2022-09-09,6,2022-09-09,6,100.0,100.0


Fields added in 2019-12 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_brandstof_nominaal_continu_maximumvermogen_1,2019-12,2019-12,1,2022-09-09,0,6.666667,0.0
rdw_assen_spoorbreedte_3,2019-12,2020-03,2,2022-09-09,0,13.333333,0.0
rdw_assen_weggedrag_code_1,2019-12,2022-05-05,3,2022-09-09,0,20.0,0.0
rdw_assen_weggedrag_code_3,2019-12,2020-03,2,2022-09-09,0,13.333333,0.0
rdw_brandstof_co2_uitstoot_gewogen_2,2019-12,2019-12,1,2022-09-09,0,6.666667,0.0
rdw_versnellingsbak_uitvoering_type_versnellingsbak_2,2019-12,2022-06-06,3,2022-06-06,3,20.0,20.0
rdw_versnellingsbak_uitvoering_aantal_versnellingen_ondergrens_2,2019-12,2022-06-06,3,2022-06-06,3,20.0,20.0
rdw_versnellingsbak_uitvoering_aantal_versnellingen_bovengrens_2,2019-12,2022-06-06,3,2022-06-06,3,20.0,20.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_22,2019-12,2022-05-05,2,2022-05-05,2,13.333333,13.333333
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_19,2019-12,2022-05-05,3,2022-05-05,2,20.0,13.333333


Fields added in 2020-01 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_motor_uitvoering_brandstof_actieradius_brandstof_extern_oplaadbaar_1_1,2020-01,2022-02-02,3,2022-02-02,3,21.428571,21.428571
rdw_motor_uitvoering_katalysator_indicator_2,2020-01,2021-12-12,2,2021-12-12,2,14.285714,14.285714
rdw_basisgegevens_max_massa_voertuig_bovengrens,2020-01,2020-01,1,2020-01,1,7.142857,7.142857


Fields added in 2020-02 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_brandstof_milieuklasse_eg_goedkeuring_zwaar_2,2020-02,2020-02,1,2022-09-09,0,7.692308,0.0
rdw_brandstof_milieuklasse_eg_goedkeuring_zwaar_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308
rdw_motor_uitvoering_brandstof_emissie_koolmonoxide_esc_test_1_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308
rdw_motor_uitvoering_brandstof_emissie_koolwaterstoffen_esc_test_1_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308
rdw_motor_uitvoering_brandstof_emissie_koolwaterstoffen_etc_test_1_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308
rdw_motor_uitvoering_brandstof_emissie_stikstofoxiden_esc_test_1_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308
rdw_motor_uitvoering_brandstof_emissie_vaste_stofdeeltjes_esc_test_1_1,2020-02,2020-02,1,2020-02,1,7.692308,7.692308


Fields added in 2020-03 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_motor_uitvoering_voertuigbrandstof_type_2,2020-03,2021-12-12,3,2021-12-12,3,25.0,25.0
rdw_motor_uitvoering_brandstof_emissie_aant_deeltjes_t1_test_2_1,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_brandstof_maximale_biobrandstof_percentage_2_1,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_cng_systeem_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_electromotor_af_fabriek_indicator_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_enkel_electrische_indicator_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_extern_oplaadbaar_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_hybride_elektrisch_voertuig_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333
rdw_motor_uitvoering_lpg_systeem_2,2020-03,2020-03,1,2020-03,1,8.333333,8.333333


Fields added in 2021-09-09 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_LotType,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
rdw_ovi_maximum_massa_oplegger_geremd,2021-09-09,2022-09-09,10,2022-09-09,10,90.909091,90.909091
rdw_ovi_plaats_chassis_omschrijving,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
rdw_ovi_owners,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
rdw_ovi_omschrijving_herstel,2021-09-09,2022-09-09,8,2022-09-09,8,72.727273,72.727273
rdw_ovi_omschrijving_defect,2021-09-09,2022-09-09,8,2022-09-09,8,72.727273,72.727273
rdw_ovi_mogelijk_gevaar,2021-09-09,2022-09-09,8,2022-09-09,8,72.727273,72.727273
rdw_ovi_merk,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
rdw_ovi_maximum_massa_voertuig,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0
rdw_ovi_maximum_massa_samenstel,2021-09-09,2022-09-09,11,2022-09-09,11,100.0,100.0


Fields added in 2021-10-10 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_uitvoeringverbruik_per_uitgave_verbruikcategorie_uitvoering_20,2021-10-10,2021-11-11,2,2021-11-11,2,20.0,20.0


Fields added in 2021-11-11 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_uitvoeringverbruik_per_uitgave_verbruikcategorie_uitvoering_21,2021-11-11,2021-11-11,1,2021-11-11,1,11.111111,11.111111
rdw_uitvoeringverbruik_per_uitgave_verbruikcategorie_uitvoering_22,2021-11-11,2021-11-11,1,2021-11-11,1,11.111111,11.111111


Fields added in 2021-12-12 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_brandstof_uitlaatemissieniveau_1,2021-12-12,2022-09-09,8,2022-09-09,8,100.0,100.0
rdw_datum_eerste_toelating_dt,2021-12-12,2022-09-09,8,2022-09-09,8,100.0,100.0
rdw_datum_tenaamstelling_dt,2021-12-12,2022-09-09,8,2022-09-09,8,100.0,100.0
rdw_maximum_last_onder_de_vooras_sen_tezamen_koppeling,2021-12-12,2022-09-09,6,2022-09-09,6,75.0,75.0
rdw_vervaldatum_apk_dt,2021-12-12,2022-09-09,8,2022-09-09,8,100.0,100.0
rdw_zuinigheidsclassificatie,2021-12-12,2022-09-09,8,2022-09-09,8,100.0,100.0
rdw_ovi_maximumtrapondersteundesnelheid,2021-12-12,2022-05-05,3,2022-05-05,3,37.5,37.5
rdw_ovi_vermogen_q,2021-12-12,2022-05-05,3,2022-05-05,3,37.5,37.5
rdw_brandstof_uitlaatemissieniveau_2,2021-12-12,2022-04-04,4,2022-04-04,4,50.0,50.0
rdw_datum_eerste_afgifte_nederland_dt,2021-12-12,2022-02-02,3,2022-02-02,3,37.5,37.5


Fields added in 2022-01-01 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_ovi_datum_gdk,2022-01-01,2022-09-09,7,2022-09-09,7,100.0,100.0
rdw_brandstof_klasse_hybride_elektrisch_voertuig_1,2022-01-01,2022-02-02,2,2022-02-02,2,28.571429,28.571429
rdw_brandstof_klasse_hybride_elektrisch_voertuig_2,2022-01-01,2022-02-02,2,2022-02-02,2,28.571429,28.571429
rdw_ovi_afg_dat_kent,2022-01-01,2022-02-02,2,2022-02-02,2,28.571429,28.571429


Fields added in 2022-02-02 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_api_gebrek_beschrijving,2022-02-02,2022-09-09,6,2022-09-09,6,100.0,100.0
rdw_gebrek_constateringen_meld_tijd_door_keuringsinstantie_4,2022-02-02,2022-09-09,4,2022-09-09,4,66.666667,66.666667
rdw_keuringen_vervaldatum_keuring_dt,2022-02-02,2022-09-09,6,2022-09-09,6,100.0,100.0
rdw_keuringen_vervaldatum_keuring,2022-02-02,2022-09-09,6,2022-09-09,6,100.0,100.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_1,2022-02-02,2022-09-09,6,2022-09-09,6,100.0,100.0
rdw_gebrek_constateringen_soort_erkenning_omschrijving_4,2022-02-02,2022-09-09,4,2022-09-09,4,66.666667,66.666667
rdw_gebrek_constateringen_soort_erkenning_omschrijving_3,2022-02-02,2022-09-09,4,2022-09-09,4,66.666667,66.666667
rdw_gebrek_constateringen_soort_erkenning_omschrijving_2,2022-02-02,2022-09-09,5,2022-09-09,5,83.333333,83.333333
rdw_gebrek_constateringen_soort_erkenning_omschrijving_1,2022-02-02,2022-09-09,5,2022-09-09,5,83.333333,83.333333
rdw_gebrek_constateringen_soort_erkenning_keuringsinstantie_4,2022-02-02,2022-09-09,4,2022-09-09,4,66.666667,66.666667


Fields added in 2022-04-04 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_datum_eerste_tenaamstelling_in_nederland,2022-04-04,2022-09-09,5,2022-09-09,5,100.0,100.0
rdw_datum_eerste_tenaamstelling_in_nederland_dt,2022-04-04,2022-09-09,5,2022-09-09,5,100.0,100.0
rdw_toegevoegde_objecten_soort_toe_te_voegen_object_omschrijving_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_toegevoegde_objecten_montagedatum_dt_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_toegevoegde_objecten_merkcode_toegevoegd_object_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_toegevoegde_objecten_merk_object_toegevoegd_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_toegevoegde_objecten_demontagedatum_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_nr_of_toegevoegde_objecten,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_toegevoegde_objecten_uitvoerings_volgnr_toegev_obj_1,2022-04-04,2022-09-09,3,2022-09-09,3,60.0,60.0
rdw_gebrek_constateringen_meld_datum_door_keuringsinstantie_dt_11,2022-04-04,2022-06-06,2,2022-06-06,2,40.0,40.0


Fields added in 2022-05-05 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_handelsbenaming_uitvoering_handelsbenaming_fabrikant_45,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_41,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_31,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_32,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_33,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_34,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_35,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_36,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_37,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0
rdw_handelsbenaming_uitvoering_registratiedatum_handelsbenaming_dt_38,2022-05-05,2022-05-05,1,2022-05-05,1,25.0,25.0


Fields added in 2022-08-08 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_carrosserie_uitvoering_klasse_carrosserie_volgnummer,2022-08-08,2022-09-09,2,2022-09-09,2,100.0,100.0
rdw_carrosserie_uitvoering_klasse_type_carrosserie_europees,2022-08-08,2022-09-09,2,2022-09-09,2,100.0,100.0


Fields added in 2022-09-09 by rdw api


Unnamed: 0,first,last,n,last_filled,n_not_empty,pct_since_first,pct_filled
rdw_merk_uitvoering_toegestaan_merk_registratie_datum,2022-09-09,2022-09-09,1,2022-09-09,1,100.0,100.0
rdw_merk_uitvoering_toegestaan_merkcode,2022-09-09,2022-09-09,1,2022-09-09,1,100.0,100.0


- - - -
# Continue with cars only

In [9]:
# select data: keep only the lots of type 'Personenauto'
car = df.loc[df.LotType.eq('Personenauto')].copy()

if VERBOSE > 0:
    # print lots that are cars per auction
    # The index label is split on '-' into year, month and lot (all converted to int).
    index_labels = pd.Series(car.index)
    df_ = pd.DataFrame(
        {
            field: index_labels.apply(lambda x, pos=pos: int(x.split('-')[pos]))
            for pos, field in enumerate(['year', 'month', 'lot'])
        },
        columns=['year', 'month', 'lot'],
    )
    lots_per_auction = df_.groupby(['year', 'month'])['lot'].agg(['min', 'max', 'count'])
    with pd.option_context('display.max_rows', 999):
        display(lots_per_auction.sort_index())


# last rows of the selection, tagged with TAG_SINGLE
display(car.tail(), metadata={"tags": (TAG_SINGLE, )})

Unnamed: 0_level_0,Unnamed: 1_level_0,min,max,count
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019,11,1005,8334,208
2019,12,1001,9429,170
2020,1,1004,7312,174
2020,2,1004,8304,35
2020,3,1007,1913,139
2021,9,1000,1119,87
2021,10,1000,1013,10
2021,11,1002,1060,36
2021,12,1000,1055,42
2022,1,1000,1060,38


# Save file

In [10]:
# Target file; OPBOD auctions get their own file with an '-opbod' suffix.
file_name = (
    '../data/cars-from-all-auctions-opbod.pkl' if OPBOD
    else r'../data/cars-from-all-auctions.pkl'
)

# Saving can be switched off in the settings; an existing file is overwritten otherwise.
if SKIPSAVE != False:
    print(f'Skip. {file_name} exists or saving is disabled in settings.')
else:
    print(file_name)
    car.to_pickle(file_name)
    


../data/cars-from-all-auctions-opbod.pkl


# Write example results to file
Based on: https://stackoverflow.com/a/33869154

In [11]:
def pandas_df_to_markdown_table(df):
    """Return `df` as pipe-separated text with a '-----' row below the header.

    The index of `df` is not written. The text is produced by `to_csv` with
    '|' as separator, so values follow csv formatting rules.
    """
    # one '-----' cell per column, placed as first row above the data
    separator_row = pd.DataFrame([['-----'] * len(df.columns)], columns=df.columns)
    return pd.concat([separator_row, df]).to_csv(sep="|", index=False)


In [12]:
file_name = '../assets/example-table-of-cars.md'

# Example rows: the last ten cars.
example = car.tail(10).copy()

# trim some long fields
# Item assignment behind a column check is used instead of attribute assignment
# (example.rdwinfo = ...): when the column is missing, attribute assignment only sets
# an attribute on the object (with a warning) and no column is changed or created.
for col, placeholder in [('rdwinfo', '.. rdw info ..'),
                         ('Raw_text', '.. raw text ..'),
                         ('SupInfo', '.. suplm. info. ..')]:
    if col in example.columns:
        example[col] = placeholder

# convert to md
try:
    table_text = example.to_markdown()
except ImportError:
    # to_markdown needs an optional package; fall back to the local pipe-separated helper
    print('Fallback')
    table_text = pandas_df_to_markdown_table(example)

# save (only when saving is enabled and the file does not exist yet)
if (SKIPSAVE==False) and (not(os.path.isfile(file_name))):
    # explicit encoding so that non-ASCII text is written the same way on every platform
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(table_text)

    print('A markdown table is available as\n\t{}'.format(file_name))
else:
    print(f'Skip. {file_name} exists or saving is disabled in settings.')
    display(example, metadata={"tags":(TAG_SINGLE, )})
    


Fallback
Skip. ../assets/example-table-of-cars.md exists or saving is disabled in settings.


In [13]:
# idx = df.index.str.split('-', expand=True).droplevel(-1)
# is_last_auction = idx == idx[-1]
# subsel_reg = df.loc[is_last_auction, 'Reg'].drop_duplicates().values
# VERBOSE

In [14]:
# vc2

In [15]:
# show recurring lots (based on registration no.)
# For every registration number that occurs more than once, print a title line and
# show how the lot differs between its occurrences.

# number of rows per registration number
vc = df.Reg.value_counts()

# drop obvious
vc.drop(['onbekend', ''], inplace=True, errors='ignore')

# reduce output
if VERBOSE < 2:
    # only of last auction
    # index labels are split on '-' and the last part is dropped; rows whose remaining
    # parts equal those of the last row are taken as the last auction
    idx = df.index.str.split('-', expand=True).droplevel(-1)
    is_last_auction = idx == idx[-1]
    subsel_reg = df.loc[is_last_auction, 'Reg'].drop_duplicates().values
    vc = vc[vc.index.isin(subsel_reg)]
    
# show recurring
for r in vc[vc > 1].index:
    # title: registration, number of occurrences, then brand - type - date of the first matching row
    print(r, vc[r], end=': ')
    sel_reg = df.Reg==r
    print(' - '.join(list(df.loc[sel_reg, ['ItemBrand', 'ItemType', 'rdw_datum_eerste_afgifte_nederland']].drop_duplicates(ignore_index=True).iloc[0].astype(str))), end=' ')
    
    # all occurrences of this registration, without some long/uninformative fields,
    # transposed: one row per field, one column per occurrence
    # NOTE(review): Reg is the same for every selected row, so sort_values(by='Reg') does
    # not order the occurrences by auction date; the code below presumably relies on the
    # rows already being in chronological order - confirm.
    df_ = df.loc[sel_reg, list(df.columns.drop([
    'Raw_text', 'rdwinfo', 'rdw_merk_registratie_datum', 'rdw_merk_registratie_datum_dt', 'rdw_merkcode', 'Images', 'Source'
    ], errors='ignore'))].sort_values(by='Reg').T

    # convert column labels to nr of days since the previous occurrence (diff of consecutive
    # dates); dates are built from the first two parts (year, month) of the index label only
    ix = pd.Series(df_.columns.map(lambda x: pd.to_datetime('/'.join(x.split('-')[:2])))).diff().dt.days.copy()
    # the first diff is missing; set it to 0 so the cast to int works
    ix[0] = 0
    ix=ix.astype(int).astype(str)
    # the first column shows year/month of the first occurrence instead of a day count
    ix[0] = pd.to_datetime('/'.join(df_.columns[0].split('-')[:2])).strftime('%Y/%m')

    df_.columns=ix

    if (VERBOSE < 2) | (df_.shape[1] > 4):

        # number of distinct values per field; lists are converted to strings first
        vc2 = df_.applymap(lambda x: str(x) if isinstance(x, list) else x).nunique(axis=1)
        print('')
        # show only the fields that differ between occurrences
        display(df_.loc[list(vc2[vc2>1].index),:])
    elif df_.shape[1] > 2:
        # 3 or 4 occurrences: show the price per occurrence
        display(df_.loc[['Price'],:].T)
    else:
        # otherwise: only print the column labels
        print(*list(df_.columns))
        

54-HR-ZV 2: CHRYSLER - pt cruiser; 2.0i 16v 5-sp. - nan 


Unnamed: 0,2022/06,92
Title,Kavel K2206011004,Kavel K2209011010
Price,250.0,300.0
LotNr,K2206011004,K2209011010
APKdate,01.07.2022,
rdw_TimeStamp_x,20220610,20220911
rdw_api_carrosserie_uitvoering,https://opendata.rdw.nl/Voertuigen/Open-Data-R...,https://opendata.rdw.nl/Voertuigen/Open-Data-R...
rdw_api_carrosserie_uitvoering_klasse,https://opendata.rdw.nl/Voertuigen/Open-Data-R...,https://opendata.rdw.nl/Voertuigen/Open-Data-R...
rdw_TimeStamp_y,20220610,20220911
lot_counter,1004.0,1010.0
rdw_code_toelichting_tellerstandoordeel,0.0,00
