<img src="media/logo_psa.jpg" width="300">

<h1><center>Constructing OPV</center></h1>

### Imports

In [1]:
%load_ext autoreload
%autoreload 2
import os
import datetime
import pandas as pd

from pyspark.sql import functions as F
import numpy as  np

from distribution_cost.configuration import spark_config
from distribution_cost.configuration.app import AppConfig
from distribution_cost.configuration.data import DataConfig
from distribution_cost.domain import kpis

/gpfs/user/e587246/dco00/conf/application.yml
/gpfs/user/e587246/dco00


In [2]:
# Database uri
app_config = AppConfig()

db_uri = app_config.db_uri_jdbc
db_uri

'jdbc:oracle:thin:BRC03_VMECA/8sUFYtvK@//pyox2k01:1521/BRCEX_PP2'

In [3]:
# Create spark session
spark_context, spark_session = spark_config.get_spark(app_name="app-distribution-cost",
                                                      executors=2, executor_cores=4, executor_mem='5g',
                                                      dynamic_allocation=True, max_executors=8)

In [4]:
dfOPV = spark_session.read.option("fetchsize", 10000).jdbc(db_uri, table="SMKT002_OPV_TEMP").cache()

In [29]:
dfOPV.columns

['ACC',
 'ACC_PRE',
 'ANNEE_VEH_REP',
 'BONUS_MALUS',
 'NCL_CODCRE',
 'COD_PROM',
 'CONTRAT_SERVICE',
 'DATE_IMMAT_REP',
 'DATE_LIV_REELLE',
 'DATE_LIVRAISON',
 'OFR_DATE',
 'ENER_REP',
 'FINITION',
 'FRAIS',
 'COD_BCD',
 'TYPECLI',
 'STATUS_CDC',
 'COD_COMMANDE',
 'KM_REP',
 'LCDV',
 'TYPE_CONTRAT',
 'MARQUE_REPRISE',
 'MODELO_REPRISE',
 'OPCIONES',
 'OPCIONES_PRE',
 'PRIX_FINAL',
 'NCL_PD_SUBTOTAL2',
 'PRIX_HT_CATALOGUE27',
 'tarif+OPTTIONS',
 'PRIX_TTC_CATALOGUE',
 'NCL_PR_SUBTOTAL2',
 'TARIF_VEHICULE',
 'TARIF_VEHICULE_PRE',
 'tarif+options_PRE',
 'Taxe parafiscale',
 'TOTAL_REMISE',
 'TOTAL_REMISE_PRE',
 'INI_GAR',
 'FIN_GAR',
 'PRIX_HT_CATALOGUE39',
 'TRANSFORMATIONS',
 'TRANSFORMATIONS_PRE',
 'VER_ENER',
 'AIDE_REPRISE',
 'TYPE_ACHAT',
 'FINANCEMENT',
 'FRAIS_ANEXXES',
 'VEHICULE_REPENDRE_PRE',
 'VER_VEH',
 'NCL_VO_PRIMACONVERSION',
 'NCL_VO_VALON',
 'NCL_VO_IMPAYUDAREC',
 'CRE_MARCA',
 'CDC_NUMVIN',
 'CRE_RAISOC',
 'COD_OFR',
 'NUMCLI',
 'DR',
 'zone',
 'DATE_COMANDE']

# Treatments

In [5]:
dfOPV = dfOPV.select('CDC_NUMVIN','CRE_MARCA','NCL_CODCRE','DATE_COMANDE','NCL_PD_SUBTOTAL2','FINITION','ACC_PRE','NCL_VO_IMPAYUDAREC','FRAIS_ANEXXES','TOTAL_REMISE_PRE','TOTAL_REMISE','PRIX_FINAL','tarif+options_PRE','tarif+OPTTIONS','NCL_VO_PRIMACONVERSION','ACC','CONTRAT_SERVICE','BONUS_MALUS','NCL_VO_VALON','TRANSFORMATIONS','Taxe parafiscale').filter(~(F.col('CDC_NUMVIN')=='                 ')).cache()
dfOPV = dfOPV.drop_duplicates()
dfOPV = dfOPV.orderBy('DATE_COMANDE', ascending=False).coalesce(1).dropDuplicates(subset=['CDC_NUMVIN'])

In [6]:
mapping_columns_france_ap = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_VO_IMPAYUDAREC':'Aide à la reprise TTC','FRAIS_ANEXXES':'Frais annexes TTC','NCL_VO_PRIMACONVERSION':'Prime à la conversion TTC','TRANSFORMATIONS':'Transformations TTC','TRANSFORMATIONS_PRE':'Transformations HT','CONTRAT_SERVICE':'Contrat service TTC','BONUS_MALUS':'Bonus malus TTC','Taxe parafiscale':'Taxe parafiscale TTC'}
mapping_columns_france_ac_ds = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_VO_IMPAYUDAREC':'Aide à la reprise TTC','FRAIS_ANEXXES':'Frais annexes TTC','NCL_VO_PRIMACONVERSION':'Prime à la conversion TTC','TRANSFORMATIONS':'Transformations TTC','TRANSFORMATIONS_PRE':'Transformations HT','CONTRAT_SERVICE':'Contrat service TTC','BONUS_MALUS':'Bonus malus TTC'}
mapping_columns_austria = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','AUFZAHLUNG':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT'}
mapping_columns_belgium_ac_ds = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','NCL_PR_SUBTOTAL2':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_VHU_CIVA':'Aide à la reprise TTC','OFR_DESCUENTOPRIX':'Remise (Korting) TTC','NCL_PR_FLOTADES':'Remise stock TTC'}
mapping_columns_portugual = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','NCL_PR_SUBTOTAL2':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','TRANSFORMATIONS':'Transformations TTC','TRANSFORMATIONS_PRE':'Transformations HT','NCL_VN_SERVPRIXTOT':'Contrat service TTC','IPT':'IPT TTC','NCL_PD_SUBTOTAL2':'Sous-total HT','WCS_TARIF_OFR_TTC':'Services connectés TTC','NCL_TRANSFOR1PVP':'Modules complémentaires TTC','NCL_PD_IVAPRIX':'TVA'}
mapping_columns_belgim_ap = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_VHU_CIVA':'Aide à la reprise TTC','OFR_DESCUENTOPRIX':'Remise (Korting) TTC','NCL_PR_FLOTADES':'Remise stock TTC'}
mapping_columns_spain_ap = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','SUPLIDOS':'Frais annexes TTC','TOTAL_SERVICIOS':'Total services TTC','NCL_PD_IMPMATPRIX':'Matriculation TTC','DEDUCCION_IMPUESTOS':'Déductions TTC'}
mapping_columns_spain_ac_ds = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','SUPLIDOS':'Frais annexes TTC','TOTAL_SERVICIOS':'Total services TTC','NCL_PD_IMPMATPRIX':'Matriculation TTC','NCL_PR_DEDUCC_IM':'Déductions TTC'}
mapping_columns_germany = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','PRIX_FINAL':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','überführung':'überführung HT','SERVICELEISTUNGENPR':'Services de personnalisation TTC','SERVICELEISTUNGENPD':'Services de personnalisation HT'}
mapping_columns_italy_ap = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','AUFZAHLUNG':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_PR_DEDUCC_IM':'Déductions TTC','IPT':'IPT TTC','SPESE_GES':'Frais de gestion TTC','NCL_VN_SUPLIDOS':'Frais  divers TTC','WCS_TARIF_OFR_TTC + NCL_VN_SERVPRIXTOT':'Services TTC','NCL_PR_DESVPO':'Surévaluation','NCL_MRT':'Inscription taxable','NCL_FMR':'Exempt de frais','KIT_SICUREZZA':'Safety box TTC'}
mapping_columns_italy_ac_ds = {'CDC_NUMVIN':'VIN', 'COD_OFR':'Code offre', 'zone':'Zone','DR':'Direction régionale','NCL_CODCRE':'Code point de vente','FINITION':'Finition','DATE_COMMANDE':'Date de commande','AUFZAHLUNG':'Prix final TTC','OPCIONES':'Options TTC','OPCIONES_PRE':'Options HT','ACC':'Accessoires TTC','ACC_PRE':'Accessoires HT','TARIF_VEHICULE':'Tarif du vehicule acheté TTC','TARIF_VEHICULE_PRE':'Tarif du vehicule acheté HT','TOTAL_REMISE':'Total de la remise TTC','TOTAL_REMISE_PRE':'Total de la remise HT','NCL_VO_VALON':'Prix du véhicule repris TTC','VEHICULE_REPENDRE_PRE':'Prix du véhicule repris HT','tarif+OPTIONS':'List price TTC','tarif+options_PRE':'List price HT','NCL_PD_SUBTOTAL2':'Prix facturé HT','NCL_PR_DEDUCC_IM':'Déductions TTC','IPT':'IPT TTC','SPESE_GES':'Frais de gestion TTC','NCL_VN_SUPLIDOS':'Frais  divers TTC','WCS_TARIF_OFR_TTC + NCL_VN_SERVPRIXTOT':'Services TTC','NCL_PR_DESVPO':'Surévaluation','NCL_MRT':'Inscription taxable','NCL_FMR':'Exempt de frais','NCL_PR_PRECIOACF':'Safety box TTC'}


In [7]:
dfOPV = dfOPV.withColumnRenamed('CDC_VIN', 'VIN')
# dfOPV = dfOPV.withColumnRenamed('Taxe parafiscale', 'TAXE_PARAFISCALE')
# dfOPV = dfOPV.withColumn('LIB_PAYS', F.lit('France'))

In [8]:
dfOPV= dfOPV.withColumn('NCL_PD_SUBTOTAL2', F.when(F.col('NCL_PD_SUBTOTAL2')=='0', np.nan).otherwise(F.col('NCL_PD_SUBTOTAL2')))
dfOPV= dfOPV.withColumn('tarif+OPTTIONS', F.when(F.col('tarif+OPTTIONS')=='0', np.nan).otherwise(F.col('tarif+OPTTIONS')))
dfOPV= dfOPV.withColumn('tarif+options_PRE', F.when(F.col('tarif+options_PRE')=='0', np.nan).otherwise(F.col('tarif+options_PRE')))

In [34]:
dfOPV.printSchema()

root
 |-- CDC_NUMVIN: string (nullable = true)
 |-- CRE_MARCA: string (nullable = true)
 |-- NCL_CODCRE: string (nullable = true)
 |-- DATE_COMANDE: string (nullable = true)
 |-- NCL_PD_SUBTOTAL2: string (nullable = true)
 |-- FINITION: string (nullable = true)
 |-- ACC_PRE: string (nullable = true)
 |-- NCL_VO_IMPAYUDAREC: string (nullable = true)
 |-- FRAIS_ANEXXES: string (nullable = true)
 |-- TOTAL_REMISE_PRE: string (nullable = true)
 |-- TOTAL_REMISE: string (nullable = true)
 |-- PRIX_FINAL: string (nullable = true)
 |-- tarif+options_PRE: string (nullable = true)
 |-- tarif+OPTTIONS: string (nullable = true)
 |-- NCL_VO_PRIMACONVERSION: string (nullable = true)
 |-- ACC: string (nullable = true)
 |-- CONTRAT_SERVICE: string (nullable = true)
 |-- BONUS_MALUS: string (nullable = true)
 |-- NCL_VO_VALON: string (nullable = true)
 |-- TRANSFORMATIONS: string (nullable = true)
 |-- Taxe parafiscale: string (nullable = true)



In [13]:
dfOPV = dfOPV.withColumn('DATE_COMANDE',F.to_date('DATE_COMANDE','yyyy-MM-dd'))
dfOPV = dfOPV.withColumn('LIB_PAYS',F.lit('France'))
dfOPV = dfOPV.withColumnRenamed('Taxe parafiscale','TAXE_PARAFISCALE')

## Load on cluster

In [7]:
dfOPV = spark_session.read.load("/user/e587247/data/raw/opvv30c")

In [8]:
df_with_year_and_month_and_day = (
    dfOPV.withColumn("year", F.year(F.col("DATE_COMANDE")))
                   .withColumn("month", F.month(F.col("DATE_COMANDE")))
) 

In [9]:
df_with_year_and_month_and_day.write.mode("append").partitionBy("Country","CRE_MARCA","year","month").parquet("/user/brc03/vmeca/data/refined/opv")

In [8]:
dfOPV.drop('year','month','day').write.option("truncate","true").jdbc(url=db_uri, table="SMKT008_OPV", mode="overwrite")

In [10]:
spark_session.stop()