<img src="media/logo_psa.jpg" width="300">

<h1><center>Constructing SAMARA Data (2.1. SAMARA)</center></h1>

### Imports

In [1]:
%load_ext autoreload
%autoreload 2
import os
import datetime
import pandas as pd
# Raise the pandas display limits so wide/long frames are shown without truncation.
for _option_name, _option_value in (('display.max_rows', 500), ('display.max_columns', 100)):
    pd.set_option(_option_name, _option_value)

from pyspark.sql import functions as F

from distribution_cost.configuration import spark_config
from distribution_cost.configuration.app import AppConfig
from distribution_cost.configuration.data import DataConfig
from distribution_cost.infra import oracle
from distribution_cost.domain import kpis

/gpfs/user/e587247/dco00/conf/application.yml
/gpfs/user/e587247/dco00


In [2]:
# Connection strings are taken from the application configuration:
#   db_uri           -> passed to the Spark JDBC reader below
#   db_uri_cx_oracle -> passed to oracle.read_df_from_query below
app_config = AppConfig()
db_uri, db_uri_cx_oracle = app_config.db_uri_jdbc, app_config.db_uri_cx_oracle

In [3]:
# Data Config
# Instantiate the project data configuration; its `vhls_perimeter` mapping is
# read by the next cell.
data_config = DataConfig()

# Bare last expression: the notebook renders the perimeter mapping
# (keys: sites, start_date, end_date, genr_door) as this cell's output.
data_config.vhls_perimeter

{'sites': ['PY', 'MU'],
 'start_date': '15/01/20',
 'end_date': '17/01/20',
 'genr_door': 'EMON'}

In [4]:
# Unpack the perimeter settings into notebook-level names.
# NOTE(review): none of these four names is referenced again in the visible part
# of this notebook (the SAMARA query uses its own literal date window) - confirm
# whether they are still needed.
_perimeter = data_config.vhls_perimeter
sites, start_date, end_date, genr_door = (
    _perimeter[_key] for _key in ("sites", "start_date", "end_date", "genr_door")
)

In [5]:
# Create the Spark context/session pair through the project helper.
# Sizing is collected in one mapping so it can be read (and tuned) at a glance.
spark_settings = dict(
    app_name="app-distribution-cost",
    executors=4,
    executor_cores=4,
    executor_mem='16g',
    dynamic_allocation=True,
    max_executors=8,
)
spark_context, spark_session = spark_config.get_spark(**spark_settings)

In [6]:
querySAMARA = """
(SELECT
    SINQTVIN.CODE SINQTVIN__CODE,
    SINQTCLI_2.CODE SINQTCLI_2__CODE,
    SINQTCLI_2.CODE_PAYS_IMPLANT,
    SINQTVER.CODE SINQTVER__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTVER.LIB_EN,SINQTVER.LIB_FR) 
when 'fr_FR' then SINQTVER.LIB_FR
when 'es_SP' then nvl(SINQTVER.LIB_ES,SINQTVER.LIB_FR)
else SINQTVER.LIB_FR

end SINQTVER__LIB,
    Table__54.DT_FACT,
    Table__54.DT_VD,
    Table__54.DT_COMM_CLI_FIN_VD,
    Table__54.DATIMM,
    SINQTCMP.CODE SINQTCMP__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTCMP.LIB_EN,SINQTCMP.LIB_FR)
when 'fr_FR' then SINQTCMP.LIB_FR
when 'es_SP' then nvl(SINQTCMP.LIB_ES,SINQTCMP.LIB_FR)
else SINQTCMP.LIB_FR

end SINQTCMP__LIB,
    SINQTCND.CODE SINQTCND__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTCND.LIB_EN,SINQTCND.LIB_FR)
when 'fr_FR' then SINQTCND.LIB_FR
when 'es_SP' then nvl(SINQTCND.LIB_ES,SINQTCND.LIB_FR)
else SINQTCND.LIB_FR

end SINQTCND__LIB,
    SINQTCLI.CODE SINQTCLI__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTCLI.LIB_EN,SINQTCLI.LIB_FR)
when 'fr_FR' then SINQTCLI.LIB_FR
when 'es_SP' then nvl(SINQTCLI.LIB_ES,SINQTCLI.LIB_FR)
else SINQTCLI.LIB_FR

end SINQTCLI__LIB,
    SINQTSEG.CODE SINQTSEG__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTSEG.LIB_EN,SINQTSEG.LIB_FR)
when 'fr_FR' then SINQTSEG.LIB_FR
when 'es_SP' then nvl(SINQTSEG.LIB_ES,SINQTSEG.LIB_FR)
else SINQTSEG.LIB_FR

end SINQTSEG__LIB,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTZDS.LIB_EN,SINQTZDS.LIB_FR)
when 'fr_FR' then SINQTZDS.LIB_FR
when 'es_SP' then nvl(SINQTZDS.LIB_ES,SINQTZDS.LIB_FR)
else SINQTZDS.LIB_FR

end SINQTZDS__LIB,
    SINQTSFA.CODE SINQTSFA__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTSFA.LIB_EN,SINQTSFA.LIB_FR)
when 'fr_FR' then SINQTSFA.LIB_FR
when 'es_SP' then nvl(SINQTSFA.LIB_ES,SINQTSFA.LIB_FR)
else SINQTSFA.LIB_FR

end SINQTSFA__LIB,
    SINQTFAM.CODE SINQTFAM__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTFAM.LIB_EN,SINQTFAM.LIB_FR)
when 'fr_FR' then SINQTFAM.LIB_FR
when 'es_SP' then nvl(SINQTFAM.LIB_ES,SINQTFAM.LIB_FR)
else SINQTFAM.LIB_FR

end SINQTFAM__LIB,
    SINQTRUB.CODE SINQTRUB__CODE,
    case 'fr_FR'
when 'en_GB' then SINQTRUB.LIB_EN
when 'fr_FR' then SINQTRUB.LIB_FR
when 'es_SP' then SINQTRUB.LIB_ES
else SINQTRUB.LIB_FR

end SINQTRUB__LIB,
    SINQTOPC.CODE SINQTOPC__CODE,
    case 'fr_FR'
when 'en_GB' then SINQTOPC.LIB_EN
when 'fr_FR' then SINQTOPC.LIB_FR
when 'es_SP' then SINQTOPC.LIB_ES
else SINQTOPC.LIB_FR

end SINQTOPC_LIB,
    SINQTCMA.CODE SINQTCMA__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTCMA.LIB_EN,SINQTCMA.LIB_FR)
when 'fr_FR' then SINQTCMA.LIB_FR
when 'es_SP' then nvl(SINQTCMA.LIB_ES,SINQTCMA.LIB_FR)
else SINQTCMA.LIB_FR

end SINQTCMA__LIB,
    SINQTCMI.CODE SINQTCMI__CODE,
    case 'fr_FR'
when 'en_GB' then nvl(SINQTCMI.LIB_EN,SINQTCMI.LIB_FR)
when 'fr_FR' then SINQTCMI.LIB_FR
when 'es_SP' then nvl(SINQTCMI.LIB_ES,SINQTCMI.LIB_FR)
else SINQTCMI.LIB_FR

end SINQTCMI__LIB,
    Table__54.TYPE_FLOTTE_VD,
    Table__54.TYPE_OPE_ESSOR,
    Table__54.TYP_UTIL_VD,
    Table__54.CODE_PROFESSION_VD,
    Table__54.CODE_PROMO,
    Table__54.CODE_PROMO2,
    SINQTCYR.ANNEE_MOIS,
    sum(Table__54.VOLUME_AJ) VOLUME_AJ,
    sum(Table__54.PRIX_VENTE) PRIX_VENTE,
    sum(Table__54.PRIX_VENTE_AJ) PRIX_VENTE_AJ,
    sum(Table__54.PV_OPTIONS) PV_OPTIONS,
    sum(Table__54.PV_VERSION) PV_VERSION,
    sum(Table__54.MACOM_CONSO) MACOM_CONSO,
    sum(Table__54.MACOM_CONSO_AJ) MACOM_CONSO_AJ,
    sum(Table__54.MACOM_CONSO_VERSION) MACOM_CONSO_VERSION,
    sum(Table__54.MACOM_CONSO_OPTION) MACOM_CONSO_OPTION,
    sum(Table__54.MACOM_ENTITE) MACOM_ENTITE,
    sum(Table__54.MACOM_ENTITE_AJ) MACOM_ENTITE_AJ,
    sum(Table__54.MACOM_ENTITE_VERSION) MACOM_ENTITE_VERSION,
    sum(Table__54.MACOM_ENTITE_OPTION) MACOM_ENTITE_OPTION,
    sum(Table__54.RBCV_AJ) RBCV_AJ,
    sum(Table__54.MCX_VARIABLES) MCX_VARIABLES,
    SINQTMRQ_2.CODE SINQTMRQ_2__CODE
FROM
    BRC10_SINC0.SINQTVIN SINQTVIN,
    BRC10_SINC0.SINQTCLI SINQTCLI_2,
    BRC10_SINC0.SINQTVER SINQTVER,
    BRC10_SINC0.SINQTFV4 Table__54,
    BRC10_SINC0.SINQTCMP SINQTCMP,
    BRC10_SINC0.SINQTCND SINQTCND,
    BRC10_SINC0.SINQTCLI SINQTCLI,
    BRC10_SINC0.SINQTSEG SINQTSEG,
    BRC10_SINC0.SINQTZDS SINQTZDS,
    BRC10_SINC0.SINQTSFA SINQTSFA,
    BRC10_SINC0.SINQTFAM SINQTFAM,
    BRC10_SINC0.SINQTRUB SINQTRUB,
    BRC10_SINC0.SINQTOPC SINQTOPC,
    BRC10_SINC0.SINQTCMA SINQTCMA,
    BRC10_SINC0.SINQTCMI SINQTCMI,
    BRC10_SINC0.SINQTCYR SINQTCYR,
    BRC10_SINC0.SINQTMRQ SINQTMRQ_2,
    BRC10_SINC0.SINQTBAS SINQTBAS,
    BRC10_SINC0.SINQTFAM SINQTFAM_2
WHERE   ( Table__54.DT_VD BETWEEN TO_DATE('01/01/2019', 'dd/MM/yyyy') AND TO_DATE('30/06/2019', 'dd/MM/yyyy'))
    AND ( Table__54.ID_ZDS=SINQTCMP.ID_ZDS and Table__54.ID_CMP=SINQTCMP.ID  )
    AND ( Table__54.ID_ZDS=SINQTCLI.ID_ZDS and Table__54.ID_SCD=SINQTCLI.ID_SCD and Table__54.ID_CLI=SINQTCLI.ID  )
    AND ( Table__54.ID_ZDS=SINQTCMI.ID_ZDS and Table__54.ID_CMI=SINQTCMI.ID  )
    AND ( Table__54.ID_BAS=SINQTBAS.ID  )
    AND ( Table__54.ID_ZDS=SINQTZDS.ID  )
    AND ( Table__54.ID_RUB=SINQTRUB.ID(+)  )
    AND ( Table__54.ID_ZDS=SINQTFAM.ID_ZDS and Table__54.ID_FAM=SINQTFAM.ID  )
    AND ( Table__54.ID_ZDS=SINQTSFA.ID_ZDS and Table__54.ID_SFA=SINQTSFA.ID  )
    AND ( Table__54.ID_ZDS=SINQTVER.ID_ZDS and Table__54.ID_VER=SINQTVER.ID  )
    AND ( Table__54.ID_ZDS=SINQTSEG.ID_ZDS(+) and Table__54.ID_SEG=SINQTSEG.ID(+)  )
    AND ( Table__54.ID_CND=SINQTCND.ID  )
    AND ( Table__54.ID_CMA=SINQTCMA.ID  )
    AND ( Table__54.ID_ZDS=SINQTOPC.ID_ZDS(+) and Table__54.ID_OPC=SINQTOPC.ID(+)  )
    AND ( Table__54.ID_ZDS=SINQTVIN.ID_ZDS and Table__54.ID_VIN=SINQTVIN.ID  )
    AND ( Table__54.ID_CYC=SINQTCYR.ID  )
    AND ( Table__54.ID_ZDS=SINQTCLI_2.ID_ZDS and Table__54.ID_SCD=SINQTCLI_2.ID_SCD and Table__54.ID_CLI_LIV=SINQTCLI_2.ID  )
    AND ( SINQTFAM_2.ID_MRQ_COM=SINQTMRQ_2.ID  )
    AND ( Table__54.ID_ZDS=SINQTFAM_2.ID_ZDS and Table__54.ID_FAM=SINQTFAM_2.ID  )
    AND ( SINQTBAS.CODE  =  'LA' AND SINQTCLI_2.CODE_PAYS_IMPLANT  IN  ( 'FR','DE','PT','BE','IT','ES','GB','NL','PL','AT'  ) AND ( SINQTBAS.CODE != 'EA'  ))
GROUP BY
  SINQTVIN.CODE, 
  SINQTCLI_2.CODE, 
  SINQTCLI_2.CODE_PAYS_IMPLANT, 
  SINQTVER.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTVER.LIB_EN,SINQTVER.LIB_FR)
when 'fr_FR' then SINQTVER.LIB_FR
when 'es_SP' then nvl(SINQTVER.LIB_ES,SINQTVER.LIB_FR)
else SINQTVER.LIB_FR

end, 
  Table__54.DT_FACT, 
  Table__54.DT_VD, 
  Table__54.DT_COMM_CLI_FIN_VD, 
  Table__54.DATIMM, 
  SINQTCMP.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTCMP.LIB_EN,SINQTCMP.LIB_FR)
when 'fr_FR' then SINQTCMP.LIB_FR
when 'es_SP' then nvl(SINQTCMP.LIB_ES,SINQTCMP.LIB_FR)
else SINQTCMP.LIB_FR

end, 
  SINQTCND.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTCND.LIB_EN,SINQTCND.LIB_FR)
when 'fr_FR' then SINQTCND.LIB_FR
when 'es_SP' then nvl(SINQTCND.LIB_ES,SINQTCND.LIB_FR)
else SINQTCND.LIB_FR

end, 
  SINQTCLI.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTCLI.LIB_EN,SINQTCLI.LIB_FR)
when 'fr_FR' then SINQTCLI.LIB_FR
when 'es_SP' then nvl(SINQTCLI.LIB_ES,SINQTCLI.LIB_FR)
else SINQTCLI.LIB_FR

end, 
  SINQTSEG.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTSEG.LIB_EN,SINQTSEG.LIB_FR)
when 'fr_FR' then SINQTSEG.LIB_FR
when 'es_SP' then nvl(SINQTSEG.LIB_ES,SINQTSEG.LIB_FR)
else SINQTSEG.LIB_FR

end, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTZDS.LIB_EN,SINQTZDS.LIB_FR)
when 'fr_FR' then SINQTZDS.LIB_FR
when 'es_SP' then nvl(SINQTZDS.LIB_ES,SINQTZDS.LIB_FR)
else SINQTZDS.LIB_FR

end, 
  SINQTSFA.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTSFA.LIB_EN,SINQTSFA.LIB_FR)
when 'fr_FR' then SINQTSFA.LIB_FR
when 'es_SP' then nvl(SINQTSFA.LIB_ES,SINQTSFA.LIB_FR)
else SINQTSFA.LIB_FR

end, 
  SINQTFAM.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTFAM.LIB_EN,SINQTFAM.LIB_FR)
when 'fr_FR' then SINQTFAM.LIB_FR
when 'es_SP' then nvl(SINQTFAM.LIB_ES,SINQTFAM.LIB_FR)
else SINQTFAM.LIB_FR

end, 
  SINQTRUB.CODE, 
  case 'fr_FR'
when 'en_GB' then SINQTRUB.LIB_EN
when 'fr_FR' then SINQTRUB.LIB_FR
when 'es_SP' then SINQTRUB.LIB_ES
else SINQTRUB.LIB_FR

end, 
  SINQTOPC.CODE, 
  case 'fr_FR'
when 'en_GB' then SINQTOPC.LIB_EN
when 'fr_FR' then SINQTOPC.LIB_FR
when 'es_SP' then SINQTOPC.LIB_ES
else SINQTOPC.LIB_FR

end, 
  SINQTCMA.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTCMA.LIB_EN,SINQTCMA.LIB_FR)
when 'fr_FR' then SINQTCMA.LIB_FR
when 'es_SP' then nvl(SINQTCMA.LIB_ES,SINQTCMA.LIB_FR)
else SINQTCMA.LIB_FR

end, 
  SINQTCMI.CODE, 
  case 'fr_FR'
when 'en_GB' then nvl(SINQTCMI.LIB_EN,SINQTCMI.LIB_FR)
when 'fr_FR' then SINQTCMI.LIB_FR
when 'es_SP' then nvl(SINQTCMI.LIB_ES,SINQTCMI.LIB_FR)
else SINQTCMI.LIB_FR

end, 
  Table__54.TYPE_FLOTTE_VD, 
  Table__54.TYPE_OPE_ESSOR, 
  Table__54.TYP_UTIL_VD, 
  Table__54.CODE_PROFESSION_VD, 
  Table__54.CODE_PROMO, 
  Table__54.CODE_PROMO2, 
  SINQTCYR.ANNEE_MOIS, 
  SINQTMRQ_2.CODE
)
"""

In [None]:
# Run the SAMARA query through the cx_Oracle URI (presumably returning a pandas
# DataFrame - TODO confirm against oracle.read_df_from_query).
# The result was previously bound only to `dfMADAXPandas`, although it holds the
# SAMARA extraction; it now has a matching name.
# NOTE(review): the same query is also loaded with Spark in the following cell,
# so executing this cell runs the extraction a second time - confirm it is needed.
dfSAMARAPandas = oracle.read_df_from_query(db_uri_cx_oracle, querySAMARA)
# Former name kept as an alias in case other cells still reference it.
dfMADAXPandas = dfSAMARAPandas

In [8]:
# Declare the JDBC read of the SAMARA subquery (10000 rows per fetch) and mark
# the resulting DataFrame for caching.
jdbc_reader = spark_session.read.option("fetchsize", 10000)
dfSAMARA = jdbc_reader.jdbc(db_uri, table=querySAMARA).cache()

In [9]:
# List the column names exposed by the SAMARA DataFrame (the aliases of the query).
# The expression was commented out, so re-running the cell produced no output
# even though a column list is stored below it.
dfSAMARA.columns

['SINQTVIN__CODE',
 'SINQTCLI_2__CODE',
 'CODE_PAYS_IMPLANT',
 'SINQTVER__CODE',
 'SINQTVER__LIB',
 'DT_FACT',
 'DT_VD',
 'DT_COMM_CLI_FIN_VD',
 'DATIMM',
 'SINQTCMP__CODE',
 'SINQTCMP__LIB',
 'SINQTCND__CODE',
 'SINQTCND__LIB',
 'SINQTCLI__CODE',
 'SINQTCLI__LIB',
 'SINQTSEG__CODE',
 'SINQTSEG__LIB',
 'SINQTZDS__LIB',
 'SINQTSFA__CODE',
 'SINQTSFA__LIB',
 'SINQTFAM__CODE',
 'SINQTFAM__LIB',
 'SINQTRUB__CODE',
 'SINQTRUB__LIB',
 'SINQTOPC__CODE',
 'SINQTOPC_LIB',
 'SINQTCMA__CODE',
 'SINQTCMA__LIB',
 'SINQTCMI__CODE',
 'SINQTCMI__LIB',
 'TYPE_FLOTTE_VD',
 'TYPE_OPE_ESSOR',
 'TYP_UTIL_VD',
 'CODE_PROFESSION_VD',
 'CODE_PROMO',
 'CODE_PROMO2',
 'ANNEE_MOIS',
 'VOLUME_AJ',
 'PRIX_VENTE',
 'PRIX_VENTE_AJ',
 'PV_OPTIONS',
 'PV_VERSION',
 'MACOM_CONSO',
 'MACOM_CONSO_AJ',
 'MACOM_CONSO_VERSION',
 'MACOM_CONSO_OPTION',
 'MACOM_ENTITE',
 'MACOM_ENTITE_AJ',
 'MACOM_ENTITE_VERSION',
 'MACOM_ENTITE_OPTION',
 'RBCV_AJ',
 'MCX_VARIABLES',
 'SINQTMRQ_2__CODE']

In [10]:
# Row count of the SAMARA extraction. This is a Spark action, so it is the first
# point in the notebook where the JDBC query actually has to run.
# NOTE(review): the stored output of this cell is a KeyboardInterrupt, i.e. the
# count was aborted in the saved run - the notebook has no recorded row count.
dfSAMARA.count()

KeyboardInterrupt: 

In [117]:
# Show up to 10 distinct values of one column.
# NOTE(review): the column name passed to select() is an empty string, which is
# not one of the query's aliases - fill in the intended column before running.
# NOTE(review): the stored output shows a `LIBELLE` column, which is not among the
# columns of dfSAMARA, so that output appears to come from a different frame.
dfSAMARA.select("").distinct().toPandas().head(10)

Unnamed: 0,LIBELLE
0,MANGUALDE
1,
2,RENNES
3,SEVEL NORD
4,MIZUSHIMA
5,VILLAVERDE
6,SEVEL-VAL DI SANGRO
7,MULHOUSE
8,VIGO
9,KOLIN


In [None]:
# Drop columns from the SAMARA DataFrame, rebinding the same name.
# NOTE(review): both column names are empty strings (placeholders) - the columns
# meant to be removed are not identifiable from this notebook; fill them in or
# remove this cell.
dfSAMARA = dfSAMARA.drop("", "")