In [1]:
# Excel file shouldn't be open. 
# CSV files shouldn't be open.

In [2]:
# Required libraries

In [3]:
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.context import SparkContext
from pyspark.sql.types import *
from pyspark.sql.functions import *

In [4]:
# creating a dictionary to get all the sheets
df_dict = pd.read_excel('./data.xlsx', sheet_name=None)

In [5]:
# Using the sheet names I save the dfs and then I export them into csv file in the same directory
sheets = ['homologacion_pais','homologacion_rating','rating_empresa','rating_soberano']

for sheet in sheets:
    df = df_dict.get(sheet)
    df.to_csv('./'+sheet+'.csv', sep='|')

In [6]:
sc = SparkSession.builder.appName("PysparkDeloitte")\
.config ("spark.sql.shuffle.partitions", "50")\
.config("spark.driver.maxResultSize","5g")\
.config ("spark.sql.execution.arrow.enabled", "true")\
.getOrCreate()

In [7]:
#Creates a spark data frame called as raw_data.
#CSV FILES# 
df_hp = sc.read.csv(sheets[0]+'.csv', sep='|', header=True)
df_hr = sc.read.csv(sheets[1]+'.csv', sep='|', header=True)
df_re = sc.read.csv(sheets[2]+'.csv', sep='|', header=True)
df_rs = sc.read.csv(sheets[3]+'.csv', sep='|', header=True)

In [32]:
# rating_empresa needs to integrate pais, rating_empresa and then rating_soberano

In [33]:
df_hp.createOrReplaceTempView("H_PAIS")
df_hr.createOrReplaceTempView("H_RATING")
df_re.createOrReplaceTempView("R_EMPRESA")
df_rs.createOrReplaceTempView("R_SOBERANO")

In [34]:
# First: we integrate PAIS into the dataframe df_re that we are going to use as a master
df_master = df_re.join(df_hp, df_re.pais_bbg == upper(df_hp.pais_bbg), "inner")
df_master.createOrReplaceTempView("MASTER")

In [48]:
df_master_noagg = sc.sql("select rut, dv, nombre, pais, mdy as rating, 'MDY' as agencia from MASTER union all select rut, dv, nombre, pais, sp as rating, 'SP' as agencia from MASTER union all select rut, dv, nombre, pais, fitch as rating, 'FITCH' as agencia from MASTER")
df_master_noagg.createOrReplaceTempView("MASTER_NOAGG")
df_master_noagg.show()

+--------+---+--------------------+--------------+------+-------+
|     rut| dv|              nombre|          pais|rating|agencia|
+--------+---+--------------------+--------------+------+-------+
|41001042|  9|        UBS AG SUIZA|         suiza|   Aa2|    MDY|
|42002576|  9|BANCOBCO COLPATRI...|      colombia|  Baa2|    MDY|
|40000098|  0|    BBVA Colombia SA|      colombia|  Baa1|    MDY|
|41001047|  K|BCO DE OCCIDENTE ...|      colombia|    NR|    MDY|
|41001110|  7|             BANCAFE|      colombia|  Baa2|    MDY|
|41001068|  2|          BCO BOGOTA|      colombia|  Baa2|    MDY|
|44000136|  K|China Merchants B...|       bahamas|    A3|    MDY|
|41203019|  2|BCO MERCANTIL DEL...|        mexico|  Baa1|    MDY|
|40000116|  2|BCO DE CREDITO DE...|          peru|    A3|    MDY|
|40000032|  8|BCO CONTINENTAL LIMA|          peru|    A3|    MDY|
|41001363|  0|BCO SAFRA SA CAMP...|        brasil|   Ba1|    MDY|
|41001310|  K|BCO DO BRASIL S A...|        brasil|   Ba1|    MDY|
|42001513|

In [41]:
df_master_noagg_on = sc.sql('select M.*, HR.orden_norma, HR.rating_norma from MASTER_NOAGG M LEFT JOIN H_RATING HR ON M.RATING=HR.RATING AND M.AGENCIA=HR.AGENCIA_HOMOL')
df_master_noagg_on.createOrReplaceTempView("MASTER_NOAGG_ON")
df_master_noagg_on.show()

+--------+---+--------------------+--------------+------+-------+-----------+------------+
|     rut| dv|              nombre|          pais|rating|agencia|orden_norma|rating_norma|
+--------+---+--------------------+--------------+------+-------+-----------+------------+
|41001042|  9|        UBS AG SUIZA|         suiza|   Aa2|    MDY|          3|          AA|
|42002576|  9|BANCOBCO COLPATRI...|      colombia|  Baa2|    MDY|          9|         BBB|
|40000098|  0|    BBVA Colombia SA|      colombia|  Baa1|    MDY|          8|        BBB+|
|41001047|  K|BCO DE OCCIDENTE ...|      colombia|    NR|    MDY|       null|        null|
|41001110|  7|             BANCAFE|      colombia|  Baa2|    MDY|          9|         BBB|
|41001068|  2|          BCO BOGOTA|      colombia|  Baa2|    MDY|          9|         BBB|
|44000136|  K|China Merchants B...|       bahamas|    A3|    MDY|          7|          A-|
|41203019|  2|BCO MERCANTIL DEL...|        mexico|  Baa1|    MDY|          8|        BBB+|

In [43]:
df_master_noagg_on_ranked = sc.sql('select *, rank() OVER(PARTITION BY rut,dv,nombre,pais ORDER BY orden_norma DESC) as rank from MASTER_NOAGG_ON')
df_master_noagg_on_ranked.createOrReplaceTempView("MASTER_NOAGG_ON_RANKED")
df_master_noagg_on_ranked.show()

+--------+---+--------------------+--------------+------+-------+-----------+------------+----+
|     rut| dv|              nombre|          pais|rating|agencia|orden_norma|rating_norma|rank|
+--------+---+--------------------+--------------+------+-------+-----------+------------+----+
|40000008|  5|SOCIETE GENERAL P...|       francia|    A-|  FITCH|          7|          A-|   1|
|40000008|  5|SOCIETE GENERAL P...|       francia|     A|     SP|          6|           A|   2|
|40000008|  5|SOCIETE GENERAL P...|       francia|    A1|    MDY|          5|          A+|   3|
|40000016|  6|DEUTSCHE BANK AG-...|      alemania|   BBB|  FITCH|          9|         BBB|   1|
|40000016|  6|DEUTSCHE BANK AG-...|      alemania|  BBB+|     SP|          8|        BBB+|   2|
|40000016|  6|DEUTSCHE BANK AG-...|      alemania|    A3|    MDY|          7|          A-|   3|
|40000032|  8|BCO CONTINENTAL LIMA|          peru|  BBB+|     SP|          8|        BBB+|   1|
|40000032|  8|BCO CONTINENTAL LIMA|     

In [49]:
df_master2 = sc.sql('select rut,dv,nombre,pais,rating_norma as rating_empresa from MASTER_NOAGG_ON_RANKED where rank = 1').dropDuplicates()
df_master2.createOrReplaceTempView("MASTER2")
df_master2.show()

+--------+---+--------------------+--------------+--------------+
|     rut| dv|              nombre|          pais|rating_empresa|
+--------+---+--------------------+--------------+--------------+
|40000008|  5|SOCIETE GENERAL P...|       francia|            A-|
|40000016|  6|DEUTSCHE BANK AG-...|      alemania|           BBB|
|40000032|  8|BCO CONTINENTAL LIMA|          peru|          BBB+|
|40000039|  5|RAIFFEISEN ZENTRA...|       austria|            A-|
|40000070|  0|BBANK OF AMERICA ...|estados unidos|            A+|
|40000098|  0|    BBVA Colombia SA|      colombia|           BBB|
|40000116|  2|BCO DE CREDITO DE...|          peru|          BBB+|
|40000154|  5|BNP PARIBAS S.A. ...|       francia|            A+|
|40000177|  4|   ITAU UNIBANCO S-A|        brasil|           BB-|
|40000315|  7|LLOYS BANK INTERN...|   reino unido|          null|
|40000355|  6|BAYERISCHE LANDES...|estados unidos|            A-|
|40000412|  9|       HSBC Bank PLC|   reino unido|            A+|
|41001007|

In [54]:
# rating_soberano cleaning
# rating_soberano spliting
# join entre rating_soberano splited y rating_soberano
df_rs.show()

+---+--------------------+------------------+--------------------+--------------------+--------------------+--------------------+
|_c0|            pais_bbg|            ticker|              region|                  sp|               fitch|                 mdy|
+---+--------------------+------------------+--------------------+--------------------+--------------------+--------------------+
|  0|           Abu Dhabi|1022337Z UH Equity|  Africa/Middle East|                  AA|                  AA|                 Aa2|
|  1|             Albania|   1001Z AL Equity|      Eastern Europe|                  B+|                null|                  B1|
|  2|             Algeria|   3700Z US Equity|  Africa/Middle East|                null|                null|                null|
|  3|             Andorra|   1095Z SM Equity|      Western Europe|                 BBB|                null|                null|
|  4|              Angola|  32272Z AX Equity|  Africa/Middle East|                CCC+|   

In [58]:
df_rs_wu = sc.sql('select pais_bbg, REPLACE(sp,"u","") as SP, REPLACE(fitch,"u","") as FITCH, REPLACE(mdy,"u","") as MDY from R_SOBERANO')
df_rs_wu.createOrReplaceTempView("R_SOBERANO_WU")
df_rs_wu.show()

+--------------------+--------------------+--------------------+--------------------+
|            pais_bbg|                  SP|               FITCH|                 MDY|
+--------------------+--------------------+--------------------+--------------------+
|           Abu Dhabi|                  AA|                  AA|                 Aa2|
|             Albania|                  B+|                null|                  B1|
|             Algeria|                null|                null|                null|
|             Andorra|                 BBB|                null|                null|
|              Angola|                CCC+|                 CCC|                Caa1|
|           Argentina|                CCC+|                  WD|                  Ca|
|             Armenia|                null|                  B+|                 Ba3|
|               Aruba|                 BBB|                  BB|                null|
|           Australia|                 AAA|           

In [None]:
sc.sql('select pais_bbg, SP as rating, "" from R_SOBERANO_WU')