In [1]:
%%pyspark

from pyspark.sql import SparkSession

# Initialize the Spark session with the specific configuration to improve join performance
spark = SparkSession.builder \
    .appName("Optimized Joins") \
    .config("spark.advise.nonEqJoinConvertRule.enable", "true") \
    .getOrCreate()

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 3, Finished, Available)

In [2]:
%%sql
-- Rebuild the Delta table that pairs each Nucleus account row with the id of
-- every live HubSpot company it matches. One output row is produced per
-- matching (account, company) pair.
CREATE OR REPLACE TABLE nucleus_accounts_for_hubspot_companies_association
USING DELTA
LOCATION 'Tables/nucleus_accounts_for_hubspot_companies_association'
AS
SELECT
    h.id,
    c.name,
    c.icrm_account_id,
    c.dealer_id_pdn,
    c.dcrm_account_name,
    c.dcrm_account_id,
    c.dcrm_parent_account,
    c.dcrm_group_id,
    c.dcrm_dealer_status,
    c.dcrm_oem,
    c.dcrm_make,
    c.dcrm_additional_attributes,
    c.dcrm_rbc,
    -- NOTE(review): "namne" looks like a typo for "name"; kept unchanged
    -- because it has to match the upstream column -- confirm.
    c.dcrm_group_namne,
    c.ae_csg_user_id,
    c.ae_autofi_user_id,
    c.dealer_id_bac,
    c.icrm_parent_bac,
    c.icrm_parent_account,
    c.icrm_dealer_id_pdn,
    c.icrm_dealer_group_code,
    c.icrm_dealer_group_name,
    c.icrm_dealer_status,
    c.icrm_region_description,
    c.icrm_sales_area_description,
    c.icrm_dealer_type,
    c.icrm_makes,
    c.icrm_f_i_relationship,
    c.icrm_f_i_territory_description,
    c.icrm_account_dba_name
FROM nucleus_accounts_for_hubspot_companies AS c
-- A company matches when EITHER of the two PDN keys agrees.
INNER JOIN silver.hubspot_company AS h
    ON (
        h.property_icrm_dealer_id_pdn = c.icrm_dealer_id_pdn
        OR h.property_dealer_id_pdn = c.dealer_id_pdn
    )
-- Keep only HubSpot companies that are not flagged as deleted.
WHERE h.id IS NOT NULL
    AND h._fivetran_deleted = 0
    AND h.is_deleted = 0

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 4, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [3]:
%%sql
-- Temporary view: Nucleus contacts that have a live HubSpot contact with the
-- same email, enriched with the HubSpot id and a persona derived from jobtitle.
--
-- The join is written as INNER JOIN because the WHERE clause filters on
-- columns of the HubSpot side (h.id, h._fivetran_deleted, h.is_deleted), which
-- discards every unmatched contact anyway; a LEFT JOIN here would return the
-- same rows while suggesting that unmatched contacts are kept.
CREATE OR REPLACE TEMPORARY VIEW temp_hubspot_contacts_with_hs_id AS
SELECT
    h.id,
    c.email,
    c.firstname,
    c.lastname,
    c.hs_persona,  -- persona computed from jobtitle in the subquery below
    c.dealer_id_bac,
    c.dealer_id,
    c.icrm_account_id,
    c.icrm_dealer_group_code,
    c.icrm_contact_id,
    c.jobtitle,
    c.import_pdn,
    c.dcrm_account_id,
    c.dcrm_group_id,
    c.dcrm_contact_id,
    c.import_contact_company_name,
    c.dealer_status,
    c.dealership_dba_name,
    c.dealer_region,
    c.sales_area,
    c.f_i_territory,
    c.mobilephone,
    c.dealership_phone_number
FROM
    (
        -- Map each job title onto one persona bucket; the comparison is an
        -- exact string match, and any title not listed (including NULL)
        -- falls through to 'Unknown'.
        SELECT *,
            CASE
                WHEN jobtitle IN ('CEO', 'CFO', 'CFO/Controller', 'Comptroller', 'Controller', 'Dealer Principal', 'Owner', 'President', 'Owner/Principal') THEN 'Owner/Principal'
                WHEN jobtitle IN ('Ally Systems Authorizer', 'Assist Service Manager', 'Branch Manager', 'Business Manager', 'Fixed Operations Manager', 'Garage Insurance Decision Maker', 'General Manager', 'General Sales Manager', 'GM Warranty Manager', 'Inventory Manager', 'Manager', 'Office Manager', 'Operations Director', 'Treasurer', 'Vice President') THEN 'General Manager'
                WHEN jobtitle IN ('Credit Admin Contact', 'DS-Decision Maker', 'DS-Gate Keeper', 'F&I Manager', 'Finance Director', 'GAP/Aftermarket Cancellation', 'Used Car F&I Manager', 'Warranty Administrator', 'Finance Manager') THEN 'Finance Manager'
                WHEN jobtitle IN ('Fleet/Commercial Admin', 'Fleet/Commercial Manager', 'Fleet/Commercial Sales', 'New Car Manager', 'Sales Manager', 'Sales Staff', 'SmartAuction Buyer', 'SmartAuction Seller', 'SmartAuction User', 'Used Car Manager') THEN 'Sales Manager'
                WHEN jobtitle IN ('CIO', 'Claims IT Manager', 'Clearlane Leads', 'Compliance Officer', 'COVID-Communications', 'Held Offering Contact', 'Human Resources Manager', 'Internet Manager', 'Marketing Manager', 'Office Staff', 'Parts Manager', 'Service Advisor', 'Service Director', 'Service Manager', 'Shop Foreman', 'Targeted Messaging Contact', 'Title Clerk') THEN 'Never Targeted'
                WHEN jobtitle = 'Other' THEN 'Other'
                WHEN jobtitle = 'No Longer Employed' THEN 'No Longer Employed'
                ELSE 'Unknown'
            END AS hs_persona
        FROM nucleus_contacts_for_hubspot_contacts
    ) c
INNER JOIN silver.hubspot_contact h ON h.property_email = c.email
-- Keep only HubSpot contacts that are not flagged as deleted.
WHERE h.id IS NOT NULL and h._fivetran_deleted = 0 and h.is_deleted = 0

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 5, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [4]:
%%sql
-- Materialise the temp_hubspot_contacts_with_hs_id view as a Delta table,
-- casting the identifier-like columns to explicit types on the way.
-- NOTE(review): the INT casts assume those ids are purely numeric; confirm
-- against the source data.
CREATE OR REPLACE TABLE nucleus_contacts_for_hubspot_contacts_association
USING DELTA
LOCATION 'Tables/nucleus_contacts_for_hubspot_contacts_association'
AS
SELECT
    id,
    email,
    firstname,
    lastname,
    CAST(dealer_id_bac AS INT)          AS dealer_id_bac,
    CAST(dealer_id AS INT)              AS dealer_id,
    icrm_account_id,
    CAST(icrm_dealer_group_code AS INT) AS icrm_dealer_group_code,
    icrm_contact_id,
    jobtitle,
    -- NOTE(review): whether this cast enforces the 5-character limit depends
    -- on the Spark version -- confirm if the length matters downstream.
    CAST(import_pdn AS VARCHAR(5))      AS import_pdn,
    dcrm_account_id,
    CAST(dcrm_group_id AS INT)          AS dcrm_group_id,
    dcrm_contact_id,
    import_contact_company_name,
    dealer_status,
    dealership_dba_name,
    dealer_region,
    sales_area,
    f_i_territory,
    mobilephone,
    dealership_phone_number,
    hs_persona
FROM temp_hubspot_contacts_with_hs_id


StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 6, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [5]:
%%sql
-- Temporary view of candidate contact -> company associations.
--   from_id    : HubSpot contact id
--   to_id      : HubSpot company id
--   hs_persona : persona of the contact, carried along for later labelling
-- A contact is paired with a company when they share either the iCRM account
-- id or the DCRM account id.
CREATE OR REPLACE TEMPORARY VIEW pre_association AS
SELECT
    cont.id AS from_id,
    comp.id AS to_id,
    cont.hs_persona
FROM nucleus_contacts_for_hubspot_contacts_association cont
INNER JOIN nucleus_accounts_for_hubspot_companies_association comp
    ON (comp.icrm_account_id = cont.icrm_account_id OR comp.dcrm_account_id = cont.dcrm_account_id)
WHERE cont.id IS NOT NULL
    AND comp.id IS NOT NULL
    -- String literals are single-quoted: double quotes are only treated as
    -- strings by default and become identifiers when
    -- spark.sql.ansi.doubleQuotedIdentifiers is enabled.
    AND cont.hs_persona IN ('Finance Manager', 'Owner/Principal', 'Unknown', 'General Manager', 'Sales Manager', 'Never Targeted', 'No Longer Employed', 'Other')

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 7, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [6]:
%%sql
-- Final Delta table of distinct contact -> company associations, labelled with
-- the HubSpot association type whose label equals the contact's persona.
--
-- INNER JOIN is used because the WHERE clause requires the association-type
-- columns to be non-NULL, which removes every row without a matching type;
-- a LEFT JOIN would return exactly the same rows while implying otherwise.
CREATE OR REPLACE TABLE hubspot_associations
USING DELTA
LOCATION 'Tables/hubspot_associations' AS
SELECT DISTINCT
    hub.from_object_type,
    v.from_id,
    hub.to_object_type,
    v.to_id,
    hub.name AS hubspot_label,
    hub.id AS hubspot_code,
    hub.category AS hubspot_type
FROM pre_association v
INNER JOIN silver.hubspot_association_type hub ON v.hs_persona = hub.label
-- Drop any row where a field of the association is missing.
WHERE hub.from_object_type IS NOT NULL
    AND v.from_id IS NOT NULL
    AND hub.to_object_type IS NOT NULL
    AND v.to_id IS NOT NULL
    AND hub.name IS NOT NULL
    AND hub.id IS NOT NULL
    AND hub.category IS NOT NULL

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 8, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [7]:
%%sql
-- Sanity check: number of association rows written by the previous cell.
SELECT COUNT(*)
FROM hubspot_associations

StatementMeta(, 68b84ccb-befd-4161-9dbb-b8ae3803cef3, 9, Finished, Available)

<Spark SQL result set with 1 rows and 1 fields>