# Intro

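This notebook creates the view `v_hubspot_aggregate_job_titles`, which returns one row per HubSpot contact email together with an aggregated list of that contact's job titles from the DCRM and ICRM contact tables.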


## Change History

<style>
  table {margin-left: 0 !important;}
</style>

| Date    | Author | Description |
| :-------- | :------- | :------- | 
|2025-02-12 | Mclain R |  Initial creation |

In [61]:
CREATE OR ALTER VIEW v_hubspot_aggregate_job_titles AS

-- Purpose: one row per HubSpot contact email, with a JSON-style array string of
-- every distinct job title found for that email in the DCRM and ICRM contact tables.
--
-- Output columns:
--   email                  property_email from hubspot__contact
--   aggregated_job_titles  e.g. ["Manager", "Owner"]; NULL when no titles match the email

-- Split contact_role on ';' and dedupe so there is one row per email/job_title pair.
-- Deduping here keeps the later UNION and aggregation small.
WITH dcrm_deduped AS (
    SELECT DISTINCT
        email,
        TRIM(value) AS job_title
    FROM [bronze_lakehouse].[dbo].[nucleus__dcrm_contacts]
    CROSS APPLY STRING_SPLIT(contact_role, ';')
    WHERE contact_role IS NOT NULL
        AND email IS NOT NULL
        AND email LIKE '%@%'  -- cheap sanity filter: keep only values that contain an '@'
),

-- Same split/dedupe for ICRM, where the titles live in contact_title.
icrm_deduped AS (
    SELECT DISTINCT
        email,
        TRIM(value) AS job_title
    FROM [bronze_lakehouse].[dbo].[nucleus__icrm_contacts]
    CROSS APPLY STRING_SPLIT(contact_title, ';')
    WHERE contact_title IS NOT NULL
        AND email IS NOT NULL
        AND email LIKE '%@%'
),

-- Merge both sources into one email/job_title set.
-- UNION (not UNION ALL) is deliberate: it removes titles that appear in both CRMs
-- and any duplicates created once the '�' character has been stripped.
-- Rows whose title is the literal text 'null' are dropped.
-- NOTE(review): '�' looks like a mis-encoding artifact in the source data; confirm
-- this is the exact character that needs to be removed.
combined AS (
    SELECT
        email,
        REPLACE(job_title, '�', '') AS job_title
    FROM dcrm_deduped
    WHERE job_title <> 'null'

    UNION

    SELECT
        email,
        REPLACE(job_title, '�', '') AS job_title
    FROM icrm_deduped
    WHERE job_title <> 'null'
),

-- Build the array string of job titles per email.
--   * Empty titles (from inputs such as 'a;;b' or a trailing ';', or titles that
--     consisted only of the stripped character) are skipped so the array never
--     contains "" elements.
--   * Backslashes and double quotes inside a title are escaped so the result stays
--     a well-formed array of quoted strings. Backslash is escaped first so the
--     backslash added for '"' is not doubled.
--   * WITHIN GROUP fixes the element order, so the same data always yields the
--     same string and downstream diff-based syncs do not see spurious changes.
-- NOTE(review): if the title column is not a (MAX) type, STRING_AGG output is
-- capped at 8000 bytes per email; confirm no contact can exceed that.
combined_agg AS (
    SELECT
        email,
        '[' + STRING_AGG(
            '"' + REPLACE(REPLACE(job_title, '\', '\\'), '"', '\"') + '"',
            ', '
        ) WITHIN GROUP (ORDER BY job_title) + ']' AS aggregated_job_titles
    FROM combined
    WHERE job_title <> ''
    GROUP BY email
),

-- Pre-filter HubSpot contacts down to distinct, non-empty emails.
hubspot AS (
    SELECT DISTINCT
        property_email
    FROM [silver_lakehouse].[dbo].[hubspot__contact]
    WHERE property_email IS NOT NULL
        AND property_email <> ''
)

-- Final select. LEFT JOIN keeps every HubSpot contact: contacts without any
-- aggregated job titles get NULL, which Census uses to overwrite any existing
-- value with NULL.
SELECT
    h.property_email AS email,
    c.aggregated_job_titles
FROM hubspot AS h
LEFT JOIN combined_agg AS c
    ON c.email = h.property_email