In [None]:
from pyspark.sql.functions import col, when

In [None]:
# Storage settings interpolated into the abfss:// paths built in the cells below.
file_system_name = 'relmeshadlsfs'
# Synapse Workspace ADLS account name. Left empty here; set it before running,
# otherwise the generated abfss:// URLs have no storage-account host.
data_lake_account_name = ''

In [None]:
# ADLS locations of the account extract and of the CSV that maps its column names.
sf_accounts_path = f'abfss://{file_system_name}@{data_lake_account_name}.dfs.core.windows.net/createdsfdata/accounts_sf.csv'
mapping_file = f'abfss://{file_system_name}@{data_lake_account_name}.dfs.core.windows.net/columnmappinngs/Account_Column_Mapping_SF.csv'

# Both files are read as CSV with a header row; the mapping is collected to the driver as pandas.
df_account_sf = spark.read.format('csv').options(header=True).load(sf_accounts_path)
df_acc_map_sf = spark.read.format('csv').options(header=True).load(mapping_file).toPandas()

# Lookup from each SFColumn value to the SAColumn name it is renamed to.
col_map = dict(zip(df_acc_map_sf['SFColumn'], df_acc_map_sf['SAColumn']))

# Columns kept after renaming ('Type' is needed later to split accounts from parent accounts).
sa_acc_cols = [
    'Id', 'Name', 'DomainName', 'ParentAccount', 'PrimaryContact',
    'Industry', 'Sector', 'TransactionSize', 'Tier', 'Type',
]

# First keep only the mapped columns under their new names, then narrow to sa_acc_cols.
df_account_sf = (
    df_account_sf
    .select(*[col(sf_name).alias(sa_name) for sf_name, sa_name in col_map.items()])
    .select(*sa_acc_cols)
)
# Preview while developing: display(df_account_sf.head(2))

In [None]:
# ADLS locations of the contact extract and of the CSV that maps its column names.
sf_contacts_path = f'abfss://{file_system_name}@{data_lake_account_name}.dfs.core.windows.net/createdsfdata/contacts_sf.csv'
mapping_file = f'abfss://{file_system_name}@{data_lake_account_name}.dfs.core.windows.net/columnmappinngs/Contact_Column_Mapping_SF.csv'

# Both files are read as CSV with a header row; the mapping is collected to the driver as pandas.
df_contact_sf = spark.read.format('csv').options(header=True).load(sf_contacts_path)
df_contact_map_sf = spark.read.format('csv').options(header=True).load(mapping_file).toPandas()

# Lookup from each SFColumn value to the SAColumn name it is renamed to.
col_map = dict(zip(df_contact_map_sf['SFColumn'], df_contact_map_sf['SAColumn']))

# Columns kept after renaming ('ContactType__c' is needed later to split contacts from employees).
sa_contact_cols = [
    'Id', 'AccountId', 'FirstName', 'LastName',
    'Name', 'Email', 'Title', 'ContactType__c',
]

# First keep only the mapped columns under their new names, then narrow to sa_contact_cols.
df_contact_sf = (
    df_contact_sf
    .select(*[col(sf_name).alias(sa_name) for sf_name, sa_name in col_map.items()])
    .select(*sa_contact_cols)
)
# Preview while developing: display(df_contact_sf.head(2))

In [None]:
# Rows whose Type is 'Account' become the account table (Type itself is dropped).
df_acc = (
    df_account_sf
    .where(col('Type') == 'Account')
    .select(
        'Id', 'Name', 'DomainName', 'ParentAccount', 'PrimaryContact',
        'Industry', 'Sector', 'TransactionSize', 'Tier',
    )
)
# Rows whose Type is 'ParentAccount' keep only their identifying columns.
df_parent_acc = (
    df_account_sf
    .where(col('Type') == 'ParentAccount')
    .select('Id', 'Name', 'DomainName')
)

# Persist both as tables, replacing any existing contents.
df_acc.write.saveAsTable("account", mode="overwrite")
df_parent_acc.write.saveAsTable("parentaccount", mode="overwrite")

In [None]:
# Split the mapped contact rows by ContactType__c. 'Contact' rows keep AccountId;
# 'Employee' rows are saved without it.
df_contact = (
    df_contact_sf
    .filter(col('ContactType__c') == 'Contact')
    .select('Id', 'AccountId', 'FirstName', 'LastName', 'Name', 'Email', 'Title')
)
df_employee = (
    df_contact_sf
    .filter(col('ContactType__c') == 'Employee')
    .select('Id', 'FirstName', 'LastName', 'Name', 'Email', 'Title')
)

# Exec_Flag is 1 when Title is exactly one of the listed titles (case-sensitive), else 0.
# A NULL Title makes isin() evaluate to NULL, which when() does not treat as a match,
# so those rows fall through to otherwise(0).
df_contact = df_contact.withColumn(
    'Exec_Flag',
    when(col('Title').isin('CEO', 'CTO', 'CIO', 'CFO'), 1).otherwise(0),
)

# Persist both as tables, replacing any existing contents.
df_contact.write.mode("overwrite").saveAsTable("contact")
df_employee.write.mode("overwrite").saveAsTable("employee")

In [None]:
# Build SellersExtContacts: one row per contact that belongs to an account
# (inner join on account.Id = contact.AccountId) and has a non-null Email.
# Quarter, year and industry tier are literal constants in the query, IsBoardMember is
# always 0, IsExec comes from contact.Exec_Flag, and Email is lower-cased.
sql_stmt = '''
    Select c.AccountId as Account_Id, a.Name as Account_Name,
    a.PrimaryContact as Account_Primary_Caller__c, 'Q1' as Account_Timing_Quarter__c, 
    '2021' as Account_Timing__c, 'Tier1' as Account_Industry_Tier__c,
    a.Industry, a.Sector,
    c.Id as Contact_Id, c.FirstName, c.LastName, c.Name as Full_Name__c,Title, lower(Email) as Email,
    0 as IsBoardMember, Exec_Flag as IsExec
    from account as a 
    inner join contact as c on a.Id = c.AccountId
    where Email is not null
'''
df = spark.sql(sql_stmt)
# Replace any existing table contents with the query result.
df.write.saveAsTable("SellersExtContacts", mode="overwrite")

In [None]:
# Build ExtContacts: a narrower projection of SellersExtContacts (contact id, full name,
# title and the two flags), restricted to rows whose Account_Id is not null.
sql_stmt = '''
SELECT Contact_Id, Full_Name__c, Title,IsBoardMember,IsExec
from SellersExtContacts
where Account_Id is not null
'''

df = spark.sql(sql_stmt)
# Replace any existing table contents with the query result.
df.write.saveAsTable("ExtContacts", mode="overwrite")

In [None]:
# Build PrimaryCallers: for every account whose PrimaryContact matches an Employee Id,
# emit the account id, that employee's id and name, and the lower-cased email.
sql_stmt = '''select  a.Id as Account_Id, PrimaryContact as Contact_Id,
            c.Name as Full_Name__c, lower(Email) as Email
            FROM Account as a 
            inner join Employee as c on a.PrimaryContact = c.Id'''

df_primarycallers = spark.sql(sql_stmt)
# Replace any existing table contents with the query result.
df_primarycallers.write.saveAsTable("PrimaryCallers", mode="overwrite")

In [None]:
# HPTs list: build HighPriorityTargets from accounts that have a row in PrimaryCallers
# (inner join on Account_Id). Industry tier, timing year/quarter and estimated
# transaction size are literal constants in the query.
sql_stmt = '''SELECT a.Id,Name,Industry,Sector,'Tier1' as Industry_Tier__c,'2021' as Timing__c, 'Q1' as Timing_Quarter__c,
            '10000' as Est_Transaction_Size__c,
			p.Contact_Id as Primary_Caller_Id,p.Full_Name__C as Primary_Caller
			FROM Account as a 
			inner join PrimaryCallers as p on p.Account_Id = a.Id'''

df_netgraphAccounts = spark.sql(sql_stmt)
# Replace any existing table contents with the query result.
df_netgraphAccounts.write.saveAsTable("HighPriorityTargets", mode="overwrite")