In [None]:
#parameters
# NOTE(review): the empty strings look like placeholders that the caller/pipeline
# overrides before the notebook runs — confirm.
# Database name interpolated into the JDBC connection URLs used by the read cells.
sql_database_name = ""
# Server name; used as the "<name>.sql.azuresynapse.net" host prefix in the JDBC URLs.
sql_server_name = ""
# Key Vault name passed to TokenLibrary.getSecret when fetching credentials.
keyvault_name = ""

In [None]:
# Every credential is fetched from the same Key Vault through the same linked service.
_KEYVAULT_LINKED_SERVICE = 'keyvault_linkedservice'

# SQL login used by the JDBC reads.
sql_username = TokenLibrary.getSecret(keyvault_name, 'jdbcUsername', _KEYVAULT_LINKED_SERVICE)
sql_password = TokenLibrary.getSecret(keyvault_name, 'jdbcPassword', _KEYVAULT_LINKED_SERVICE)

# Connection details handed to the Gremlin client.
GremlinEndpoint = TokenLibrary.getSecret(keyvault_name, 'gremlinEndpoint', _KEYVAULT_LINKED_SERVICE)
GremlinUsername = TokenLibrary.getSecret(keyvault_name, 'gremlinUsername', _KEYVAULT_LINKED_SERVICE)
GremlinPassword = TokenLibrary.getSecret(keyvault_name, 'gremlinPassword', _KEYVAULT_LINKED_SERVICE)

In [None]:
from pyspark.sql.types import *
from gremlin_python.driver import client, serializer, protocol
from gremlin_python.driver.protocol import GremlinServerError
import sys
import traceback

In [None]:
def emptyIfNull(s):
    """Return ``str(s)``, or the empty string when ``s`` is ``None``."""
    if s is None:
        return ''
    return str(s)

def ExecGremlinQuery(queryList):
    """Submit every Gremlin query in ``queryList`` and print the outcome of each.

    A single client is opened for the whole batch and is always closed, even
    when a query raises an unexpected exception.

    Args:
        queryList: Iterable of Gremlin query strings. An empty list is a no-op
            and opens no connection.

    Returns:
        None. Results and server errors are reported with ``print``.

    Raises:
        Any exception other than ``GremlinServerError`` raised while submitting
        a query propagates to the caller (after the client has been closed).
    """
    if not queryList:
        return

    # One client for the whole batch instead of one per query.
    GremClient = client.Client(GremlinEndpoint, 'g', username=GremlinUsername, password=GremlinPassword, message_serializer=serializer.GraphSONSerializersV2d0())
    try:
        for query in queryList:
            try:
                callback = GremClient.submitAsync(query)
                # Resolve the future once and reuse the result set.
                result_set = callback.result()

                if result_set is not None:
                    print("\tObject inserted:\n\t{0}".format(
                        result_set.all().result()))
                else:
                    print("Something went wrong with this query: {0}".format(query))

            except GremlinServerError as e:
                # A server-side failure of one query must not abort the rest of the batch.
                print('Code: {0}, Attributes: {1}'.format(e.status_code, e.status_attributes))
    finally:
        # Always release the connection, including on unexpected exceptions.
        GremClient.close()

In [None]:
# Distinct user rows from the users view; the column order here must line up
# with the field order of PersonSchema below.
query = """
        SELECT DISTINCT u.[displayName]
            ,u.[reportsTo]
            ,u.[city]
            ,u.[state]
            ,u.[country]
            ,u.[department]
            ,u.[jobTitle]
            ,u.[Id]
        FROM [dbo].[vUsers] u
"""

# Run the query over JDBC against the SQL endpoint.
Person = (
    spark.read.format("jdbc")
    .option("url", f"jdbc:sqlserver://{sql_server_name}.sql.azuresynapse.net:1433;database={sql_database_name};user={sql_username}@{sql_server_name};password={sql_password};encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.sql.azuresynapse.net;loginTimeout=30;")
    .option("user", sql_username)
    .option("password", sql_password)
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .option("query", query)
    .load()
)

# Every person column is a nullable string; fields are positional, so the
# names below rename the SQL columns in SELECT order.
PersonSchema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        "Name",
        "ReportsTo",
        "City",
        "State",
        "Country",
        "Department",
        "JobTitle",
        "Id",
    )
])

# Re-wrap the rows under the renamed schema.
PersonDF = spark.createDataFrame(Person.rdd, PersonSchema)

In [None]:
# Drop every vertex currently in the graph before the load cells add new ones.
CleanupGraphList = [
    "g.V().drop()",
]

ExecGremlinQuery(CleanupGraphList)

In [None]:
# Values are embedded in single-quoted Gremlin string literals. Escape backslashes
# and single quotes so a value such as O'Brien cannot terminate the literal early
# (which would break the query or let data alter it).
_GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

PersonVerticesList = []

# One addV('PERSON') query per user row; the display name is the vertex id and
# the user Id is the partition key ('pk').
for row in PersonDF.collect():
    Name = str(row.Name).translate(_GREMLIN_ESCAPES)
    ReportsTo = emptyIfNull(row.ReportsTo).translate(_GREMLIN_ESCAPES)
    City = emptyIfNull(row.City).translate(_GREMLIN_ESCAPES)
    State = emptyIfNull(row.State).translate(_GREMLIN_ESCAPES)
    Country = emptyIfNull(row.Country).translate(_GREMLIN_ESCAPES)
    Department = emptyIfNull(row.Department).translate(_GREMLIN_ESCAPES)
    JobTitle = emptyIfNull(row.JobTitle).translate(_GREMLIN_ESCAPES)
    Id = emptyIfNull(row.Id).translate(_GREMLIN_ESCAPES)
    PersonVertexQuery = (
        f"g.addV('PERSON').property('id', '{Name}')"
        f".property('State', '{State}')"
        f".property('Country', '{Country}')"
        f".property('Department', '{Department}')"
        f".property('JobTitle', '{JobTitle}')"
        f".property('ReportsTo', '{ReportsTo}')"
        f".property('pk', '{Id}')"
    )

    PersonVerticesList.append(PersonVertexQuery)

ExecGremlinQuery(PersonVerticesList)

In [None]:
# Values are embedded in single-quoted Gremlin string literals. Escape backslashes
# and single quotes so a value containing an apostrophe cannot terminate the literal.
_GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

Cities = PersonDF.select(PersonDF['Name'], PersonDF['City'],PersonDF['Country']).distinct()
CityVerticesList = []
CityEdgesList = []

# The distinct() above is per (Name, City, Country), so the same city shows up once
# per person. Track which city vertices were already queued so each CITY vertex is
# added once instead of failing with an id conflict on every repeat.
_seen_cities = set()

# Generate one vertex per city and one IN_CITY edge per person.
for City in Cities.collect():
    if City.City is not None and City.Name is not None:
        Country = emptyIfNull(City.Country)
        Name = str(City.Name).translate(_GREMLIN_ESCAPES)
        CityName = str(City.City).translate(_GREMLIN_ESCAPES)

        if CityName not in _seen_cities:
            _seen_cities.add(CityName)
            CityQueryVertices = f"g.addV('CITY').property('id', '{CityName}').property('pk', 'city')"
            CityVerticesList.append(CityQueryVertices)

        CityQueryEdges = f"g.V('{Name}').addE('IN_CITY').to(g.V('{CityName}'))"
        CityEdgesList.append(CityQueryEdges)

# Vertices first so the edge queries can resolve their target.
ExecGremlinQuery(CityVerticesList)
ExecGremlinQuery(CityEdgesList)

In [None]:
# Distinct flattened e-mail rows whose recipient matches a row in the users view
# (the WHERE clause discards rows the LEFT JOIN could not match).
EmailQuery = """
            SELECT DISTINCT r.[From_Name]
            ,r.[RecipientName]
            ,r.[Subject]
            ,r.[IsRead]
            ,r.[RecipientType]
            ,r.[MailToManager]
            ,r.[MailToSubordinate]
            ,r.[IsExternalEmail]
            ,r.[CreatedDateTime]
            FROM [dbo].[vEmails_Flattened] r 
            LEFT JOIN [dbo].[vUsers] u ON r.[Recipient] = u.mail
            WHERE u.Id IS NOT NULL
"""

# Run the query over JDBC against the SQL endpoint.
Emails = (
    spark.read.format("jdbc")
    .option("url", f"jdbc:sqlserver://{sql_server_name}.sql.azuresynapse.net:1433;database={sql_database_name};user={sql_username}@{sql_server_name};password={sql_password};encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.sql.azuresynapse.net;loginTimeout=30;")
    .option("user", sql_username)
    .option("password", sql_password)
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .option("query", EmailQuery)
    .load()
)

# (name, type, nullable) per column, in SELECT order; fields are positional.
EmailSchema = StructType([
    StructField(field_name, field_type, is_nullable)
    for field_name, field_type, is_nullable in [
        ("FromName", StringType(), False),
        ("RecipientName", StringType(), False),
        ("Subject", StringType(), True),
        ("IsRead", BooleanType(), True),
        ("RecipientType", StringType(), True),
        ("MailToManager", BooleanType(), True),
        ("MailToSubordinate", BooleanType(), True),
        ("IsExternalEmail", BooleanType(), True),
        ("CreatedDateTime", TimestampType(), True),
    ]
])

# Re-wrap the rows under the renamed schema.
EmailsDF = spark.createDataFrame(Emails.rdd, EmailSchema)

In [None]:
# Values are embedded in single-quoted Gremlin string literals. Escape backslashes
# and single quotes so e.g. a subject like "Don't forget" cannot terminate the literal.
_GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

EmailEdgesList = []

# One edge per e-mail row, from sender vertex to recipient vertex.
for row in EmailsDF.collect():
    FromName = str(row.FromName).translate(_GREMLIN_ESCAPES)
    RecipientName = str(row.RecipientName).translate(_GREMLIN_ESCAPES)
    Subject = emptyIfNull(row.Subject).translate(_GREMLIN_ESCAPES)
    IsRead = emptyIfNull(row.IsRead)
    RecipientType = emptyIfNull(row.RecipientType).translate(_GREMLIN_ESCAPES)
    MailToManager = emptyIfNull(row.MailToManager)
    MailToSubordinate = emptyIfNull(row.MailToSubordinate)
    IsExternalEmail = emptyIfNull(row.IsExternalEmail)
    CreatedDateTime = emptyIfNull(row.CreatedDateTime)

    # Direct recipients get SENT_EMAIL_TO; every other recipient type gets CC_EMAIL_TO.
    # The two edge kinds carry exactly the same properties.
    EdgeLabel = 'SENT_EMAIL_TO' if row.RecipientType == 'To' else 'CC_EMAIL_TO'

    EmailEdgeQuery = (
        f"g.V('{FromName}').addE('{EdgeLabel}').to(g.V('{RecipientName}'))"
        f".property('Subject', '{Subject}')"
        f".property('IsRead', '{IsRead}')"
        f".property('RecipientType', '{RecipientType}')"
        f".property('MailToManager', '{MailToManager}')"
        f".property('MailToSubordinate', '{MailToSubordinate}')"
        f".property('IsExternalEmail', '{IsExternalEmail}')"
        f".property('CreatedDateTime', '{CreatedDateTime}')"
    )

    EmailEdgesList.append(EmailEdgeQuery)

ExecGremlinQuery(EmailEdgesList)

In [None]:
# Distinct flattened Teams chat rows; the column order here must line up with
# the field order of ChatSchema below.
ChatQuery = """
                SELECT DISTINCT t.[From_Name]
                ,t.[RecipientName]
                ,t.[Subject]
                ,t.[BodyPreview]
                ,t.[Importance]
                ,t.[IsRead]
                ,t.[ChatWithManager]
                ,t.[ChatWithSubordinate]
                ,t.[IsExternalChat]
                ,t.[CreatedDateTime]
                FROM [dbo].[vTeamChats_Flattened] t
"""

# Run the query over JDBC against the SQL endpoint.
Chats = (
    spark.read.format("jdbc")
    .option("url", f"jdbc:sqlserver://{sql_server_name}.sql.azuresynapse.net:1433;database={sql_database_name};user={sql_username}@{sql_server_name};password={sql_password};encrypt=true;trustServerCertificate=true;hostNameInCertificate=*.sql.azuresynapse.net;loginTimeout=30;")
    .option("user", sql_username)
    .option("password", sql_password)
    .option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
    .option("query", ChatQuery)
    .load()
)

# (name, type, nullable) per column, in SELECT order; fields are positional.
ChatSchema = StructType([
    StructField(field_name, field_type, is_nullable)
    for field_name, field_type, is_nullable in [
        ("FromName", StringType(), False),
        ("RecipientName", StringType(), False),
        ("Subject", StringType(), True),
        ("BodyPreview", StringType(), True),
        ("Importance", StringType(), True),
        ("IsRead", BooleanType(), True),
        ("ChatWithManager", BooleanType(), True),
        ("ChatWithSubordinate", BooleanType(), True),
        ("IsExternalChat", BooleanType(), True),
        ("CreatedDateTime", TimestampType(), True),
    ]
])

# Re-wrap the rows under the renamed schema.
ChatsDF = spark.createDataFrame(Chats.rdd, ChatSchema)

In [None]:
# Values are embedded in single-quoted Gremlin string literals. Escape backslashes
# and single quotes so free text (subject, body preview) containing an apostrophe
# cannot terminate the literal.
_GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

ChatEdgesList = []

# One SENT_CHAT_TO edge per chat row, from sender vertex to recipient vertex.
for row in ChatsDF.collect():
    FromName = str(row.FromName).translate(_GREMLIN_ESCAPES)
    RecipientName = str(row.RecipientName).translate(_GREMLIN_ESCAPES)
    Subject = emptyIfNull(row.Subject).translate(_GREMLIN_ESCAPES)
    BodyPreview = emptyIfNull(row.BodyPreview).translate(_GREMLIN_ESCAPES)
    Importance = emptyIfNull(row.Importance).translate(_GREMLIN_ESCAPES)
    IsRead = emptyIfNull(row.IsRead)
    ChatWithManager = emptyIfNull(row.ChatWithManager)
    ChatWithSubordinate = emptyIfNull(row.ChatWithSubordinate)
    IsExternalChat = emptyIfNull(row.IsExternalChat)
    CreatedDateTime = emptyIfNull(row.CreatedDateTime)
    ChatEdgeQuery = (
        f"g.V('{FromName}').addE('SENT_CHAT_TO').to(g.V('{RecipientName}'))"
        f".property('Subject', '{Subject}')"
        f".property('BodyPreview', '{BodyPreview}')"
        f".property('Importance', '{Importance}')"
        f".property('IsRead', '{IsRead}')"
        f".property('ChatWithManager', '{ChatWithManager}')"
        f".property('ChatWithSubordinate', '{ChatWithSubordinate}')"
        f".property('IsExternalChat', '{IsExternalChat}')"
        f".property('CreatedDateTime', '{CreatedDateTime}')"
    )
    ChatEdgesList.append(ChatEdgeQuery)

ExecGremlinQuery(ChatEdgesList)