In [None]:
# Install the dependencies
!pip install gremlinpython

In [None]:
# Load the session list spreadsheet into a DataFrame.
# The 'Time' column is converted with str so its values arrive as text.
import pandas as pd

df = pd.read_excel(
    "DataWeekender-SessionList.xlsx",
    converters={"Time": str},
)
# Summarise columns, dtypes and non-null counts
df.info()

In [None]:
# List the distinct values found in the Label column
df["Label"].unique()

In [None]:
# Accumulators for the generated Gremlin statements:
# VERTICES collects the vertex (node) statements, EDGES the edge (relationship) statements
VERTICES, EDGES = [], []

In [None]:
# Anything not a talk has a name and a type
# Values are spliced into single-quoted Gremlin string literals, so any
# backslash or single quote in the spreadsheet (e.g. "What's New") must be
# backslash-escaped or the generated statement is malformed.
# (Defined in this cell so the cell does not depend on any other for it.)
GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

non_talks = df[df.Label != 'Talk']
for _, non_talk in non_talks.iterrows():
    label = str(non_talk['Label']).translate(GREMLIN_ESCAPES)
    node_name = str(non_talk['Node Name']).translate(GREMLIN_ESCAPES)
    # The label is used both as the vertex label and as its 'type' property
    VERTICES.append(
        f"g.addV('{label}').property('name','{node_name}').property('type','{label}')"
    )

In [None]:
# Talks are split in Excel with multiple lines for Topic
# So clean them up to be distinct vertices: drop the Topic column, then
# collapse the rows that are now identical.
talks = df[df.Label == 'Talk'].drop(columns=['Topic']).drop_duplicates()

# Values are spliced into single-quoted Gremlin string literals, so any
# backslash or single quote in a talk title or time must be backslash-escaped
# or the generated statement is malformed.
# (Defined in this cell so the cell does not depend on any other for it.)
GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

# Talks have name, track, and time
for _, talk in talks.iterrows():
    node_name = str(talk['Node Name']).translate(GREMLIN_ESCAPES)
    talk_time = str(talk['Time']).translate(GREMLIN_ESCAPES)
    # NOTE(review): Track is emitted unquoted, exactly as before - presumably
    # it is numeric in the spreadsheet; confirm against the data.
    VERTICES.append(
        f"g.addV('Talk').property('name','{node_name}').property('type','Talk')"
        f".property('Track',{talk['Track']}).property('Time','{talk_time}')"
    )

In [None]:
# Uncomment the line below to look at the Gremlin for adding vertices
# VERTICES

In [None]:
# Build the relationships
# 'You' are interested in the 'Azure' topic.
# The inner traversal passed to to(...) has to be closed with its own ')',
# otherwise the statement is syntactically invalid Gremlin.
EDGES.append("g.V().has('name','You').addE('interested in').to(g.V().hasLabel('Topic').has('name','Azure'))")

In [None]:
# Add DataWeekender to topics: one 'covers' edge from the DataWeekender vertex
# to every Topic vertex.
# Names are spliced into single-quoted Gremlin string literals, so backslashes
# and single quotes must be backslash-escaped.
# (Defined in this cell so the cell does not depend on any other for it.)
GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

topics = df[df.Label == 'Topic']
for _, topic in topics.iterrows():
    topic_name = str(topic['Node Name']).translate(GREMLIN_ESCAPES)
    # Note the final '))': the first ')' closes has(...), the second closes to(...)
    EDGES.append(
        f"g.V().has('name','DataWeekender').addE('covers').to(g.V().hasLabel('Topic').has('name','{topic_name}'))"
    )

In [None]:
# Talks with Topics: every row that has a Topic value yields one 'contains'
# edge from that Topic vertex to the Talk vertex named on the row.
# Names are spliced into single-quoted Gremlin string literals, so backslashes
# and single quotes must be backslash-escaped.
# (Defined in this cell so the cell does not depend on any other for it.)
GREMLIN_ESCAPES = str.maketrans({"\\": "\\\\", "'": "\\'"})

talks_with_topics = df[pd.notnull(df['Topic'])]
for _, talk in talks_with_topics.iterrows():
    topic_name = str(talk['Topic']).translate(GREMLIN_ESCAPES)
    talk_name = str(talk['Node Name']).translate(GREMLIN_ESCAPES)
    EDGES.append(
        f"g.V().hasLabel('Topic').has('name','{topic_name}').addE('contains').to(g.V().hasLabel('Talk').has('name','{talk_name}'))"
    )

In [None]:
# Uncomment the line below to look at the Gremlin for adding edges
# EDGES

In [None]:
from gremlin_python.driver import client
# CONFIG STUFF - YOU NEED TO EDIT THIS
# Make sure to create your Cosmos DB Gremlin API endpoint at https://portal.azure.com
# Create the Database and Collection in the portal
# This script will populate the data that we use in our demo
#
# Each setting is read from an environment variable when one is set, so the
# primary key does not have to be saved inside the notebook. When the variable
# is not set, the placeholder below is used and must be edited by hand.
import os

ENDPOINT = os.environ.get('COSMOS_ENDPOINT', 'YOUR_ENDPOINT.gremlin.cosmosdb.azure.com')
PRIMARY_KEY = os.environ.get('COSMOS_PRIMARY_KEY', 'YOUR_PRIMARY_KEY')
DATABASE = os.environ.get('COSMOS_DATABASE', 'YOUR_DATABASE_NAME')
COLLECTION = os.environ.get('COSMOS_COLLECTION', 'YOUR_COLLECTION_NAME')

In [None]:
# nest_asyncio patches asyncio so the Gremlin client can run its event loop inside the loop Jupyter already has running
import nest_asyncio
nest_asyncio.apply()

# Now let's do this
# This code is based on this blog post: https://www.taygan.co/blog/2018/01/23/azure-cosmos-db-graph-api-with-python
from gremlin_python.driver import client, serializer

def cleanup_graph(gremlin_client):
    """Drop every vertex in the graph and wait until the drop has finished.

    Args:
        gremlin_client: client exposing ``submitAsync(query)``, which returns a
            future resolving to a result set.

    Raises:
        Whatever the client raises when the request cannot be sent or when the
        server reports an error for the drop; a failed purge is not swallowed.
    """
    callback = gremlin_client.submitAsync("g.V().drop()")
    result_set = callback.result()
    if result_set is not None:
        # The future above resolves as soon as the request has been written.
        # Draining the result set blocks until the server has answered and
        # re-raises any server-side error here instead of losing it.
        result_set.all().result()
        print("Cleaned up the graph!")

def insert_vertices(gremlin_client):
    """Submit every statement in VERTICES, one at a time, reporting failures.

    Each statement is awaited before the next one is sent. A statement the
    server rejects is printed together with the error and the loop carries on
    with the remaining statements.

    Args:
        gremlin_client: client exposing ``submitAsync(query)``, which returns a
            future resolving to a result set.
    """
    for vertex in VERTICES:
        callback = gremlin_client.submitAsync(vertex)
        result_set = callback.result()
        if result_set is None:
            print("Something went wrong with this query: {0}".format(vertex))
            continue
        try:
            # The future above resolves once the request is written; draining
            # the result set is what waits for the server and surfaces errors.
            result_set.all().result()
        except Exception as error:
            # Report and keep going so one bad statement does not stop the load
            print("Something went wrong with this query: {0}\n{1}".format(vertex, error))

def insert_edges(gremlin_client):
    """Submit every statement in EDGES, one at a time, reporting failures.

    Each statement is awaited before the next one is sent. A statement the
    server rejects is printed together with the error and the loop carries on
    with the remaining statements.

    Args:
        gremlin_client: client exposing ``submitAsync(query)``, which returns a
            future resolving to a result set.
    """
    for edge in EDGES:
        callback = gremlin_client.submitAsync(edge)
        result_set = callback.result()
        if result_set is None:
            print("Something went wrong with this query:\n{0}".format(edge))
            continue
        try:
            # The future above resolves once the request is written; draining
            # the result set is what waits for the server and surfaces errors.
            result_set.all().result()
        except Exception as error:
            # Report and keep going so one bad statement does not stop the load
            print("Something went wrong with this query:\n{0}\n{1}".format(edge, error))

def handler():
    """Connect to the configured graph, purge it, then load vertices and edges.

    Builds a Gremlin client from ENDPOINT, DATABASE, COLLECTION and PRIMARY_KEY,
    runs cleanup_graph, insert_vertices and insert_edges in that order, and
    closes the client afterwards whether or not a step raised.
    """
    # Initialize client
    print('Initializing client...')
    # GraphSON V2 is called out here, as V3 is not supported yet
    gremlin_client = client.Client(
        'wss://' + ENDPOINT + ':443/', 'g',
        username="/dbs/" + DATABASE + "/colls/" + COLLECTION,
        password=PRIMARY_KEY,
        message_serializer=serializer.GraphSONSerializersV2d0()
    )
    print('Client initialized!')

    try:
        # Purge graph
        cleanup_graph(gremlin_client)

        # Insert vertices (nodes) - must exist before the edges that link them
        insert_vertices(gremlin_client)

        # Insert edges (relationships)
        insert_edges(gremlin_client)
    finally:
        # Release the client's connections even when a step above failed
        gremlin_client.close()

    print('Finished!')

# Run the load only when this code is executed as the main module
if __name__ == "__main__":
    handler()