# DB2-Salesforce connector: Tools mapping to users

In [1]:
from nanoHUB.application import Application

# Salesforce API settings. object_id and external_id are attached to the
# Salesforce engine further down, right before the upload.
api_url = '/services/data/v43.0/sobjects'
external_id = 'Name'
object_id = 'ContactToolAssociation__c'

# A single application instance hands out every connection used below.
application = Application.get_instance()

# Database engines for the two schemas queried in this notebook.
nanohub_db = application.new_db_engine('nanohub')
nanohub_metrics_db = application.new_db_engine('nanohub_metrics')

# Salesforce engine; db_s is the alias the rest of the notebook uses.
db_s = salesforce = application.new_salesforce_engine()

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


In [2]:
import pandas as pd
import time
import datetime

## Obtain tool information from DB2 

In [None]:
# Read the two source tables from DB2.

# toolstart: user plus the tool column, aliased to `instance`
sql_query = "select user, tool as instance from nanohub_metrics.toolstart"
toolstart_df = pd.read_sql_query(sql=sql_query, con=nanohub_metrics_db)

# jos_tool_version: toolname for each instance
sql_query = "select toolname, instance from nanohub.jos_tool_version"
tool_version_df = pd.read_sql_query(sql=sql_query, con=nanohub_db)

In [None]:
# NOTE(review): this looks like an unfinished query draft — it has no FROM
# clause and none of the cells shown here execute it. It does overwrite the
# sql_query variable set by the previous cell.
sql_query = '''
SELECT toolstart.user, toolstart.tool as instance
'''

In [None]:
# Lower-case every key column so the later merges match regardless of case.
# The vectorised .str accessor passes missing values (None/NaN) through
# unchanged, whereas apply(str.lower) raises TypeError on the first one.
toolstart_df['user'] = toolstart_df['user'].str.lower()
toolstart_df['instance'] = toolstart_df['instance'].str.lower()

tool_version_df['toolname'] = tool_version_df['toolname'].str.lower()
tool_version_df['instance'] = tool_version_df['instance'].str.lower()

# display
display(toolstart_df.head(2))
display(tool_version_df.head(2))

In [None]:
# Add one extra row per distinct tool in which the tool name is its own
# instance, so an `instance` value equal to a bare tool name still resolves.
toolname_self_df = (
    tool_version_df[['toolname']]
    .drop_duplicates()
    .assign(instance=lambda frame: frame['toolname'])
)

# Stack the extra rows on top of the real ones and drop exact duplicates.
tool_version_df = (
    pd.concat([toolname_self_df, tool_version_df], axis=0)
    .drop_duplicates()
    .reset_index(drop=True)
)

# display
tool_version_df.head(3)

In [None]:
# Attach the toolname to every toolstart row via its instance. A left join
# keeps rows whose instance has no match (toolname is NaN for those).
df = toolstart_df.merge(tool_version_df, how='left', on='instance')[['user', 'toolname']]

In [None]:
# Number of tool runs per (user, toolname) pair, stored in column `cnt`.
# NOTE: df is overwritten here, so re-running this cell would count the
# already aggregated rows instead of the raw toolstart rows.
df = df.groupby(['user', 'toolname']).size().reset_index(name='cnt')

In [None]:
# Safety net: discard any row that still lacks a user or a toolname.
df = df.dropna(subset=['user', 'toolname'])

# display
df.head(3)

In [None]:
# Spot-check the aggregated counts for a few specific users.
for username in ('shrutik', 'wodyd909', 'nkissebe'):
    display(df.loc[df['user'] == username])

## Obtain Salesforce IDs

In [None]:
# the Salesforce API object (db_s) was already created in the first cell

In [None]:
# Look up the Salesforce record IDs for contacts and tools. Junction objects
# must reference these IDs.

# Salesforce ID of every contact that has a nanoHUB username
contact_soql = 'SELECT Id, nanoHUB_username__c FROM Contact where nanoHUB_username__c != NULL'
sf_username_df = db_s.query_data(contact_soql)

# display
sf_username_df.head(3)

In [None]:
# Salesforce ID of every tool record
tool_soql = 'SELECT Id, Tool_name__c FROM nanoHUB_tools__c'
sf_tool_df = db_s.query_data(tool_soql)

# display
sf_tool_df.head(3)

## Match data with Salesforce format

In [None]:
# Cast the Salesforce key columns to str and lower-case them so they compare
# equal to the lower-cased DB2 values.
sf_username_df['nanoHUB_username__c'] = sf_username_df['nanoHUB_username__c'].astype('str').str.lower()
sf_tool_df['Tool_name__c'] = sf_tool_df['Tool_name__c'].astype('str').str.lower()

In [None]:
# Attach the Salesforce tool ID and contact ID to every (user, tool) row.
# Inner joins keep only pairs where both the tool and the user exist in
# Salesforce.
ct_tolink_df = (
    df.merge(sf_tool_df, how='inner', left_on='toolname', right_on='Tool_name__c')
      .rename(columns={'Id': 'SF_ID_tool'})
      .merge(sf_username_df, how='inner', left_on='user', right_on='nanoHUB_username__c')
      .rename(columns={'Id': 'SF_ID_contact'})
)

# display
ct_tolink_df.head(2)

In [None]:
# Build the object name "<username>_<toolname>" for every row.
# Vectorised string concatenation replaces the row-wise apply: it is faster
# and also works when ct_tolink_df is empty. apply(axis=1) on an empty frame
# returns a DataFrame rather than a Series, which cannot be assigned to a
# single column.
ct_tolink_df['Name'] = (
    ct_tolink_df['nanoHUB_username__c'].astype(str)
    + '_'
    + ct_tolink_df['Tool_name__c'].astype(str)
)

In [None]:
# Select the columns to upload and rename them to the Salesforce field names.
# Make sure NaN and NaT values are taken care of here
df_sf = ct_tolink_df[['Name', 'SF_ID_contact', 'SF_ID_tool', 'cnt']].rename(
    columns={
        'SF_ID_contact': 'Contact__c',
        'SF_ID_tool': 'Tool__c',
        'cnt': 'Total_Runs__c',
    }
)
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

## To Salesforce Sales Cloud CRM

In [None]:
# Configure the existing Salesforce engine (db_s) for the upload; no new
# object is created here.

# Set the target Salesforce object and the external-ID field, using the
# object_id / external_id values defined in the settings cell.
db_s.object_id = object_id
db_s.external_id = external_id

In [None]:
# Send the prepared frame (Name, Contact__c, Tool__c, Total_Runs__c) to
# Salesforce through the engine configured in the previous cell.
db_s.send_data(df_sf)

In [None]:
# Fetch the bulk-job status from the Salesforce engine and pretty-print it.
from pprint import pprint

bulk_status = db_s.check_bulk_status()
pprint(bulk_status)