# DB2-Salesforce connector: Tools mapping to users

In [1]:
# API settings
api_url = '/services/data/v43.0/sobjects'
external_id = 'Name'
object_id = 'ContactToolAssociation__c'

from nanoHUB.application import Application

application = Application.get_instance()
nanohub_db = application.new_db_engine('nanohub')
nanohub_metrics_db = application.new_db_engine('nanohub_metrics')


salesforce = application.new_salesforce_engine()
db_s = salesforce

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


In [2]:
import pandas as pd
import time
import datetime

## Obtain tool information from DB2 

In [3]:
# connect with DB2

# toolstart
sql_query = '''
SELECT DISTINCT LOWER(users.username),
                LOWER(versions.toolname) AS tool_name,
                LOWER(toolstart.tool) AS tool_version,
                COUNT(toolstart.id) AS COUNT,
                MIN(COALESCE(toolstart.datetime)) AS min_datetime,
                MAX(COALESCE(toolstart.datetime)) AS max_datetime,
                users.registerDate
FROM nanohub_metrics.toolstart toolstart
  JOIN nanohub.jos_tool_version versions
    ON toolstart.tool = versions.instance
  JOIN nanohub.jos_users users
    ON users.username = toolstart.user
GROUP BY users.username, toolstart.tool
'''



In [4]:
toolstart_df = pd.read_sql_query(sql_query, nanohub_metrics_db)
display(toolstart_df.head())
display(toolstart_df.tail())

Unnamed: 0,username,tool_name,tool_version,COUNT,min_datetime,max_datetime,registerDate
0,0.yao.yuan,mosfetsat,mosfetsat_r66,4,2017-03-08 10:15:23,2017-03-08 10:33:22,2017-03-08 15:14:41
1,0.yao.yuan,mosfet,mosfet_r134,11,2017-03-08 11:36:08,2017-03-09 05:09:22,2017-03-08 15:14:41
2,007sribabu,deviceelectron,deviceelectron_r21,1,2021-11-06 20:46:21,2021-11-06 20:46:21,2021-11-07 03:43:24
3,008dilip,abacus,abacus_r31,14,2016-09-28 20:27:13,2016-10-12 23:50:56,2016-09-29 00:25:23
4,00ff,bmcsuite,bmcsuite_r40,2,2013-11-19 00:53:23,2013-11-19 00:58:56,2013-11-19 05:49:54


Unnamed: 0,username,tool_name,tool_version,COUNT,min_datetime,max_datetime,registerDate
525932,z_e_u_s,cenems,cenems,1,2006-10-16 20:25:49,2006-10-16 20:25:49,2006-10-16 20:24:48
525933,z_henry35,cndo,cndo_r34,10,2013-02-18 15:24:54,2013-02-19 01:53:39,2013-02-08 15:08:41
525934,z_henry35,nsoptics,nsoptics_r21,37,2013-02-11 14:42:41,2013-02-12 02:12:28,2013-02-08 15:08:41
525935,z_sath,fermi,fermi_r27,3,2008-07-09 22:51:51,2008-07-09 22:52:23,2008-07-06 23:55:51
525936,z_sath,nanomos,nanomos_r79,2,2008-07-23 09:39:12,2008-07-23 09:39:34,2008-07-06 23:55:51


In [4]:
# convert all to lower case
toolstart_df['user'] = toolstart_df['user'].apply(str.lower)
toolstart_df['instance'] = toolstart_df['instance'].apply(str.lower)

tool_version_df['toolname'] = tool_version_df['toolname'].apply(str.lower)
tool_version_df['instance'] = tool_version_df['instance'].apply(str.lower)

# display

display(toolstart_df.head(2))
display(tool_version_df.head(2))

Unnamed: 0,user,instance
0,herre,cnt-iv
1,donovan,huckel-iv


Unnamed: 0,toolname,instance
0,,
1,112016a,112016a_dev


In [5]:
# insert the toolname into instance as well
dummy = pd.DataFrame()
dummy['toolname'] = tool_version_df['toolname'].drop_duplicates()
dummy['instance'] = dummy['toolname']

tool_version_df = pd.concat([dummy, tool_version_df], axis=0).drop_duplicates().reset_index(drop=True)

# display
tool_version_df.head(3)

Unnamed: 0,toolname,instance
0,,
1,112016a,112016a
2,1dbtetransient,1dbtetransient


In [6]:
# merge toolname into toolstart
df = pd.merge(toolstart_df, tool_version_df, how='left', left_on='instance', right_on='instance')[['user', 'toolname']]

In [7]:
# count number of tool runs by each user, each tool
df = df.groupby(['user', 'toolname'])['toolname'].count().to_frame(name='cnt').reset_index()

In [8]:
# just in case some NaN values made through
df = df[~df.user.isna()]
df = df[~df.toolname.isna()]

# display
df.head(3)

Unnamed: 0,user,toolname,cnt
0,,,885
1,0.yao.yuan,mosfet,11
2,0.yao.yuan,mosfetsat,4


In [9]:
display(df.loc[df['user'] == 'pjs970918'])
display(df.loc[df['user'] == 'wodyd909'])
display(df.loc[df['user'] == 'shrutik'])

Unnamed: 0,user,toolname,cnt
326760,pjs970918,semi,6


Unnamed: 0,user,toolname,cnt
454182,wodyd909,abacus,8


Unnamed: 0,user,toolname,cnt
389279,shrutik,abacus,1


## Obtain Salesforce IDs

In [10]:
# create DB2 to Salesforce API object

In [11]:
# query the Salesforce IDs for contacts and citations. when updating junction objects, these IDs must be used

# get Salesforce ID for contacts
sf_username_df = db_s.query_data('SELECT Id, nanoHUB_username__c FROM Contact where nanoHUB_username__c != NULL')

# display
sf_username_df.head(3)

[Success] Bulk job creation successful. Job ID = 7508W00000f5uhUQAQ
{"id":"7508W00000f5uhUQAQ","operation":"query","object":"Contact","createdById":"0055w00000DM5bOAAT","createdDate":"2022-04-08T03:10:56.000+0000","systemModstamp":"2022-04-08T03:10:56.000+0000","state":"InProgress","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","numberRecordsProcessed":0,"retries":0,"totalProcessingTime":0}
{"id":"7508W00000f5uhUQAQ","operation":"query","object":"Contact","createdById":"0055w00000DM5bOAAT","createdDate":"2022-04-08T03:10:56.000+0000","systemModstamp":"2022-04-08T03:11:06.000+0000","state":"JobComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","numberRecordsProcessed":277656,"retries":0,"totalProcessingTime":10164}
[Success] Bulk job completed successfully.


Unnamed: 0,Id,nanoHUB_username__c
0,0035w000031Vsp1AAC,hubrepo
1,0035w000031Vsp2AAC,support
2,0035w000031Vsp3AAC,gridstat


In [12]:
sf_username_df.tail()

Unnamed: 0,Id,nanoHUB_username__c
252212,0035w00003YY1KCAA1,-185514
252213,0035w00003YY1KDAA1,nxp790
252214,0035w00003YY1KEAA1,lng35
252215,0035w00003YY1KFAA1,agrawalmahima361
252216,0035w00003YY1KGAA1,eee_aiub


In [13]:
display(sf_username_df.loc[sf_username_df['nanoHUB_username__c'] == 'pjs970918'])
display(sf_username_df.loc[sf_username_df['nanoHUB_username__c'] == 'wodyd909'])
display(sf_username_df.loc[sf_username_df['nanoHUB_username__c'] == 'shrutik'])

Unnamed: 0,Id,nanoHUB_username__c


Unnamed: 0,Id,nanoHUB_username__c


Unnamed: 0,Id,nanoHUB_username__c


In [14]:
# get Salesforce ID for citations
sf_tool_df = db_s.query_data('SELECT Id, Tool_name__c FROM nanoHUB_tools__c')

# display
sf_tool_df.head(3)

[Success] Bulk job creation successful. Job ID = 7508W00000f5uhjQAA
{"id":"7508W00000f5uhjQAA","operation":"query","object":"nanoHUB_tools__c","createdById":"0055w00000DM5bOAAT","createdDate":"2022-04-08T03:11:15.000+0000","systemModstamp":"2022-04-08T03:11:16.000+0000","state":"JobComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","numberRecordsProcessed":1878,"retries":0,"totalProcessingTime":230}
[Success] Bulk job completed successfully.


Unnamed: 0,Id,Tool_name__c
0,a0s5w00000k5MOwAAM,ellipsom
1,a0s5w00000k5MOxAAM,hydrolab
2,a0s5w00000k5MOyAAM,huckel


## Match data with Salesforce format

In [15]:
# make sure all strings are lower cases

sf_username_df['nanoHUB_username__c'] = sf_username_df['nanoHUB_username__c'].astype('str').apply(str.lower)

sf_tool_df['Tool_name__c'] = sf_tool_df['Tool_name__c'].astype('str').apply(str.lower)

In [16]:
# merge SF citation and contact IDs into user-citation DF
ct_tolink_df = pd.merge(df, sf_tool_df, how='inner', left_on='toolname', right_on='Tool_name__c')\
                           .rename(columns={'Id':'SF_ID_tool'})
                                            

# display
ct_tolink_df.head(2)

Unnamed: 0,user,toolname,cnt,SF_ID_tool,Tool_name__c
0,0.yao.yuan,mosfet,11,a0s5w00000k5MPbAAM,mosfet
1,01024461660a,mosfet,90,a0s5w00000k5MPbAAM,mosfet


In [17]:
ct_tolink_df = pd.merge(ct_tolink_df, sf_username_df, how='inner', left_on='user', right_on='nanoHUB_username__c')\
                           .rename(columns={'Id':'SF_ID_contact'})

# display
ct_tolink_df.head(2)

Unnamed: 0,user,toolname,cnt,SF_ID_tool,Tool_name__c,SF_ID_contact,nanoHUB_username__c
0,0.yao.yuan,mosfet,11,a0s5w00000k5MPbAAM,mosfet,0035w000034IX15AAG,0.yao.yuan
1,0.yao.yuan,mosfetsat,4,a0s5w00000k5Mc5AAE,mosfetsat,0035w000034IX15AAG,0.yao.yuan


In [18]:
display(ct_tolink_df.loc[ct_tolink_df['user'] == 'pjs970918'])
display(ct_tolink_df.loc[ct_tolink_df['user'] == 'wodyd909'])
display(ct_tolink_df.loc[ct_tolink_df['user'] == 'shrutik'])

Unnamed: 0,user,toolname,cnt,SF_ID_tool,Tool_name__c,SF_ID_contact,nanoHUB_username__c


Unnamed: 0,user,toolname,cnt,SF_ID_tool,Tool_name__c,SF_ID_contact,nanoHUB_username__c


Unnamed: 0,user,toolname,cnt,SF_ID_tool,Tool_name__c,SF_ID_contact,nanoHUB_username__c


In [19]:
# create a new column for object name
ct_tolink_df['Name'] = ct_tolink_df.apply(lambda x: '%s_%s'%(x.nanoHUB_username__c, x.Tool_name__c), axis=1)

In [20]:
df_sf = pd.DataFrame()

# Make sure NaN and NaT values are taken care of here
df_sf['Name']         = ct_tolink_df['Name']
df_sf['Contact__c'] = ct_tolink_df['SF_ID_contact']
df_sf['Tool__c']       = ct_tolink_df['SF_ID_tool']
df_sf['Total_Runs__c'] = ct_tolink_df['cnt']
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

Unnamed: 0,0,1
Name,0.yao.yuan_mosfet,0.yao.yuan_mosfetsat
Contact__c,0035w000034IX15AAG,0035w000034IX15AAG
Tool__c,a0s5w00000k5MPbAAM,a0s5w00000k5Mc5AAE
Total_Runs__c,11,4


In [21]:
display(df_sf.loc[df_sf['Contact__c'] == '362377'])
# display(df_sf.loc[df_sf['Contact__c'] == 'wodyd909'])
# display(df_sf.loc[df_sf['Contact__c'] == 'nkissebe'])

Unnamed: 0,Name,Contact__c,Tool__c,Total_Runs__c


## To Salesforce Sales Cloud CRM

In [22]:
# create DB2 to Salesforce API object

# specify Salesforce object ID and external ID
db_s.object_id = object_id
db_s.external_id = external_id

In [23]:
# send data to Salesforce
db_s.send_data(df_sf)

[Success] Bulk job creation successful. Job ID = 7508W00000f5uiSQAQ
hello
[Success] CSV upload successful. Job ID = 7508W00000f5uiSQAQ
[Success] Closing job successful. Job ID = 7508W00000f5uiSQAQ


In [24]:
# check status
from pprint import pprint

pprint(db_s.check_bulk_status())

{'apexProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apiVersion': 47.0,
 'columnDelimiter': 'COMMA',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2022-04-08T03:12:22.000+0000',
 'externalIdFieldName': 'Name',
 'id': '7508W00000f5uiSQAQ',
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'numberRecordsFailed': 0,
 'numberRecordsProcessed': 0,
 'object': 'ContactToolAssociation__c',
 'operation': 'upsert',
 'retries': 0,
 'state': 'InProgress',
 'systemModstamp': '2022-04-08T03:12:34.000+0000',
 'totalProcessingTime': 0}
