# DB2-Salesforce connector: Tool usage clusters mapping to tools

In [1]:
# API settings
api_url = '/services/data/v43.0/sobjects'
external_id = 'Name'
object_id = 'ToolToolClusterAssociation__c'

from nanoHUB.application import Application

application = Application.get_instance()
wang159_myrmekes_db = application.new_db_engine('rfm_data')


salesforce = application.new_salesforce_engine()
db_s = salesforce


[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


In [2]:
import pandas as pd
import time
import datetime

# from DB2SalesforceAPI import DB2SalesforceAPI

## Obtain tool information from DB2 

In [3]:

classtool_info_df = pd.read_sql_query('select * from cluster_classtool_info', wang159_myrmekes_db)

In [4]:
classtool_info_df.head(3)

Unnamed: 0,index,toolname,class_id
0,0,geneticalgo,0
1,1,geneticalgo,1
2,2,geneticalgo,2


## Obtain Salesforce IDs

In [5]:
# query the Salesforce IDs for contacts and citations. when updating junction objects, these IDs must be used

# get Salesforce ID for contacts
sf_toolname_df = db_s.query_data('SELECT Id, Tool_name__c FROM nanoHUB_tools__c where Tool_name__c != NULL')

[Success] Bulk job creation successful. Job ID = 7505w00000cyZwIAAU
{"id":"7505w00000cyZwIAAU","operation":"query","object":"nanoHUB_tools__c","createdById":"0055w00000DM5bOAAT","createdDate":"2022-01-31T15:31:07.000+0000","systemModstamp":"2022-01-31T15:31:07.000+0000","state":"UploadComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","retries":0,"totalProcessingTime":0}
{"id":"7505w00000cyZwIAAU","operation":"query","object":"nanoHUB_tools__c","createdById":"0055w00000DM5bOAAT","createdDate":"2022-01-31T15:31:07.000+0000","systemModstamp":"2022-01-31T15:31:08.000+0000","state":"JobComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","numberRecordsProcessed":1859,"retries":0,"totalProcessingTime":258}
[Success] Bulk job completed successfully.


In [6]:
# get Salesforce ID for citations
sf_clusterID_df = db_s.query_data('SELECT Id, ID__c FROM tool_usage_cluster__c')

sf_clusterID_df['ID__c'] = sf_clusterID_df['ID__c'].astype('int')

[Success] Bulk job creation successful. Job ID = 7505w00000cyZwvAAE
{"id":"7505w00000cyZwvAAE","operation":"query","object":"tool_usage_cluster__c","createdById":"0055w00000DM5bOAAT","createdDate":"2022-01-31T15:31:18.000+0000","systemModstamp":"2022-01-31T15:31:18.000+0000","state":"UploadComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","retries":0,"totalProcessingTime":0}
{"id":"7505w00000cyZwvAAE","operation":"query","object":"tool_usage_cluster__c","createdById":"0055w00000DM5bOAAT","createdDate":"2022-01-31T15:31:18.000+0000","systemModstamp":"2022-01-31T15:31:21.000+0000","state":"JobComplete","concurrencyMode":"Parallel","contentType":"CSV","apiVersion":47.0,"jobType":"V2Query","lineEnding":"LF","columnDelimiter":"COMMA","numberRecordsProcessed":4576,"retries":0,"totalProcessingTime":553}
[Success] Bulk job completed successfully.


## Match data with Salesforce format

In [7]:
# Display
display(classtool_info_df.head(2))
display(sf_toolname_df.head(2))
display(sf_clusterID_df.head(2))

Unnamed: 0,index,toolname,class_id
0,0,geneticalgo,0
1,1,geneticalgo,1


Unnamed: 0,Id,Tool_name__c
0,a0s5w00000k5MOwAAM,ellipsom
1,a0s5w00000k5MOxAAM,hydrolab


Unnamed: 0,ID__c,Id
0,4,a0w5w000009Q7CuAAK
1,5,a0w5w000009Q7CvAAK


In [8]:
# merge dataframes
ct_tolink_df = pd.merge(classtool_info_df, sf_toolname_df, how='inner', left_on='toolname', right_on='Tool_name__c')\
                           .rename(columns={'Id':'SF_ID_tool'})

ct_tolink_df = pd.merge(ct_tolink_df, sf_clusterID_df, how='inner', left_on='class_id', right_on='ID__c')\
                           .rename(columns={'Id':'SF_ID_cluster'})

# display
ct_tolink_df.head(2)

Unnamed: 0,index,toolname,class_id,SF_ID_tool,Tool_name__c,ID__c,SF_ID_cluster
0,0,geneticalgo,0,a0s5w00000k5MXTAA2,geneticalgo,0,a0w5w00000A8U1UAAV
1,1,geneticalgo,1,a0s5w00000k5MXTAA2,geneticalgo,1,a0w5w00000A8hzeAAB


In [9]:
# create a new column for object name
ct_tolink_df['Name'] = ct_tolink_df.apply(lambda x: '%s_%d'%(x.Tool_name__c, x.ID__c), axis=1)

In [10]:
df_sf = pd.DataFrame()

# Make sure NaN and NaT values are taken care of here
df_sf['Name']         = ct_tolink_df['Name']
df_sf['Tool__c'] = ct_tolink_df['SF_ID_tool']
df_sf['Tool_Usage_Cluster__c'] = ct_tolink_df['SF_ID_cluster']

sf_original_fields = df_sf.columns

# display
display(df_sf.head())
display(df_sf.tail())
display('Total rows: ', df_sf.shape[0])

Unnamed: 0,Name,Tool__c,Tool_Usage_Cluster__c
0,geneticalgo_0,a0s5w00000k5MXTAA2,a0w5w00000A8U1UAAV
1,geneticalgo_1,a0s5w00000k5MXTAA2,a0w5w00000A8hzeAAB
2,geneticalgo_2,a0s5w00000k5MXTAA2,a0w5w00000A8hzfAAB
3,geneticalgo_3,a0s5w00000k5MXTAA2,a0w5w00000A8hzgAAB
4,bmcsuite_4,a0s5w00000k5MRtAAM,a0w5w000009Q7CuAAK


Unnamed: 0,Name,Tool__c,Tool_Usage_Cluster__c
6472,latticeprotein_4232,a0s5w00000mgTUNAA2,a0w5w00000AXc7wAAD
6473,2dfets_4266,a0s5w00000mgAjaAAE,a0w5w00000AXc8UAAT
6474,mdshowcase_4155,a0s5w00000k5MbDAAU,a0w5w00000AXc47AAD
6475,dsers_4157,a0s5w00000k5MfyAAE,a0w5w00000AXc49AAD
6476,rdf_4289,a0s5w00000k5MmnAAE,a0w5w00000AXc8rAAD


'Total rows: '

6477

## To Salesforce Sales Cloud CRM

In [15]:
# specify Salesforce object ID and external ID
db_s.object_id = object_id
db_s.external_id = external_id

print(db_s.object_id)

ToolToolClusterAssociation__c


In [12]:
# send data to Salesforce
db_s.send_data(df_sf)

[Success] Bulk job creation successful. Job ID = 7505w00000cyZx0AAE
hello
[Success] CSV upload successful. Job ID = 7505w00000cyZx0AAE
[Success] Closing job successful. Job ID = 7505w00000cyZx0AAE


In [13]:
# check status
from pprint import pprint

pprint(db_s.check_bulk_status())

{'apexProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apiVersion': 47.0,
 'columnDelimiter': 'COMMA',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2022-01-31T15:31:31.000+0000',
 'externalIdFieldName': 'Name',
 'id': '7505w00000cyZx0AAE',
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'numberRecordsFailed': 0,
 'numberRecordsProcessed': 0,
 'object': 'ToolToolClusterAssociation__c',
 'operation': 'upsert',
 'retries': 0,
 'state': 'InProgress',
 'systemModstamp': '2022-01-31T15:31:32.000+0000',
 'totalProcessingTime': 0}
