# DB2-Salesforce connector: Citation information updates

In [1]:
# Salesforce settings for the citation upload.
# object_id / external_id are later copied onto the Salesforce engine.
object_id = 'nanoHUB_citations__c'
external_id = 'Record_ID__c'
# NOTE(review): the logged requests in this notebook go to /services/data/v47.0/...,
# and api_url is not referenced by any other visible cell — confirm it is still used.
api_url = '/services/data/v43.0/sobjects'

In [2]:
import sys
import os
sys.path.append('/home/users/wang2506/nanohub_salesforce_integ/salesforce')
    
import pandas as pd
import datetime

## Obtain citation information from DB2

In [3]:
# Citation records joined with their citation-type metadata.
# `c.id as c_id` gives the citation id its own column name, since `select *`
# pulls an `id` column from both joined tables.
#
# The query is assembled from adjacent string literals rather than backslash
# continuations inside one literal: a continuation inside the quotes embeds
# the next line's leading indentation in the SQL text.
sql_query = (
    "select *, c.id as c_id from jos_citations as c "
    "left join jos_citations_types as t "
    "on c.type = t.id "
    "where uid=2143"
)

# display
print(sql_query)

select *, c.id as c_id from jos_citations as c             left join jos_citations_types as t             on c.type = t.id             where uid=2143


In [4]:
from nanoHUB.application import Application

# Obtain the Application instance and ask it for the two backend handles.
application = Application.get_instance()
nanohub_db = application.new_db_engine('nanohub')

# `salesforce` and `db_s` are two names bound to the same engine object.
db_s = salesforce = application.new_salesforce_engine()

# Run the citation query against the nanohub engine and load it into a DataFrame.
df = pd.read_sql_query(sql=sql_query, con=nanohub_db)


``````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````````
```````````````````````````````$$$$$``````````````````````````````````````````````````````````````````````````````````````````````````````````````````
`````````````````````````````$$$$`$$$$````````````````````````````````````````````````````````````````````````````````````````````````````````````````
`````````````````$$$$$```````$$`````$$$```````````````````````````````````````````````````````````````````````````````````````````````````````````````
````````````````$$```$$``````$$$$``$$$````````````````````````````````````````````````````````````````````````````````````````````````````````````````
````````````````$$$$$$$```````$$$$$$``````````````````````````````````````````````````````````````````````````````````````````````````````````````````
``````$$$$````````$$$$$```````$$$``````$$$$$$````````````````````````````````````````````````

In [5]:
# Parse the date-like columns into datetimes so they can be sorted and compared;
# errors='coerce' turns any value that cannot be parsed into NaT.
for date_col in ('date_publish', 'created', 'date_submit', 'date_accept'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

In [6]:
# Preview the first row, then report the frame's (rows, columns) shape.
display(df.head(1))
display(f'size={df.shape}')

Unnamed: 0,id,uid,affiliated,fundedby,created,address,author,booktitle,chapter,cite,...,custom2,custom3,custom4,pdf_url,id.1,type,type_title,type_desc,type_export,c_id
0,2023,2143,1,-1,NaT,,Suresh Garimella;D Janes{{8507}};Anne Andrew,,,,...,,,,https://nanohub.org/nanoHUBCitations/citmanage...,2,journal,Journal,,,2023


'size=(2508, 68)'

## Match data with Salesforce format

In [7]:
# Build the upload frame: one column per Salesforce field name, each filled
# from the corresponding column of the DB2 citation frame `df`.
df_sf = pd.DataFrame()

# Missing values (NaN / None) must not break the transforms below.

df_sf['Record_ID__c']      = df['c_id']
df_sf['URL__c']            = df['url']
# Only an explicit 1 counts as affiliated; every other value, including a
# missing one, becomes 0.
df_sf['NCN_Affiliated__c'] = df['affiliated'].eq(1).astype(int)
df_sf['Author_List__c']    = df['author']
df_sf['Journal__c']        = df['journal']
df_sf['Year__c']           = df['year']
df_sf['Type__c']           = df['ref_type']
df_sf['Notes__c']          = df['notes']
# Name is the title cut to its first 70 characters. na_action='ignore' passes
# missing titles through untouched instead of raising TypeError when slicing
# None / NaN.
df_sf['Name']              = df['title'].map(lambda title: title[:70], na_action='ignore')
df_sf['Full_title__c']     = df['title']
df_sf['Format__c']         = df['type_title']

# Snapshot of the column labels present at this point.
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

Unnamed: 0,0,1
Record_ID__c,2023,518
URL__c,http://scholar.google.com/scholar?hl=en&q=A+Co...,http://www.ecsdl.org/getabs/servlet/GetabsServ...
NCN_Affiliated__c,1,1
Author_List__c,Suresh Garimella;D Janes{{8507}};Anne Andrew,Mark Lundstrom{{2862}};Yang Liu{{2558}}
Journal__c,,215th ECS Meeting/The Electrochemical Society
Year__c,2015,2009
Type__c,C,R
Notes__c,,{{The Electrochemical Society}}\r\nLKZ-short p...
Name,A Concept Paper On Networks Of Excellence For ...,Simulation-Based Study of III-V HEMTs Device P...
Full_title__c,A Concept Paper On Networks Of Excellence For ...,Simulation-Based Study of III-V HEMTs Device P...


## To Salesforce Sales Cloud CRM

In [8]:
# specify Salesforce object ID and external ID
# Both values come from the API settings cell at the top of the notebook.
# The job status recorded further down reports the same object name and
# external-ID field name.
# NOTE(review): presumably send_data() reads these attributes to choose the
# target object and upsert key — confirm in the engine implementation.
db_s.object_id = object_id
db_s.external_id = external_id

In [9]:
# send data to Salesforce
# Per the log output recorded for this cell, the call creates a bulk ingest
# job (POST .../jobs/ingest/) and then uploads the data as CSV to that job
# (PUT .../jobs/ingest/<job id>/batches/).
db_s.send_data(df_sf)

2021-06-15 19:28:18,355 - [DEBUG] urllib3.connectionpool [connectionpool._new_conn:971]: Starting new HTTPS connection (1): na172.salesforce.com:443
2021-06-15 19:28:18,684 - [DEBUG] urllib3.connectionpool [connectionpool._make_request:452]: https://na172.salesforce.com:443 "POST /services/data/v47.0/jobs/ingest/ HTTP/1.1" 200 None
[Success] Bulk job creation successful. Job ID = 7505w00000Wx1rrAAB
hello
2021-06-15 19:28:18,715 - [DEBUG] urllib3.connectionpool [connectionpool._new_conn:971]: Starting new HTTPS connection (1): na172.salesforce.com:443
2021-06-15 19:28:21,127 - [DEBUG] urllib3.connectionpool [connectionpool._make_request:452]: https://na172.salesforce.com:443 "PUT /services/data/v47.0/jobs/ingest/7505w00000Wx1rrAAB/batches/ HTTP/1.1" 201 None
[Success] CSV upload successful. Job ID = 7505w00000Wx1rrAAB
2021-06-15 19:28:21,131 - [DEBUG] urllib3.connectionpool [connectionpool._new_conn:971]: Starting new HTTPS connection (1): na172.salesforce.com:443
2021-06-15 19:28:21,35

In [10]:
# check status
# The recorded result is a dict describing the ingest job: its id, operation,
# state, and processed/failed record counts, among other fields.
# NOTE(review): the recorded run shows state 'InProgress' with 0 records
# processed, so the status was presumably read before the job finished —
# re-run this cell until the state is final before relying on the counts.
db_s.check_bulk_status()

2021-06-15 19:28:21,369 - [DEBUG] urllib3.connectionpool [connectionpool._new_conn:971]: Starting new HTTPS connection (1): na172.salesforce.com:443
2021-06-15 19:28:21,819 - [DEBUG] urllib3.connectionpool [connectionpool._make_request:452]: https://na172.salesforce.com:443 "GET /services/data/v47.0/jobs/ingest/7505w00000Wx1rrAAB HTTP/1.1" 200 None


{'id': '7505w00000Wx1rrAAB',
 'operation': 'upsert',
 'object': 'nanoHUB_citations__c',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2021-06-15T19:28:18.000+0000',
 'systemModstamp': '2021-06-15T19:28:21.000+0000',
 'state': 'InProgress',
 'externalIdFieldName': 'Record_ID__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 0,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apexProcessingTime': 0}

In [11]:
# Pretty-print whatever the engine reports as failed results for the bulk job.
# In the recorded run the request returned HTTP 204 and an empty string was
# printed.
# NOTE(review): the status cell above recorded the job as still 'InProgress',
# so an empty result here may not be final — confirm after the job completes.
from pprint import pprint
pprint(db_s.check_bulk_failed_results())

2021-06-15 19:28:21,837 - [DEBUG] urllib3.connectionpool [connectionpool._new_conn:971]: Starting new HTTPS connection (1): na172.salesforce.com:443
2021-06-15 19:28:22,036 - [DEBUG] urllib3.connectionpool [connectionpool._make_request:452]: https://na172.salesforce.com:443 "GET /services/data/v47.0/jobs/ingest/7505w00000Wx1rrAAB/failedResults/ HTTP/1.1" 204 0
''
2021-06-15 19:28:23,147 - [DEBUG] paramiko.transport [transport._log:1819]: Sending global request "keepalive@lag.net"
2021-06-15 19:28:28,153 - [DEBUG] paramiko.transport [transport._log:1819]: Sending global request "keepalive@lag.net"
2021-06-15 19:28:33,170 - [DEBUG] paramiko.transport [transport._log:1819]: Sending global request "keepalive@lag.net"
2021-06-15 19:28:38,183 - [DEBUG] paramiko.transport [transport._log:1819]: Sending global request "keepalive@lag.net"
2021-06-15 19:28:43,215 - [DEBUG] paramiko.transport [transport._log:1819]: Sending global request "keepalive@lag.net"
2021-06-15 19:28:48,256 - [DEBUG] param