# DB2-Salesforce connector: Citation information updates

In [5]:
# --- Login parameters -------------------------------------------------------
# Both are placeholders: the real values are handled by Papermill.
sf_login_params = None
sql_login_params = None

# Directory appended to sys.path further down (presumably where the local
# helper modules live -- confirm against the deployment layout).
lib_dir = ''

# --- Salesforce API settings ------------------------------------------------
api_url = '/services/data/v43.0/sobjects'
object_id = 'nanoHUB_citations__c'
external_id = 'Record_ID__c'

In [6]:
import sys

# Make lib_dir importable. The membership check keeps this cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
if lib_dir not in sys.path:
    sys.path.append(lib_dir)

import pandas as pd
import datetime

## Obtain citation information from DB2

In [7]:
# Citation records joined with their citation-type rows.
# c.id is aliased to c_id because the types table also exposes an `id` column.
# The literal is assembled from adjacent string pieces; the leading blanks in
# the continuation pieces keep the query text exactly as it was before.
sql_query = (
    "select *, c.id as c_id from jos_citations as c "
    "            left join jos_citations_types as t "
    "            on c.type = t.id "
    "            where uid=2143"
)

# show the final query text
print(sql_query)

select *, c.id as c_id from jos_citations as c             left join jos_citations_types as t             on c.type = t.id             where uid=2143


In [10]:
# connect with DB2
from urllib.parse import quote

import sqlalchemy as sql

# Fail early with a readable message when no credentials were supplied,
# instead of an opaque "'NoneType' object is not subscriptable".
if sql_login_params is None:
    raise ValueError(
        "sql_login_params is not set; expected a mapping with "
        "'username' and 'password' keys"
    )

# Percent-encode the credentials before embedding them in the URL: a raw
# '@', ':', '/' or '%' in the username/password would otherwise be parsed as
# URL syntax and break (or silently alter) the connection string.
engine = sql.create_engine(
    'mysql+pymysql://%s:%s@127.0.0.1/nanohub'
    % (quote(sql_login_params['username'], safe=''),
       quote(sql_login_params['password'], safe=''))
)

# Run the query and load the full result set into a DataFrame
df = pd.read_sql_query(sql_query, engine)

In [11]:
# Parse the date-like columns into datetimes so they can be sorted/compared.
# errors='coerce' turns any value that cannot be parsed into NaT.
for date_column in ('date_publish', 'created', 'date_submit', 'date_accept'):
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

In [12]:
# Preview the first row, then report the frame's (rows, columns) shape
display(df.head(1), 'size=%s' % (df.shape,))

Unnamed: 0,id,uid,affiliated,fundedby,created,address,author,booktitle,chapter,cite,...,custom2,custom3,custom4,pdf_url,id.1,type,type_title,type_desc,type_export,c_id
0,2815,2143,0,-1,NaT,,David Crouse{{30286}};Joseph Skufca,,,crouse2018nature,...,,,,https://nanohub.org/nanoHUBCitations/citmanage...,2,journal,Journal,,,2815


'size=(2457, 68)'

## Match data with Salesforce format

In [13]:
# Map the DB2 citation columns onto the Salesforce field names.
# Key order below is the column order of the uploaded frame.
df_sf = pd.DataFrame({
    'Record_ID__c':      df['c_id'],
    'URL__c':            df['url'],
    # 1 only when the source value equals 1; anything else (incl. NaN) -> 0
    'NCN_Affiliated__c': (df['affiliated'] == 1).astype(int),
    'Author_List__c':    df['author'],
    'Journal__c':        df['journal'],
    'Year__c':           df['year'],
    'Type__c':           df['ref_type'],
    'Notes__c':          df['notes'],
    # Name is the title cut to 70 characters. `.str[:70]` leaves missing
    # titles as NaN, whereas slicing inside a lambda raised TypeError on them.
    'Name':              df['title'].str[:70],
    'Full_title__c':     df['title'],
    'Format__c':         df['type_title'],
    'PDF__c':            df['pdf_url'],
})

# NOTE: the remaining columns pass NaN/NaT values through unchanged.

# Remember which fields were mapped directly from DB2
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

Unnamed: 0,0,1
Record_ID__c,2815,2303
URL__c,https://scholar.google.com/scholar?hl=en&q=htt...,http://scholar.google.com/scholar?hl=en&q=Nano...
NCN_Affiliated__c,0,0
Author_List__c,David Crouse{{30286}};Joseph Skufca,Walt Trybula{{17851}};Dominick Fazarro{{57234}...
Journal__c,ArXiv Preprint ArXiv:1803.03126,Global Perspectives of Nanoscience and Enginee...
Year__c,2018,2016
Type__c,R,R
Notes__c,,{{Springer International Publishing Switzerlan...
Name,On The Nature Of Discrete Space-Time: The Dist...,Nanotechnology Safety Education
Full_title__c,On The Nature Of Discrete Space-Time: The Dist...,Nanotechnology Safety Education


## To Salesforce Sales Cloud CRM

In [14]:
# DB2SalesforceAPI is a project-local module (not shown in this notebook).
from DB2SalesforceAPI import DB2SalesforceAPI

# create DB2 to Salesforce API object
# The recorded output of this cell ("Obtained Salesforce access token")
# suggests the constructor authenticates with sf_login_params -- confirm in
# the DB2SalesforceAPI source.
db_s = DB2SalesforceAPI(sf_login_params)

# specify Salesforce object ID and external ID
# Both values come from the settings cell at the top of the notebook.
db_s.object_id = object_id
db_s.external_id = external_id

Obtained Salesforce access token ...... True


In [15]:
# send data to Salesforce
# Per the recorded output, this creates a bulk job, uploads the frame as CSV
# and closes the job; details live in DB2SalesforceAPI.send_data.
db_s.send_data(df_sf)

[Success] Bulk job creation successful. Job ID = 7505w00000MXGxjAAH
[Success] CSV upload successful. Job ID = 7505w00000MXGxjAAH
[Success] Closing job successful. Job ID = 7505w00000MXGxjAAH


In [18]:
# check status
# The recorded result is a job-status dict; 'state', 'numberRecordsProcessed'
# and 'numberRecordsFailed' are the fields to look at.
db_s.check_bulk_status()

{'id': '7505w00000MXGxjAAH',
 'operation': 'upsert',
 'object': 'nanoHUB_citations__c',
 'createdById': '0055w00000ArpYvAAJ',
 'createdDate': '2020-05-21T23:17:06.000+0000',
 'systemModstamp': '2020-05-21T23:17:21.000+0000',
 'state': 'JobComplete',
 'externalIdFieldName': 'Record_ID__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 2457,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 6825,
 'apiActiveProcessingTime': 5181,
 'apexProcessingTime': 0}

In [17]:
from pprint import pprint
# Pretty-print whatever the API reports for failed records; the recorded run
# printed an empty string (its job status shows numberRecordsFailed = 0).
pprint(db_s.check_bulk_failed_results())

''
