# DB2-Salesforce connector: URL issues

In [1]:
# Project entry point that hands out the database and Salesforce engines
from nanoHUB.application import Application

# Salesforce settings: REST sobjects path, the custom object that receives the
# issues, and the external-ID field used to match records on upsert
api_url = '/services/data/v43.0/sobjects'
object_id = 'nanoHUB_issue__c'
external_id = 'Issue_ID__c'

# A single Application instance supplies every engine created in this notebook
application = Application.get_instance()
nanohub_db = application.new_db_engine('nanohub')

# `db_s` is the name the upload cells use for this same Salesforce engine
db_s = salesforce = application.new_salesforce_engine()

[1mnanoHUB - Serving Students, Researchers & Instructors[0m


Obtained Salesforce access token ...... True


In [2]:

import datetime
import html

import pandas as pd

## Obtain invalid-URL issue records from DB2

In [3]:
# Query text: fetch every row of the invalid-URL issue table
sql_query = 'select * from issue_invalid_urls'

# Echo the statement so the notebook output records the exact query text
print(sql_query)

select * from issue_invalid_urls


In [4]:
# Engine for the database that the issue_invalid_urls query is run against
wang159_myrmekes_db = application.new_db_engine('wang159_myrmekes')

# Execute the query and load the complete result set into a DataFrame
df = pd.read_sql_query(sql=sql_query, con=wang159_myrmekes_db)

In [5]:
# display: preview the first two rows to check the columns the later cells rely on
# (resource_ID, href, href_text)
df.head(2)

Unnamed: 0,index,resource_ID,href,href_text,status
0,0,90,http://epics.ecn.purdue.edu/lspm,Lego Scanning Probe Microscope,404
1,1,90,http://www.inac.purdue.edu/education/lspm/exhi...,Lego SPM website,Connection error.


In [6]:
# Build the human-readable Excel report of invalid URLs.

# NaN / NULL handling: string concatenation with a missing value yields NaN, so
# one NULL link label would blank out the whole issue ID. Work from a text view
# of the concatenated columns in which missing values become '' and every value
# is a str (this also keeps `+` from raising if resource_ID arrives as a number).
df_text = df[['resource_ID', 'href', 'href_text']].fillna('').astype(str)

df_export = pd.DataFrame()

# construct a consistent ID for this issue: resource, link target and link label
df_export[external_id] = (
    'Resource_URL::::' + df_text['resource_ID']
    + '::::' + df_text['href']
    + '::::' + df_text['href_text']
)

df_export['Resource ID'] = df['resource_ID']
df_export['Resource URL'] = 'https://nanohub.org/resources/' + df_text['resource_ID']
df_export['Reason'] = 'Invalid URL'

# Raw values are kept here; a missing label or link simply shows as an empty cell
df_export['Invalid URL label'] = df['href_text']
df_export['Invalid URL'] = df['href']

# Write the report to disk (the row index is written as the first column)
df_export.to_excel('./invalid_URL_export.xlsx', engine='xlsxwriter')

## Match data with Salesforce format

In [7]:
# Build the frame that is uploaded to Salesforce, one row per invalid link.

# NaN / NULL handling: string concatenation with a missing value yields NaN, so
# one NULL link label would leave the record without an external ID. Work from a
# text view in which missing values become '' and every value is a str (this
# also keeps `+` from raising if resource_ID arrives as a number).
df_text = df[['resource_ID', 'href', 'href_text']].fillna('').astype(str)

# HTML body of the issue message. The runs of spaces reproduce exactly what the
# original backslash-continued literal contained; HTML rendering collapses them.
issue_message_template = (
    'Invalid URL found on page.<br><br>'
    '         Link label: <p style="color:Tomato;">%s</p><br>'
    '         Link: <p style="color:Tomato;">%s</p>'
)

df_sf = pd.DataFrame()

# construct a consistent ID for this issue: resource, link target and link label
# NOTE(review): Salesforce external-ID text fields are length-limited; confirm
# that Issue_ID__c can hold a long URL plus its label.
df_sf[external_id] = (
    'Resource_URL::::' + df_text['resource_ID']
    + '::::' + df_text['href']
    + '::::' + df_text['href_text']
)

df_sf['nanoHUB_resource_ID__c'] = df['resource_ID']
df_sf['Origin_URL__c'] = 'https://nanohub.org/resources/' + df_text['resource_ID']
df_sf['Type__c'] = 'Invalid URL'

# The label and link come from scraped page content, so escape them before
# placing them inside HTML markup. Building the Series explicitly (instead of a
# row-wise df.apply) also keeps this cell working when the query returns no
# rows: on an empty frame apply(axis=1) gives back a DataFrame, not a Series.
df_sf['Issue_message__c'] = pd.Series(
    [
        issue_message_template % (html.escape(label), html.escape(link))
        for label, link in zip(df_text['href_text'], df_text['href'])
    ],
    index=df.index,
    dtype=object,
)

# Remember the columns that make up the Salesforce payload
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

Unnamed: 0,0,1
Issue_ID__c,Resource_URL::::90::::http://epics.ecn.purdue....,Resource_URL::::90::::http://www.inac.purdue.e...
nanoHUB_resource_ID__c,90,90
Origin_URL__c,https://nanohub.org/resources/90,https://nanohub.org/resources/90
Type__c,Invalid URL,Invalid URL
Issue_message__c,Invalid URL found on page.<br><br> Lin...,Invalid URL found on page.<br><br> Lin...


## To Salesforce Sales Cloud CRM

In [8]:
# Point the Salesforce engine at the target object and its external-ID field
db_s.object_id, db_s.external_id = object_id, external_id

In [9]:
# send data to Salesforce: hand the prepared frame to the engine's uploader.
# Per the recorded output below, this creates a bulk job, uploads the rows as
# CSV, and closes the job.
db_s.send_data(df_sf)

[Success] Bulk job creation successful. Job ID = 7505w00000b2D3bAAE
hello


[Success] CSV upload successful. Job ID = 7505w00000b2D3bAAE


[Success] Closing job successful. Job ID = 7505w00000b2D3bAAE


In [10]:
# check status of the bulk job started above
# NOTE(review): the recorded output shows state 'UploadComplete' with 0 records
# processed, i.e. the job had not finished when this cell ran. Re-run it until
# the state is final before trusting the failed-results check that follows.
db_s.check_bulk_status()

{'id': '7505w00000b2D3bAAE',
 'operation': 'upsert',
 'object': 'nanoHUB_issue__c',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2021-10-25T11:15:44.000+0000',
 'systemModstamp': '2021-10-25T11:15:46.000+0000',
 'state': 'UploadComplete',
 'externalIdFieldName': 'Issue_ID__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 0,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apexProcessingTime': 0}

In [11]:
# Pretty-print whatever the engine reports as failed records for the bulk job.
# NOTE(review): the recorded result is an empty string, but the status cell's
# output shows the job was still unprocessed, so an empty result here does not
# yet prove that every record succeeded.
from pprint import pprint
pprint(db_s.check_bulk_failed_results())

''
