# DB2-Salesforce connector: Tool usage cluster information updates

In [1]:
# API settings
# REST path prefix for Salesforce sObjects (API version v43.0). It is not referenced
# again in the visible cells.
# NOTE(review): the recorded bulk-job status reports apiVersion 47.0, so this v43.0
# path may be unused — confirm.
api_url = '/services/data/v43.0/sobjects'
# Name of the Salesforce external-ID field; assigned to db_s.external_id before the upload.
external_id = 'ID__c'
# Name of the target Salesforce object; assigned to db_s.object_id before the upload.
object_id = 'tool_usage_cluster__c'

from nanoHUB.application import Application

# Obtain the Application instance, which hands out both connections used below.
application = Application.get_instance()
# Database engine for 'rfm_data'; passed as the connection to pd.read_sql_query.
wang159_myrmekes_db = application.new_db_engine('rfm_data')

# Salesforce engine; db_s is the alias the upload cells use.
salesforce = application.new_salesforce_engine()
db_s = salesforce

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


In [2]:
    
import pandas as pd
import datetime

## Obtain cluster information from DB2 

In [3]:
# Pull the complete cluster_class_info table through the database engine.
# The sibling tables cluster_students_info and cluster_classtool_info are not
# loaded here.
cluster_class_sql = 'select * from cluster_class_info'
class_info_df = pd.read_sql_query(cluster_class_sql, con=wang159_myrmekes_db)

# Preview the first two rows.
display(class_info_df.head(2))

Unnamed: 0,index,class_id,start,end,lon,lat,size
0,0,0,2013-09-13,2013-09-20,-86.907866,40.430972,93
1,1,1,2013-09-15,2013-09-20,-87.924079,42.019433,10


In [4]:
# Parse the cluster start/end columns into datetimes so they can be sorted and
# compared; values that cannot be parsed become NaT (errors='coerce').
for date_col in ('start', 'end'):
    class_info_df[date_col] = pd.to_datetime(class_info_df[date_col], errors='coerce')

In [5]:
# Build the two location-derived columns from each cluster's lat/lon.
#
# Estimated_Location__c holds a nested dict shaped like
#   {"Estimated_Location__c": {"location__latitude__s": 34, "location__longitude__s": 23}}
# Map_Location__c holds a Google Maps URL centred on the same coordinates.
#
# The values are built by iterating over the two coordinate columns rather than
# with DataFrame.apply(axis=1): a row-wise apply on an empty frame can return a
# DataFrame instead of a Series, which makes the column assignment fail when the
# source table has no rows. Iterating the columns also avoids constructing a
# Series object per row.
MAP_URL_TEMPLATE = 'https://www.google.com/maps/@?api=1&map_action=map&center=%f,%f&zoom=9'

# (lat, lon) pairs in row order; materialised once because both columns reuse them.
cluster_coords = list(zip(class_info_df['lat'], class_info_df['lon']))

class_info_df['Estimated_Location__c'] = [
    {"Estimated_Location__c": {"location__latitude__s": lat, "location__longitude__s": lon}}
    for lat, lon in cluster_coords
]

# %f renders each coordinate with six decimal places, as before.
class_info_df['Map_Location__c'] = [MAP_URL_TEMPLATE % (lat, lon) for lat, lon in cluster_coords]

In [6]:
# Preview the first two rows to confirm the two location columns were added.
class_info_df.head(2)

Unnamed: 0,index,class_id,start,end,lon,lat,size,Estimated_Location__c,Map_Location__c
0,0,0,2013-09-13,2013-09-20,-86.907866,40.430972,93,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...
1,1,1,2013-09-15,2013-09-20,-87.924079,42.019433,10,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...


In [12]:
# Inspect the last rows of the frame (tail() defaults to five rows).
# NOTE(review): this cell's execution count (12) is out of sequence with its
# neighbours; re-run the notebook top to bottom before relying on this output.
class_info_df.tail()

Unnamed: 0,index,class_id,start,end,lon,lat,size,Estimated_Location__c,Map_Location__c
4571,281,4571,2021-10-13,2021-10-17,-86.914703,40.425098,7,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...
4572,282,4572,2021-11-12,2021-11-12,-86.923782,40.441184,6,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...
4573,283,4573,2021-12-04,2021-12-15,-63.595896,44.646776,16,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...
4574,284,4574,2021-09-21,2021-09-25,-76.510767,42.453044,10,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...
4575,285,4575,2021-09-01,2021-09-05,121.002754,14.534543,17,{'Estimated_Location__c': {'location__latitude...,https://www.google.com/maps/@?api=1&map_action...


## Match data with Salesforce format

In [7]:
# Source column of class_info_df -> Salesforce field name.
# The order of this mapping is the column order of the upload frame.
SF_FIELD_BY_SOURCE_COLUMN = {
    'class_id': 'ID__c',
    'start': 'Starting_Date__c',
    'end': 'Ending_Date__c',
    'Map_Location__c': 'Map_Location__c',
    'lat': 'Estimated_Location__latitude__s',
    'lon': 'Estimated_Location__longitude__s',
}

# NOTE(review): NaN and NaT values are meant to be taken care of at this step,
# but nothing here fills or drops them — they pass through unchanged.
df_sf = class_info_df[list(SF_FIELD_BY_SOURCE_COLUMN)].rename(columns=SF_FIELD_BY_SOURCE_COLUMN)

# Remember the field names that make up the Salesforce payload.
sf_original_fields = df_sf.columns

# display
df_sf.head(2).T

Unnamed: 0,0,1
ID__c,0,1
Starting_Date__c,2013-09-13 00:00:00,2013-09-15 00:00:00
Ending_Date__c,2013-09-20 00:00:00,2013-09-20 00:00:00
Map_Location__c,https://www.google.com/maps/@?api=1&map_action...,https://www.google.com/maps/@?api=1&map_action...
Estimated_Location__latitude__s,40.430972,42.019433
Estimated_Location__longitude__s,-86.907866,-87.924079


## Send to Salesforce Sales Cloud CRM

In [8]:

# specify Salesforce object ID and external ID
# Both attributes are set on the Salesforce engine before send_data is called;
# presumably the engine reads them to target the upsert — confirm in the engine class.
db_s.object_id = object_id
db_s.external_id = external_id

In [9]:
# send data to Salesforce
# Per the recorded output, this creates a bulk job, uploads the frame as CSV
# and then closes the job.
db_s.send_data(df_sf)

[Success] Bulk job creation successful. Job ID = 7505w00000cyX1xAAE
hello
[Success] CSV upload successful. Job ID = 7505w00000cyX1xAAE
[Success] Closing job successful. Job ID = 7505w00000cyX1xAAE


In [10]:
# check status
# Displays the bulk job's status record. The recorded output shows state
# 'InProgress' with 0 records processed, so this call may need repeating until
# the job reports a final state.
db_s.check_bulk_status()

{'id': '7505w00000cyX1xAAE',
 'operation': 'upsert',
 'object': 'tool_usage_cluster__c',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2022-01-31T03:30:55.000+0000',
 'systemModstamp': '2022-01-31T03:30:56.000+0000',
 'state': 'InProgress',
 'externalIdFieldName': 'ID__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 0,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apexProcessingTime': 0}

In [11]:
from pprint import pprint
# Print whatever failed-record results the engine reports for the bulk job.
# NOTE(review): the recorded output is an empty string; presumably that means no
# failures, but the job was still 'InProgress' at the last status check — confirm
# after the job has finished.
pprint(db_s.check_bulk_failed_results())

''
