# DB2-Salesforce connector: Basic user information updates

In [2]:
# Parameters
hours_range = 12 # number of hours to look back

# setting
bulk_api_threshold = 100 # if more records than this, use Bulk API instead
disallow_bulk_api = False # if True, the Composite API is always used (Bulk API is not allowed)

composite_api_limit = 25 # maximum number of records being passed via Composite API
composite_url = '/services/data/v43.0/sobjects/Contact/nanoHUB_user_ID__c/'

# Salesforce login parameters
# Secrets are read from environment variables so they never live in the notebook source:
#   SF_CLIENT_ID, SF_CLIENT_SECRET  - connected-app consumer key / secret
#   SF_USERNAME, SF_PASSWORD        - Salesforce account login
#   SF_SECURITY_TOKEN               - account security token (appended to the password)
# NOTE(review): credentials that were previously hardcoded in this cell should be treated as
# leaked and rotated.
import os

sf_login_params = {
    "grant_type": "password",
    "client_id": os.environ.get("SF_CLIENT_ID", ""),
    "client_secret": os.environ.get("SF_CLIENT_SECRET", ""),
    "username": os.environ.get("SF_USERNAME", ""),
    # the "password" field carries the account password immediately followed by the security token
    "password": os.environ.get("SF_PASSWORD", "") + os.environ.get("SF_SECURITY_TOKEN", ""),
}

# Warn early (without crashing the parameters cell) when something is not configured
_sf_missing = [key for key, value in sf_login_params.items() if not value]
if _sf_missing:
    print('[WARN] Salesforce login parameters not set in environment: %s' % ', '.join(_sf_missing))

In [3]:
import pandas as pd
import datetime

## Obtain contacts from DB2 
that have a registration date within the range of interest specified by `hours_range`

In [4]:
# Depending on the task, use different query and cutoff

# Hourly update for new registrations.
# The cutoff is computed from a full datetime: subtracting a timedelta from a `date` ignores the
# hours/seconds part, which would pin the cutoff to today's midnight regardless of hours_range.
# NOTE(review): datetime.today() is local time -- confirm it matches the timezone of registerDate.
date_cutoff = (datetime.datetime.today() - datetime.timedelta(hours=hours_range))\
                    .strftime('%Y-%m-%d %H:%M:%S')
sql_query = "select id, name, username, block, email, sendEmail, registerDate, lastvisitDate \
                    from jos_users where registerDate >= '%s'" % date_cutoff

# TEST: pull every user instead of only the recent registrations.
# While this flag is True the cutoff query above is replaced; set it to False for the hourly update.
test_full_table = True
if test_full_table:
    sql_query = "select id, name, username, block, email, sendEmail, registerDate, lastvisitDate \
                    from jos_users"

# display
print(sql_query)

select id, name, username, block, email, sendEmail, registerDate, lastvisitDate                     from jos_users


In [5]:
# connect with DB2
import os
from urllib.parse import quote_plus

import sqlalchemy as sql

# Database credentials are read from the environment instead of being embedded in the URL.
# DB2_USER / DB2_PASSWORD are required (a missing one raises KeyError right here);
# DB2_HOST / DB2_NAME fall back to the values this notebook has always used.
db2_user = os.environ['DB2_USER']
db2_password = os.environ['DB2_PASSWORD']
db2_host = os.environ.get('DB2_HOST', '127.0.0.1')
db2_name = os.environ.get('DB2_NAME', 'nanohub')

# quote_plus keeps characters such as '@' or '/' in the credentials from breaking the URL
engine = sql.create_engine('mysql+pymysql://%s:%s@%s/%s'
                           % (quote_plus(db2_user), quote_plus(db2_password), db2_host, db2_name))

# run the query prepared earlier and load the result into a DataFrame
df = pd.read_sql_query(sql_query, engine)

In [6]:
# display: preview the first three rows returned by the query
df.head(3)

Unnamed: 0,id,name,username,block,email,sendEmail,registerDate,lastvisitDate
0,998,hubrepo hubrepo,hubrepo,0,nkissebe@gmail.com,0,2014-11-13 21:09:09,NaT
1,1000,Hub Admin User,hubadmin,0,root@localhost,0,NaT,NaT
2,1683,nanoHUB support,support,0,support@nanohub.org,0,2008-11-19 22:51:04,2008-11-19 23:55:30


## Match data with Salesforce format

#### Salesforce contact fields
API name : explanation

- firstname
- Middle_name__c
- lastname
- Email
- HasOptedOutOfEmail
- nanoHUB_account_BLOCKED__c
- nanoHUB_last_active_date__c
- nanoHUB_registration_date__c
- nanoHUB_user_ID__c
- nanoHUB_username__c

In [499]:
# split full name into first, middle, and last names
def split_full_name(this_name):
    """Split a full name into first, middle and last name.

    The name is split on single spaces; empty fragments (repeated, leading or
    trailing spaces) are discarded.

    Parameters
    ----------
    this_name : str or None
        Full name. Non-string values (None, NaN) are treated as an empty name.

    Returns
    -------
    pandas.Series
        Always three elements, in the order first, middle, last:
        - no usable name: all three are None
        - one word: the word is used as both first and last name, middle is None
        - two or more words: first word, the words in between joined by a
          space ('' when there are none), last word
    """
    if not isinstance(this_name, str):
        # None / NaN: there is nothing to split
        this_name = ''

    name_parts = [part for part in this_name.split(' ') if part]

    if not name_parts:
        # always return three slots so the result still expands into three columns
        return pd.Series([None, None, None])

    if len(name_parts) == 1:
        # single word name
        return pd.Series([name_parts[0], None, name_parts[0]])

    # multi word name
    return pd.Series([name_parts[0], ' '.join(name_parts[1:-1]), name_parts[-1]])

In [500]:
# Assemble the Salesforce-shaped frame, one Salesforce API field per column
df_sf = pd.DataFrame()

# Make sure NaN and NaT values are taken care of here
df_sf[['firstname', 'Middle_name__c', 'lastname']] = df['name'].apply(split_full_name)

# identifiers are copied through unchanged
df_sf['nanoHUB_user_ID__c'] = df['id']
df_sf['nanoHUB_username__c'] = df['username']

# missing addresses and addresses containing '@invalid' become an empty string
df_sf['Email'] = df['email'].fillna('').apply(lambda addr: addr if '@invalid' not in addr else '')

# sendEmail: 1 = receive email, anything else = opt-out.
# Salesforce HasOptedOutOfEmail is the exact opposite flag.
df_sf['HasOptedOutOfEmail'] = df['sendEmail'].apply(lambda flag: 1 if flag != 1 else 0)
df_sf['nanoHUB_account_BLOCKED__c'] = df['block'].fillna(0)

# time-related columns: keep only the date part, as a string ('' when missing)
df_sf['nanoHUB_registration_date__c'] = df['registerDate'].dt.date.fillna('').astype(str)
df_sf['nanoHUB_last_active_date__c'] = df['lastvisitDate'].dt.date.fillna('').astype(str)

# remember the Salesforce field columns before any helper columns are added to df_sf
sf_original_fields = df_sf.columns

# display
df_sf.head(2).transpose()

Unnamed: 0,0,1
firstname,raphael,Steve
Middle_name__c,,
lastname,caloz,Bedell
nanoHUB_user_ID__c,286035,286037
nanoHUB_username__c,calozraphael,forwhombedelltolls
Email,calozraphael@gmail.com,forwhombedelltolls@gmail.com
HasOptedOutOfEmail,1,1
nanoHUB_account_BLOCKED__c,0,0
nanoHUB_registration_date__c,2020-04-28,2020-04-28
nanoHUB_last_active_date__c,2020-04-28,2020-04-28


## To Salesforce Sales Cloud CRM

In [501]:
# Do not record Salesforce credentials (username, security token, consumer key,
# consumer secret) in this notebook; keep them in environment variables or a secrets store.
# NOTE(review): the values that were previously written in this cell must be treated as
# leaked and rotated.

In [502]:
import requests

# obtain access token
# The login parameters are sent in the form-encoded POST body (data=) rather than in the URL
# query string (params=), so the password and client secret do not appear in request URLs.
response = requests.post("https://login.salesforce.com/services/oauth2/token", data=sf_login_params)

if not response.ok:
    # report the server's answer instead of failing below with a bare KeyError
    raise RuntimeError('Salesforce login failed (HTTP %d): %s' % (response.status_code, response.text))

access_token = response.json()['access_token']

In [503]:
# Determine if using Composite or Bulk API:
# Composite when the record count is within the threshold or Bulk is disallowed, Bulk otherwise
api_option = 'composite' if (len(df_sf.index) <= bulk_api_threshold or disallow_bulk_api) else 'bulk'

print('%s API is selected' %api_option.capitalize())

Composite API is selected


In [504]:
# Composite API: form JSON composite input, send it in batches, and report failures
if api_option == 'composite':
    # prepare composite JSON fields
    # body: every Salesforce field except the external ID, which is carried in the URL instead
    df_sf['body'] = df_sf[sf_original_fields].drop('nanoHUB_user_ID__c', axis=1)\
                         .fillna('').to_dict(orient='records')
    df_sf['method'] = 'PATCH'
    df_sf['url'] = df_sf.nanoHUB_user_ID__c.apply(lambda x: composite_url+str(x))
    df_sf['referenceId'] = df_sf.nanoHUB_user_ID__c.apply(lambda x: 'Contact_'+str(x))

    record_list = df_sf[['method', 'url', 'referenceId', 'body']].to_dict(orient='records')

    record_total = len(record_list) # total number of records
    print('%d total records are found.' %record_total)

    response_list = list()      # raw responses, one per batch
    failed_references = list()  # referenceId of every sub-request that did not succeed

    # walk through the records in slices of at most composite_api_limit
    for record_index in range(0, record_total, composite_api_limit):
        # form JSON for composite API call
        composite_json = {'compositeRequest': record_list[record_index:(record_index+composite_api_limit)]}

        # Composite REST call to Salesforce
        response = requests.post('https://na172.salesforce.com/services/data/v43.0/composite',
                            headers={"Authorization": "Bearer %s" %access_token},
                            json=composite_json)

        # save response
        response_list.append(response)

        if not response.ok:
            # the whole batch was rejected: every record in it counts as failed
            print('[FAIL] Composite batch starting at record %d failed (HTTP %d): %s'
                  % (record_index, response.status_code, response.text))
            failed_references.extend(item['referenceId'] for item in composite_json['compositeRequest'])
            continue

        # the batch call itself can succeed while individual sub-requests fail
        for sub_response in response.json().get('compositeResponse', []):
            if sub_response.get('httpStatusCode', 0) >= 400:
                failed_references.append(sub_response.get('referenceId'))

    if failed_references:
        print('[FAIL] %d of %d records were not accepted: %s'
              % (len(failed_references), record_total, failed_references))
    else:
        print('[Success] All %d records were accepted.' % record_total)

100 total records are found.


In [505]:
# Bulk API: create an ingest job, upload the CSV, then close the job
def _fail_bulk_step(step_response, message):
    """Print `message` and raise RuntimeError carrying the HTTP status and response text."""
    print(message)
    raise RuntimeError('%s HTTP %d: %s' % (message, step_response.status_code, step_response.text))


if api_option == 'bulk':

    bulk_ingest_url = 'https://na172.salesforce.com/services/data/v43.0/jobs/ingest/'
    bulk_json_headers = {"Authorization": "Bearer %s" %access_token,
                         'Content-Type': 'application/json; charset=UTF-8',
                         'Accept': 'application/json'}

    # Issuing a job request
    response = requests.post(bulk_ingest_url,
                        headers=bulk_json_headers,
                        json={
                                "object" : "Contact",
                                "externalIdFieldName" : 'nanoHUB_user_ID__c',
                                "contentType" : "CSV",
                                "operation" : "upsert"
                        })

    if not response.ok:
        # job request not successful
        _fail_bulk_step(response, '[FAIL] Bulk job creation failed ...')

    # job request successful
    job_id = response.json()['id']
    print('[Success] Bulk job creation successful. Job ID = %s'%job_id)

    # Save dataframe into CSV. Using Salesforce Bulk 2.0 API, CSV file should not exceed 150 MB
    # Only the Salesforce field columns are exported, so helper columns that another cell may
    # have added to df_sf never end up in the upload.
    bulk_csv = df_sf[sf_original_fields].to_csv(index=False).encode('utf-8')

    # Put CSV content to bulk job
    response = requests.put('%s%s/batches/' % (bulk_ingest_url, job_id),
                            headers={"Authorization": "Bearer %s" %access_token,
                                     'Content-Type': 'text/csv',
                                     'Accept': 'application/json'},
                            data = bulk_csv
                            )

    if not response.ok:
        # CSV upload not successful
        _fail_bulk_step(response, '[FAIL] CSV upload failed ...')

    # CSV upload successful
    print('[Success] CSV upload successful. Job ID = %s'%job_id)

    # Close the job, so Salesforce can start processing data
    response = requests.patch('%s%s' % (bulk_ingest_url, job_id),
                        headers=bulk_json_headers,
                        json={
                                "state" : "UploadComplete"
                        })

    if not response.ok:
        # job close not successful
        _fail_bulk_step(response, '[FAIL] Closing job failed ...')

    # job close successful
    print('[Success] Closing job successful. Job ID = %s'%job_id)
    

In [506]:
# Bulk API: fetch the ingest job once and show what Salesforce reports for it
if api_option == 'bulk':

    # check status
    job_status_url = 'https://na172.salesforce.com/services/data/v43.0/jobs/ingest/%s'%job_id
    response = requests.get(job_status_url, headers={"Authorization": "Bearer %s" %access_token})

    # render the decoded JSON payload in the notebook
    display(response.json())