# DB2-Salesforce connector: pull in new email flags from DB2

In [1]:
# Parameters
# API settings
# NOTE(review): api_url is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
api_url = '/services/data/v43.0/sobjects'
# Name of the external-ID field; assigned to db_s.external_id before the send.
# The SQL queries alias user_id to this same name.
external_id = 'nanoHUB_user_ID__c'
# Name of the target object; assigned to db_s.object_id before the send.
object_id = 'Contact'

# login parameters to be handled by Papermill
from nanoHUB.application import Application

# Shared application instance that hands out the database / Salesforce engines.
application = Application.get_instance()
# Engine passed to pd.read_sql_query as the connection for the profile queries.
nanohub_db = application.new_db_engine('nanohub')

salesforce = application.new_salesforce_engine()
# db_s is an alias of the same engine object; the later cells use this name.
db_s = salesforce

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


In [2]:
import pandas as pd
import datetime

In [3]:
# Query the 'personalizedcommunication' profile entries: one output row per
# matching profile row, with the flag set to 1 when profile_value starts with
# 'yes' and 0 otherwise. user_id is aliased to the same name as `external_id`.
# NOTE(review): '%%' is presumably an escaped literal '%' for the DB driver's
# parameter formatting — confirm against the engine in use.
sql_query_pc = '''
SELECT user_id AS nanoHUB_user_ID__c,
    CASE
        WHEN profile_value LIKE 'yes%%' THEN 1
        ELSE 0
      END AS personalizedcommunication__c
FROM nanohub.jos_user_profiles
WHERE profile_key in ('personalizedcommunication')
;
'''

In [4]:
# Load the personalized-communication flags (one row per profile entry).
pc_df = pd.read_sql_query(sql_query_pc, nanohub_db)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
pc_df.info()
print(pc_df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272921 entries, 0 to 272920
Data columns (total 2 columns):
 #   Column                        Non-Null Count   Dtype
---  ------                        --------------   -----
 0   nanoHUB_user_ID__c            272921 non-null  int64
 1   personalizedcommunication__c  272921 non-null  int64
dtypes: int64(2)
memory usage: 4.2 MB
None
        nanoHUB_user_ID__c  personalizedcommunication__c
272916              370546                             0
272917              370551                             1
272918              370552                             1
272919              370553                             1
272920              370554                             1


In [5]:
# Query the 'updates_news' profile entries: one output row per matching profile
# row, with the flag set to 1 when profile_value starts with 'yes' and 0
# otherwise. user_id is aliased to the same name as `external_id`.
# NOTE(review): '%%' is presumably an escaped literal '%' for the DB driver's
# parameter formatting — confirm against the engine in use.
sql_query_un = '''
SELECT user_id AS nanoHUB_user_ID__c,
      CASE
        WHEN profile_value LIKE 'yes%%' THEN 1
        ELSE 0
      END AS updatesnews__c
FROM nanohub.jos_user_profiles
WHERE profile_key in ('updates_news')
;
'''

In [6]:
# Load the updates/news flags (one row per profile entry).
un_df = pd.read_sql_query(sql_query_un, nanohub_db)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
un_df.info()
print(un_df.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271588 entries, 0 to 271587
Data columns (total 2 columns):
 #   Column              Non-Null Count   Dtype
---  ------              --------------   -----
 0   nanoHUB_user_ID__c  271588 non-null  int64
 1   updatesnews__c      271588 non-null  int64
dtypes: int64(2)
memory usage: 4.1 MB
None
        nanoHUB_user_ID__c  updatesnews__c
271583              370546               0
271584              370551               1
271585              370552               1
271586              370553               1
271587              370554               1


In [7]:
# Combine both flag tables into one row set keyed by user ID.
#
# A helper column `g` numbers the repeated rows of each user (0, 1, 2, ...), so
# the outer merge pairs the n-th updates_news row of a user with that user's
# n-th personalizedcommunication row instead of cross-multiplying duplicates.
# The helper is added with assign(), leaving un_df / pc_df untouched so the
# cell can be re-run safely.
#
# drop(columns='g') replaces the positional `drop('g', 1)`: passing `axis`
# positionally is deprecated and rejected by pandas 2.x.
#
# fillna(1): a flag that is missing on one side of the merge defaults to 1.
# NOTE(review): the merge introduces NaN before the fill, so both flag columns
# end up as float (0.0 / 1.0) — confirm that is what the Salesforce upload expects.
# NOTE(review): the recorded output shows repeated rows for one user ID —
# confirm duplicate external IDs are acceptable in a single upload.
merged_df = (
    un_df.assign(g=un_df.groupby('nanoHUB_user_ID__c').cumcount())
    .merge(
        pc_df.assign(g=pc_df.groupby('nanoHUB_user_ID__c').cumcount()),
        how='outer',
        on=['nanoHUB_user_ID__c', 'g'],
    )
    .drop(columns='g')
    .fillna(1)
)

print(merged_df.head())
print(merged_df.tail())
# info() prints its own report and returns None; no print() wrapper needed.
merged_df.info()

  merged_df = un_df.merge(pc_df, how='outer').drop('g',1)
   nanoHUB_user_ID__c  updatesnews__c  personalizedcommunication__c
0                 998             0.0                           1.0
1                1000             0.0                           1.0
2                1683             0.0                           1.0
3                1684             0.0                           1.0
4                1685             0.0                           1.0
        nanoHUB_user_ID__c  updatesnews__c  personalizedcommunication__c
273013              359369             1.0                           0.0
273014              369414             1.0                           0.0
273015              369414             1.0                           0.0
273016              369414             1.0                           0.0
273017              328084             1.0                           1.0
<class 'pandas.core.frame.DataFrame'>
Int64Index: 273018 entries, 0 to 273017
Data columns (tota

In [8]:
# Spot check: show the merged flag row(s) for nanoHUB user 325902.
print(merged_df[merged_df['nanoHUB_user_ID__c'].eq(325902)])

        nanoHUB_user_ID__c  updatesnews__c  personalizedcommunication__c
237427              325902             0.0                           0.0


In [None]:
# Spot check: show the merged flag row(s) for nanoHUB user 314658.
print(merged_df[merged_df['nanoHUB_user_ID__c'].eq(314658)])

In [None]:
# Spot check: show the merged flag row(s) for nanoHUB user 9533.
print(merged_df[merged_df['nanoHUB_user_ID__c'].eq(9533)])

## Send to Salesforce

In [9]:
# Point the Salesforce engine at the target object and external-ID field from
# the parameters cell. They are set before send_data, which presumably reads
# them — confirm in the engine code.
db_s.object_id = object_id
db_s.external_id = external_id

# send data to Salesforce
# NOTE(review): the recorded job status reports operation 'upsert' on 'Contact'
# keyed by 'nanoHUB_user_ID__c', so send_data presumably creates that bulk job.
db_s.send_data(merged_df)


[Success] Bulk job creation successful. Job ID = 7508W00000i7HakQAE
hello
[Success] CSV upload successful. Job ID = 7508W00000i7HakQAE
[Success] Closing job successful. Job ID = 7508W00000i7HakQAE


In [10]:
# check status
# NOTE(review): in the recorded run this returned state 'UploadComplete' with
# numberRecordsProcessed 0, i.e. it was queried before processing had finished.
# Re-run this cell until the state is final before trusting the record counts.
db_s.check_bulk_status()

{'id': '7508W00000i7HakQAE',
 'operation': 'upsert',
 'object': 'Contact',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2022-06-30T05:25:01.000+0000',
 'systemModstamp': '2022-06-30T05:25:04.000+0000',
 'state': 'UploadComplete',
 'externalIdFieldName': 'nanoHUB_user_ID__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 0,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apexProcessingTime': 0}

In [11]:
# Show the failed-record results of the bulk job.
# NOTE(review): the recorded run printed an empty string, but the job was still
# in 'UploadComplete' when its status was checked — an empty result here is
# presumably only meaningful once the job has finished processing.
from pprint import pprint

pprint(db_s.check_bulk_failed_results())

''
