## Simulation user lifetimes

In [1]:
import sys
import pandas as pd
import time
import datetime

In [2]:
## warnings handler
import warnings
warnings.filterwarnings('ignore')
# warnings.filterwarnings(action='once')
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
from nanoHUB.application import Application

# Obtain the Application object that hands out the data-source engines.
application = Application.get_instance()

# Database engines: one for the 'nanohub' database, one for 'nanohub_metrics'.
nanohub_db = application.new_db_engine('nanohub')
metrics_db = application.new_db_engine('nanohub_metrics')

# Salesforce engine; `db_s` is a second name for the same object and is the one
# the upload cells use.
db_s = salesforce = application.new_salesforce_engine()

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
[INFO] [DB2SalesforceAPI - root]: Obtained Salesforce access token ...... True [DB2SalesforceAPI.obtain_token:54]


In [4]:
# Per-user simulation lifetimes, read from nanohub_metrics.toolstart.
#
# The query yields, for each `user`, the earliest and latest `datetime` and the
# DATEDIFF between them as `sim_lifetime` (presumably MySQL, where DATEDIFF
# returns whole days — confirm against the metrics server):
#   * Branch 1: users with usable timestamps. A DATEDIFF of 0 (first and last
#     run on the same day) is reported as 1 instead of 0.
#   * Branch 2: users whose timestamps sum to 0/NULL. They get the placeholder
#     '1980-00-00 00:00:00' for both dates and 'indeterminate' as the lifetime.
# The outer WHERE drops rows whose username is the empty string.
#
# Fix: the timestamp filter used to read `datetime != 0 OR datetime != ''`.
# With OR the condition holds for every value that is not simultaneously 0 and
# '', so it did not express "neither zero nor empty". It now uses AND.
#
# NOTE(review): `id` is selected without an aggregate under GROUP BY user, so
# which id is returned per user is left to the database — confirm this is
# intended (and that ONLY_FULL_GROUP_BY is disabled on the server).
# NOTE(review): COALESCE(datetime) with a single argument just returns its
# argument; it is left untouched here to keep the query otherwise unchanged.
# For reference, the simpler form this query grew out of was:
#   select min(datetime), max(datetime), user from toolstart group by user
sql_query = '''
SELECT *
FROM (
       SELECT DISTINCT id,
                       user,
                       MIN(COALESCE(datetime)) AS min_datetime,
                       MAX(COALESCE(datetime)) AS max_datetime,
                       CASE
                         WHEN datediff(MAX(COALESCE(datetime)), MIN(COALESCE(datetime))) = 0
                           THEN 1
                         ELSE
                           datediff(MAX(COALESCE(datetime)), MIN(COALESCE(datetime)))
                         END
                                               AS sim_lifetime
       FROM nanohub_metrics.toolstart
       WHERE (datetime != 0 AND datetime != '')
       GROUP BY user
       UNION
       SELECT DISTINCT id,
                       user,
                       '1980-00-00 00:00:00' AS min_datetime,
                       '1980-00-00 00:00:00' AS max_datetime,
                       'indeterminate'       AS sim_lifetime
       FROM nanohub_metrics.toolstart
       GROUP BY user
       HAVING COALESCE(SUM(datetime), 0) = 0
     ) A
WHERE A.user != '';
'''

# Run the query on the metrics engine and load the result into a DataFrame.
lifetimes_df = pd.read_sql_query(sql_query, metrics_db)

In [5]:
# Keep only rows whose username is not the empty string. The SQL's outer WHERE
# already applies the same rule, so this acts as a second guard.
lifetimes_df = lifetimes_df.loc[lifetimes_df['user'].ne('')]

In [6]:
# Preview the frame loaded from the query (columns still carry the SQL names).
lifetimes_df

Unnamed: 0,id,user,min_datetime,max_datetime,sim_lifetime
0,8486100,0.yao.yuan,2017-03-08 10:15:23,2017-03-09 05:09:22,1
1,7813898,008dilip,2016-09-28 20:27:13,2016-10-12 23:50:56,14
2,5495446,00ff,2013-11-19 00:53:23,2013-11-19 00:58:56,1
3,8537204,00thamizharasi00,2017-03-20 09:46:36,2017-03-20 09:46:36,1
4,24637464,01024461660a,2020-09-23 18:14:20,2020-12-11 02:58:30,79
...,...,...,...,...,...
157411,26348982,zzzwmhq,2021-07-05 13:19:19,2021-07-05 13:27:28,1
157412,719,z_depth,2002-07-23 02:52:49,2002-07-23 03:04:24,1
157413,577288,Z_E_U_S,2006-10-16 20:25:49,2006-10-16 20:27:53,1
157414,5078871,z_henry35,2013-02-11 14:42:41,2013-02-19 01:53:39,8


In [7]:
# Rename the SQL result columns to the `__c` field names used for the upload;
# `id` is left as it is.
lifetimes_df = lifetimes_df.rename(
    columns={
        'user': 'nanoHUB_username__c',
        'min_datetime': 'earliest_sim__c',
        'max_datetime': 'latest_sim__c',
        'sim_lifetime': 'sim_lifetimes__c',
    }
)

In [8]:
# Preview the frame again to confirm the columns now carry the `__c` names.
lifetimes_df

Unnamed: 0,id,nanoHUB_username__c,earliest_sim__c,latest_sim__c,sim_lifetimes__c
0,8486100,0.yao.yuan,2017-03-08 10:15:23,2017-03-09 05:09:22,1
1,7813898,008dilip,2016-09-28 20:27:13,2016-10-12 23:50:56,14
2,5495446,00ff,2013-11-19 00:53:23,2013-11-19 00:58:56,1
3,8537204,00thamizharasi00,2017-03-20 09:46:36,2017-03-20 09:46:36,1
4,24637464,01024461660a,2020-09-23 18:14:20,2020-12-11 02:58:30,79
...,...,...,...,...,...
157411,26348982,zzzwmhq,2021-07-05 13:19:19,2021-07-05 13:27:28,1
157412,719,z_depth,2002-07-23 02:52:49,2002-07-23 03:04:24,1
157413,577288,Z_E_U_S,2006-10-16 20:25:49,2006-10-16 20:27:53,1
157414,5078871,z_henry35,2013-02-11 14:42:41,2013-02-19 01:53:39,8


In [9]:
# Positional row number (0-based, first match) of one known username.
username_list = lifetimes_df['nanoHUB_username__c'].to_list()
print(username_list.index('drjohnbegg'))

37722


In [10]:
# Sanity check: print the record at positional row 37081. The position is
# hard-coded, so the user it shows depends on the row order of the query result.
print(lifetimes_df.iloc[37081])

id                                 8239889
nanoHUB_username__c                 dongws
earliest_sim__c        2017-01-23 16:35:49
latest_sim__c          2017-03-29 22:21:41
sim_lifetimes__c                        65
Name: 37081, dtype: object


In [11]:
# Sanity check: print the full record of user 'drjohnbegg' (first match).
row_position = lifetimes_df['nanoHUB_username__c'].to_list().index('drjohnbegg')
print(lifetimes_df.iloc[row_position])

id                                22723493
nanoHUB_username__c             drjohnbegg
earliest_sim__c        2020-02-10 01:55:57
latest_sim__c          2021-09-23 04:55:26
sim_lifetimes__c                       591
Name: 37722, dtype: object


In [12]:
# Sanity check: print the full record of user 'faltens' (first match).
row_position = lifetimes_df['nanoHUB_username__c'].to_list().index('faltens')
print(lifetimes_df.iloc[row_position])

id                                 4867414
nanoHUB_username__c                faltens
earliest_sim__c        2009-10-27 22:45:33
latest_sim__c          2021-09-27 20:40:12
sim_lifetimes__c                      4353
Name: 44063, dtype: object


In [13]:
# Sanity check: print the full record of user 'gekco' (first match).
row_position = lifetimes_df['nanoHUB_username__c'].to_list().index('gekco')
print(lifetimes_df.iloc[row_position])

id                                  338947
nanoHUB_username__c                  gekco
earliest_sim__c        2004-02-02 12:58:12
latest_sim__c          2021-09-26 12:12:39
sim_lifetimes__c                      6446
Name: 48400, dtype: object


In [14]:
## Send the lifetimes to Salesforce

In [15]:
# Point the Salesforce engine at its target before uploading: records are
# keyed on the nanoHUB_username__c field and written to the Contact object.
# The recorded job status reports these as 'externalIdFieldName' and 'object'.
db_s.external_id = 'nanoHUB_username__c' #external_id
db_s.object_id = 'Contact' #object_id

In [16]:
# Upload the renamed lifetimes frame through the Salesforce engine. The recorded
# log shows a bulk job being created, a CSV uploaded, and the job closed.
db_s.send_data(lifetimes_df)

[Success] Bulk job creation successful. Job ID = 7505w00000aGLLSAA4
hello
[Success] CSV upload successful. Job ID = 7505w00000aGLLSAA4
[Success] Closing job successful. Job ID = 7505w00000aGLLSAA4


In [17]:
# Show the status of the bulk job.
# NOTE(review): in the recorded run the state is 'UploadComplete' with 0 records
# processed — presumably the job is processed asynchronously, so re-run this
# cell later to see the final counts.
db_s.check_bulk_status()

{'id': '7505w00000aGLLSAA4',
 'operation': 'upsert',
 'object': 'Contact',
 'createdById': '0055w00000DM5bOAAT',
 'createdDate': '2021-09-29T02:15:36.000+0000',
 'systemModstamp': '2021-09-29T02:15:51.000+0000',
 'state': 'UploadComplete',
 'externalIdFieldName': 'nanoHUB_username__c',
 'concurrencyMode': 'Parallel',
 'contentType': 'CSV',
 'apiVersion': 47.0,
 'jobType': 'V2Ingest',
 'lineEnding': 'LF',
 'columnDelimiter': 'COMMA',
 'numberRecordsProcessed': 0,
 'numberRecordsFailed': 0,
 'retries': 0,
 'totalProcessingTime': 0,
 'apiActiveProcessingTime': 0,
 'apexProcessingTime': 0}

In [18]:
import pprint

# Pretty-print whatever check_bulk_failed_results() returns (an empty string in
# the recorded run).
failed_results = db_s.check_bulk_failed_results()
pprint.pprint(failed_results)

''
