In [1]:
from simple_salesforce import Salesforce, SalesforceMalformedRequest
from argparse import ArgumentParser
from csv import DictWriter
from datetime import date
import time
from pathlib import Path
from nanoHUB.infrastructure.salesforce.contact import SalesforceFromEnvironment
from dotenv import load_dotenv
from nanoHUB.application import Application
from nanoHUB.configuration import ClusteringConfiguration
from nanoHUB.pipeline.geddes.data import get_default_s3_client
from nanoHUB.dataaccess.lake import S3FileMapper
import os
import pandas as pd

# Remember the launch directory, then pull settings from a .env file into os.environ.
cwd = os.getcwd()
load_dotenv()

# Every run gets its own timestamped folder (format: salesforce_backups/YYYYmmdd-HHMMSS).
now = time.strftime("%Y%m%d-%H%M%S")
backup_folder = '/'.join(('salesforce_backups', now))
print(backup_folder)

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
salesforce_backups/20220623-144452


In [2]:
# Local destination for this run's CSV files; APP_DIR must be set in the
# environment (a missing variable raises KeyError here).
datadir = os.environ['APP_DIR'] + '/' + backup_folder
print('Saving Results -> Local dir: ' + datadir)

# exist_ok=True keeps re-runs idempotent when the directory is already there,
# while still raising FileExistsError if the path exists but is not a
# directory, so a stray file at that location is not silently ignored.
datapath = Path(datadir)
datapath.mkdir(parents=True, exist_ok=True)

Saving Results -> Local dir: /home/saxenap/nanoHUB/salesforce_backups/20220623-144452


In [3]:
# Application instance that the default S3 client is built from.
application = Application.get_instance()
s3_client = get_default_s3_client(application)

# File mapper bound to the bucket named by ClusteringConfiguration().bucket_name_raw.
raw_bucket_name = ClusteringConfiguration().bucket_name_raw
raw_mapper = S3FileMapper(s3_client, raw_bucket_name)

In [4]:
# Build the Salesforce client for the 'dev' configuration.
# NOTE(review): presumably 'dev' selects which credentials are read from the
# environment -- confirm this is the org that should be backed up.
sf_factory = SalesforceFromEnvironment('dev')
sf = sf_factory.create_new()
print(sf)

<simple_salesforce.api.Salesforce object at 0x7f46e9d0f370>


In [5]:
# Global describe call; its 'sobjects' list is filtered for queryable objects in the next cell.
description = sf.describe()

In [6]:
# Collect the name of every object that the describe result flags as queryable.
names = []
for sobject in description['sobjects']:
    if sobject['queryable']:
        names.append(sobject['name'])
print(names)

['AIApplication', 'AIApplicationConfig', 'AIInsightAction', 'AIInsightFeedback', 'AIInsightReason', 'AIInsightValue', 'AIRecordInsight', 'AcceptedEventRelation', 'Account', 'AccountContactRole', 'AccountFeed', 'AccountHistory', 'AccountPartner', 'AccountShare', 'ActionLinkGroupTemplate', 'ActionLinkTemplate', 'ActiveFeatureLicenseMetric', 'ActivePermSetLicenseMetric', 'ActiveProfileMetric', 'AdditionalNumber', 'AgentWork', 'AgentWorkShare', 'Announcement', 'ApexClass', 'ApexComponent', 'ApexEmailNotification', 'ApexLog', 'ApexPage', 'ApexPageInfo', 'ApexTestQueueItem', 'ApexTestResult', 'ApexTestResultLimits', 'ApexTestRunResult', 'ApexTestSuite', 'ApexTrigger', 'AppAnalyticsQueryRequest', 'AppDefinition', 'AppMenuItem', 'AppTabMember', 'Asset', 'AssetFeed', 'AssetHistory', 'AssetRelationship', 'AssetRelationshipFeed', 'AssetRelationshipHistory', 'AssetShare', 'AssignmentRule', 'AssociatedLocation', 'AssociatedLocationHistory', 'AsyncApexJob', 'Attachment', 'AuraDefinition', 'AuraDefin

In [None]:
# --- Sanity check: look up one Contact by nanoHUB username -------------------
name = 'Contact'
salesforceObject = getattr(sf, name)
fieldNames = [field['name'] for field in salesforceObject.describe()['fields']]

try:
    # SOQL string literals must be single-quoted; without the quotes the
    # request is rejected as malformed.
    results = sf.query_all(
        "SELECT " + ", ".join(fieldNames) + " FROM " + name
        + " WHERE nanoHUB_username__c = 'tfaltens'"
    )
    # NOTE(review): this prints a full Contact record; clear the cell output
    # before sharing the notebook.
    print(results)
except SalesforceMalformedRequest as e:
    print(e)

# --- Back up every queryable object to CSV (local disk and S3) ---------------
for name in names:
    # Describe the object that is actually being exported so the SELECT list
    # matches its own fields.
    salesforceObject = getattr(sf, name)
    fieldNames = [field['name'] for field in salesforceObject.describe()['fields']]

    # Build a SOQL query over all of the object's fields and fetch every row.
    try:
        results = sf.query_all("SELECT " + ", ".join(fieldNames) + " FROM " + name)
    except SalesforceMalformedRequest as e:
        # Some objects have rules about how they may be queried; skip them,
        # but report which ones so gaps in the backup are visible.
        print('Skipping ' + name + ': ' + str(e))
        continue

    # The rows live under 'records' in the query response. Restricting the
    # columns to fieldNames drops the per-row 'attributes' metadata and keeps
    # a header row even when the object has no records.
    df = pd.DataFrame(results['records'], columns=fieldNames)

    # NOTE(review): raw_mapper is the only S3 mapper created in this notebook;
    # confirm the raw bucket is the intended backup destination.
    raw_mapper.save_as_csv(df, backup_folder + '/' + name + '.csv', index=None)

    outputfile = datapath / (name + ".csv")
    df.to_csv(outputfile, index=None)
    print('Saved ' + name + ' (' + str(len(df)) + ' rows)')

In [None]:
def get_df_for(name: str):
    """Fetch every record of one Salesforce object as a DataFrame.

    Uses the notebook-level ``sf`` client to describe the object, selects all
    of its fields with a single SOQL query, and tabulates the returned rows.

    Args:
        name: Name of the Salesforce object to export, e.g. ``'Contact'``.

    Returns:
        A pandas DataFrame with one column per described field (empty, but
        with those columns, when the object has no records), or ``None`` when
        Salesforce rejects the query as malformed.
    """
    salesforceObject = getattr(sf, name)
    fieldNames = [field['name'] for field in salesforceObject.describe()['fields']]
    try:
        results = sf.query_all("SELECT " + ", ".join(fieldNames) + " FROM " + name)
    except SalesforceMalformedRequest as e:
        # Some objects cannot be queried this way; report it and let the
        # caller decide what to do instead of stopping the kernel.
        print(e)
        return None
    # Rows live under 'records'; limiting columns to fieldNames drops the
    # per-row 'attributes' metadata.
    return pd.DataFrame(results['records'], columns=fieldNames)