## general_import.py transposed to a .ipynb file for debugging

In [1]:
import warnings
warnings.filterwarnings('ignore')
# warnings.filterwarnings(action='once')
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd
import os, sys
from pathlib import Path
import platform
from copy import deepcopy
from nanoHUB.application import Application
from googleapiclient.discovery import build
from apiclient import errors
from oauth2client.service_account import ServiceAccountCredentials
from httplib2 import Http
from nanoHUB.logger import logger
application = Application.get_instance()
# nanohub_db = application.new_db_engine('nanohub')
# nanohub_metrics_db = application.new_db_engine('nanohub_metrics')
# wang159_myrmekes_db = application.new_db_engine('wang159_myrmekes')

salesforce = application.new_salesforce_engine()
db_s = salesforce
log = logger('nanoHUB:google_imports')

[1mnanoHUB - Serving Students, Researchers & Instructors[0m
Obtained Salesforce access token ...... True


## Setup GDrive API

In [3]:
from __future__ import print_function
import os.path
import os
import errno
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google.oauth2 import service_account

## stuff that's rather hard to find from documentation
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload

In [4]:
FOLDER_NAME = 'Salesforce_Imports'
TO_IMPORT_FOLDER_NAME = 'To_Import'
FAILURES_FOLDER_NAME = 'Import_Issues'
IMPORTED_FOLDER_NAME = 'Imported'

service = application.new_google_api_engine()

In [5]:
import_dir = os.getenv('APP_DIR') + '/.cache/SF_Imports'
Path(import_dir + '/' + FOLDER_NAME).mkdir(parents=True, exist_ok=True)
Path(import_dir + '/' + TO_IMPORT_FOLDER_NAME).mkdir(parents=True, exist_ok=True)
Path(import_dir + '/' + FAILURES_FOLDER_NAME).mkdir(parents=True, exist_ok=True)
Path(import_dir + '/' + IMPORTED_FOLDER_NAME).mkdir(parents=True, exist_ok=True)

In [6]:
def get_folder_id(service, folder_name: str):
    response = service.files().list(
        q = "mimeType = 'application/vnd.google-apps.folder' and name = '" + folder_name + "'",
        spaces='drive',
        fields="files(id, name)"
    ).execute()
    folder = response.get('files', [])[0]
    return folder.get('id')

def get_subfolder_id(service, parent_folder_id: str, subfolder_name: str):
    response = service.files().list(
        q = "mimeType = 'application/vnd.google-apps.folder' and name = '" + subfolder_name + "'" + " and '" + parent_folder_id + "' in parents",
        spaces='drive',
        fields="files(id, name)"
    ).execute()
    folder = response.get('files', [])[0]
    return folder.get('id')

def change_folder_for_file(file_id: str, old_folder_id: str, new_folder_id: str):
    return service.files().update(
        fileId=file_id,
        removeParents=old_folder_id,
        addParents=new_folder_id,
        fields='id, parents'
    ).execute()

In [7]:
folder_id = get_folder_id(service, FOLDER_NAME)
log.info('Found folder: %s (%s)' % (FOLDER_NAME, folder_id))

[INFO] [191429788 - nanoHUB:google_imports]: Found folder: Salesforce_Imports (1xwtRsGEcBNW7LItcgoekxxZADk7Ghy-d) [191429788.<module>:2]


In [8]:
subfolder_id = get_subfolder_id(service, folder_id, TO_IMPORT_FOLDER_NAME)
log.info('Found subfolder: %s (%s)' % (TO_IMPORT_FOLDER_NAME, subfolder_id))

[INFO] [285691732 - nanoHUB:google_imports]: Found subfolder: To_Import (1vjnRjaPcEeK82Ye3_BmjMEmXU51Y9FL-) [285691732.<module>:2]


In [9]:
mimetypes = """
mimeType = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
mimeType = 'application/vnd.ms-excel' or
mimeType = 'text/csv'
"""
file_ids = []
file_names = []
page_token = None
# q = "(" + mimetypes + ") and '" + folder.get('id') + "' in parents"
while True:
    query = "(" + mimetypes + ") and '" + subfolder_id + "' in parents"
    response = service.files().list(
        q = query,
        spaces='drive',
        pageToken=page_token,
        fields="nextPageToken, files(id, name)"
    ).execute()
    files = response.get('files', [])
    for file in files:
        log.info('Found file: %s (%s)' % (file.get('name'), file.get('id')))
        file_names.append(file['name'])
        file_ids.append(file['id'])
    page_token = response.get('nextPageToken', None)
    if page_token is None:
        break

[INFO] [2186677928 - nanoHUB:google_imports]: Found file: ML Webinar 11 03 21 - R.csv (1AlQZK8mRPRDX-WuvzwMM58-9aTPatl7r) [2186677928.<module>:20]


In [10]:
import io 
import shutil 

In [11]:
try:
    for temp_index,f_tbd_id in enumerate(file_ids):
        request = service.files().get_media(fileId=f_tbd_id) #,mimeType='text/csv') #if not .csv, then do .export()
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            log.info("Download %d%%." % int(status.progress() * 100))

        # The file has been downloaded into RAM, now save it in a file
        # https://stackoverflow.com/questions/60111361/how-to-download-a-file-from-google-drive-using-python-and-the-drive-api-v3
        fh.seek(0)
        download_filepath = import_dir + '/' + file_names[temp_index]
        if not os.path.exists(os.path.dirname(download_filepath)):
            try:
                os.makedirs(os.path.dirname(download_filepath))
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

        with open(download_filepath, 'wb') as f:
            log.info(f)
            shutil.copyfileobj(fh, f) #, length=131072)

    log.info("Finished downloading files")
except Exception as e:
    log.info("Error downloading file: " + str(e))
    raise

[INFO] [2159645865 - nanoHUB:google_imports]: Download 100%. [2159645865.<module>:9]
[INFO] [2159645865 - nanoHUB:google_imports]: <_io.BufferedWriter name='/home/saxenap/nanoHUB/.cache/SF_Imports/ML Webinar 11 03 21 - R.csv'> [2159645865.<module>:23]
[INFO] [2159645865 - nanoHUB:google_imports]: Finished downloading files [2159645865.<module>:26]


In [12]:
pd.set_option('display.max_columns', 500)
# pd.set_option('display.max_rows', 500)

In [13]:
list_files = [name for name in os.listdir(import_dir)] #if os.path.isfile(name)]
log.info(list_files)

[INFO] [2122226748 - nanoHUB:google_imports]: ['Import_Issues', 'Imported', 'ML Bayesian Optimization Webinar – A.csv', 'ML Webinar 11 03 21 - R.csv', 'Salesforce_Imports', 'To_Import'] [2122226748.<module>:2]


## Perform Sequential Imports

In [14]:
import time

In [15]:
success_files = []
fail_files = []

In [16]:
from chardet import detect
import cchardet
# get file encoding type
def get_encoding_type(file_path):
    with open(file_path, 'rb') as f:
        rawdata = f.read()
    return detect(rawdata)

In [17]:

from charset_normalizer import CharsetNormalizerMatches as CnM


log.info("\n")
for file in file_names:
    file_path = import_dir + '/' + file
    log.info("Processing file: " + file + '---->')

    from_chardet = get_encoding_type(file_path)
    log.info('From chardet ----> ' + from_chardet['encoding'])

    from_normalizer = CnM.from_path(file_path).best().first().encoding
    log.info('From Normalizer ----> ' + from_normalizer)



from charset_normalizer import from_path

def change_encoding(file_path: str):
    try:
        results = from_path(file_path)
        with open(file_path, 'w') as filetowrite:
            filetowrite.write(results)
    except IOError as e:
        log.info('Sadly, we are unable to perform charset normalization.', str(e))

[INFO] [1881040374 - nanoHUB:google_imports]: 
 [1881040374.<module>:4]
[INFO] [1881040374 - nanoHUB:google_imports]: Processing file: ML Webinar 11 03 21 - R.csv----> [1881040374.<module>:7]
[INFO] [1881040374 - nanoHUB:google_imports]: From chardet ----> Windows-1252 [1881040374.<module>:10]
[INFO] [1881040374 - nanoHUB:google_imports]: From Normalizer ----> cp1250 [1881040374.<module>:13]


In [18]:

for file in file_names:
    idf = pd.DataFrame()
    log.info("Reading %s" % file)
    try:
        f_type = file.split('.')[-1]
        log.info("File type %s" % f_type)
        #pandas import # add exception handling - UnicodeError
        if f_type == 'csv':
            try:
                idf = pd.read_csv(import_dir + '/' +file,encoding='utf-8')
                log.info('Loaded csv with encoding utf-8.')
            except:
                try:
                    idf = pd.read_csv(import_dir + '/' +file,encoding='cp1252')
                    log.info('Loaded csv with encoding cp1252 now.')
                except:
                    try:
                        idf = pd.read_csv(import_dir + '/' +file,encoding='utf-16')
                        log.info('Loaded csv with encoding utf-16 now.')
                    except:
                        try:
                            idf = pd.read_csv(import_dir + '/' +file,encoding='cp1254')
                            log.info('Loaded csv with encoding cp1254 now.')
                        except:
                            try:
                                idf = pd.read_csv(import_dir + '/' +file,encoding='cp775')
                                log.info('Loaded csv with encoding cp775 now.')
                            except:
                                try:
                                    idf = pd.read_csv(import_dir + '/' +file,encoding='cp1252',sep='\t')
                                    log.info('Loaded csv with encoding cp1252 now.')
                                except:
                                    try:
                                        idf = pd.read_csv(import_dir + '/' +file,encoding='utf-16',sep='\t')
                                        log.info('Loaded csv with encoding utf-16 now.')
                                    except:
                                        try:
                                            idf = pd.read_csv(import_dir + '/' +file,encoding='cp1254',sep='\t')
                                            log.info('Loaded csv with encoding cp1254 now.')
                                        except:
                                            try:
                                                idf = pd.read_csv(import_dir + '/' +file,encoding='cp775',sep='\t')
                                                log.info('Loaded csv with encoding cp775 now.')
                                            except:
                                                log.info('CSV import failed. Possible encoding issues with file %s.' % file)
                                                raise TypeError
        elif f_type == 'xlsx' or f_type == 'xls':
            try:
                xl = pd.ExcelFile(import_dir + '/' +file)
                log.info("sheet names: " + xl.sheet_names)# see all sheet names
                sheet_names = xl.parse(xl.sheet_names) #this already performs an import
                idf = pd.read_excel(import_dir + '/' +file,sheet_name=xl.sheet_names[0],header=0)#,skiprows=1)
            except:
                log.info('error bad lines, csv/xls/xlsx import failed')
                raise TypeError

        ## remove leading and trailing spaces ## add str.strip spaces for all columns and rename
        log.info(idf.columns)
        prev_idf_cols = idf.columns
        idf_cols = [i.strip() for i in idf.columns]
        idf.columns = idf.columns.str.strip()
        log.info(idf.columns)
        idf = idf.rename(columns={j:idf_cols[i] for i,j in enumerate(prev_idf_cols)})
    #     display(idf.columns)
    #     # log.info(prev_idf_cols)
    #     # log.info(idf_cols)

        # if engagement venue does not exist, then create a flag with that entry
        try:
            #if engagement venue is specified
            log.info(idf['Engagement Venue'])
            log.info(idf['First Name'])
            idf = idf.rename(columns={'Engagement Venue':'Venue__c'})
            # idf = idf.rename(columns={'First Name':'firstname','Last Name':'lastname'})
        except:
            #decide the event
            #event_extract = xl.sheet_names[0]
            #idf['Engagement Venue'] = event_extract
            idf['Venue__c'] = file.split('.')[0]

            try:
                #name extract
                names = idf['Name'].to_list()
                from copy import deepcopy?
                fname = deepcopy(names)
                lname = deepcopy(names)
                for ind, val in enumerate(names):
                    val = val.split(' ')
                    fname[ind] = val[-1]
                    lname[ind] = val[0]

                idf['firstname'] = fname
                idf['lastname'] = lname
                name_flag = True
            except:
                name_flag = False
                log.info('no name')

        #rename columns
        idf = idf.rename(columns={'email':'Email','EMAIL':'Email','E-mail Address':'Email',\
                    'Email Address':'Email','Recipient Email':'Email'})
        idf = idf.rename(columns={'Engagement Venue':'Venue__c'})
        idf = idf.rename(columns={'First Name':'firstname','Last Name':'lastname','FirstName':'firstname','LastName':'lastname'})

        idf = idf.rename(columns={0:'Email'})
        if name_flag == True:
            idf = idf.drop(columns='Name')#['NAME','LAST NAME','FIRST NAME'])            
        idf = idf.dropna(subset=['Email'])

        log.info(idf.columns)
        #email check rows
        grows = []
        brows = []
        for ind,val in enumerate(idf['Email'].to_list()):
            if '@' in val:
                grows.append(ind)
            else:
                brows.append(ind)    
        idf = idf.iloc[grows,:].reset_index().iloc[:,1:]        

        log.info(len(grows))
        log.info(len(brows))

        ## Import in contacts
        os_name = os.name
        sys_name = platform.system() #Linux, Darwin, Windows    

        # salesforce queries for contact data
        # deciding the queries
        import_df_cols = deepcopy(idf.columns)
        nh_id_flag = False
        email_flag = False
        if 'nanoHUB_user_ID__c' in import_df_cols:
            nh_id_flag = True

        if 'Email' in import_df_cols:
            email_flag = True    

        if nh_id_flag == True and email_flag == True:
            sf_df = db_s.query_data('SELECT Id,nanoHUB_user_ID__c, Email, Venue__c FROM Contact')#,sys_name=sys_name)
        elif email_flag == True:
            sf_df = db_s.query_data('SELECT Id,nanoHUB_user_ID__c, Email, Venue__c FROM Contact')#,sys_name=sys_name)    

        # find all existing contacts
        sf_emails = sf_df['Email'].to_list()
        grows = []
        brows = [] #dont need the sf_bad_rows as send to leads
        sf_grows = []
        for ind,val in enumerate(idf['Email'].to_list()):
            val = val.strip()
            if val in sf_emails:
                grows.append(ind)
                sf_grows.append(sf_emails.index(val))
            else:
                brows.append(ind)   

        # pull the matching SF entries and the matching import df entries
        sf_df_match = sf_df.iloc[sf_grows,:].reset_index().iloc[:,1:]
        idf_match = idf.iloc[grows,:].reset_index().iloc[:,1:]
        lead_df = idf.iloc[brows,:].reset_index().iloc[:,1:] #use this in next section

        log.info(sf_df_match.head(2))
        log.info(idf_match.head(2))

        # linear join since the sequence is matching
        for ind,val in enumerate(sf_df_match['Venue__c']):
            try:
                val = val.split(';')
                val.append(idf['Venue__c'][ind])
                val = ';'.join(val)
            except:
                val = idf['Venue__c'][ind]
            sf_df_match['Venue__c'][ind] = val

        log.info(sf_df_match.head(5))

        ## delete duplicates
        venues = sf_df_match['Venue__c'].to_list()
        for ind,val in enumerate(venues):
            venues[ind]=';'.join(list(dict.fromkeys(val.split(';'))))
        sf_df_match['Venue__c'] = venues
    #     display(sf_df_match.head(5))

        ## encoding correction for dashes
        venues = sf_df_match['Venue__c'].apply(lambda x: x.replace('â\x80\x93','-'))
        sf_df_match['Venue__c'] = venues

        sf_df_match = sf_df_match.drop_duplicates()
    #     display(sf_df_match.head(5))

        sf_df_match = sf_df_match[['Email','Venue__c','Id']]
    #     display(sf_df_match.head(5))

        ## send to SF
        # rebuild api object
        db_s_c = deepcopy(db_s)

        # send data to SF
        db_s_c.object_id = 'Contact'
        # db_s_c.external_id = 'nanoHUB_user_ID__c'
        db_s_c.external_id = 'Id'

        db_s_c.send_data(sf_df_match)

        ## find leads and send them to SF as well
        #pull all current leads
        sf_df = db_s.query_data('SELECT Id, Email, Venue__c, SF_indexer__c FROM Lead')    
        # find the max sf_indexer
        indexers = sf_df['SF_indexer__c'].fillna(0).to_list()
        max_ind = max(indexers)    

        # find all existing leads
        sf_emails = sf_df['Email'].to_list()
        m_rows = []
        nm_rows = [] #don't need sf no match rows
        sf_mrows = []    

        for ind,val in enumerate(lead_df['Email'].to_list()):
            val = val.strip()
            if val in sf_emails:
                m_rows.append(ind)
                sf_mrows.append(sf_emails.index(val))
            else:
                nm_rows.append(ind)    

        # filter the matches
        sf_df_match = sf_df.iloc[sf_mrows,:].reset_index().iloc[:,1:]
        join_idf = lead_df.iloc[m_rows,:].reset_index().iloc[:,1:]
        new_idf = lead_df.iloc[nm_rows,:].reset_index().iloc[:,1:]    

        # linear join since the sequence is matching
        for ind,val in enumerate(sf_df_match['Venue__c']):
            try:
                val = val.split(';')
                #if 'MSE Summer Webinar Series 2020' in val:
                #    val.remove('MSE Summer Webinar Series 2020')
                #    if 'MSE Summer Webinar Series 2020' in val:
                #        val.remove('MSE Summer Webinar Series 2020')
                val.append(join_idf['Venue__c'][ind])
                val = ';'.join(val)
            except:
                val = join_idf['Venue__c'][ind]
            sf_df_match['Venue__c'][ind] = val    

        ## delete duplicates
        venues = sf_df_match['Venue__c'].to_list()
        for ind,val in enumerate(venues):
            venues[ind] = ';'.join(list(dict.fromkeys(val.split(';'))))
        sf_df_match['Venue__c'] = venues

        # assign new SF_indexers for the new leads
        new_max_ind = int(max_ind+new_idf.shape[0])
        new_idf['SF_indexer__c'] = range(int(max_ind)+1,new_max_ind+1)    

        # need non-empty company field
        new_idf['Company'] = '-'    

        # ensure 'â\x80\x93' has been replaced
        sf_df_match['Venue__c'] = sf_df_match['Venue__c'].apply(lambda x: x.replace('â\x80\x93','-'))
        new_idf['Venue__c'] = new_idf['Venue__c'].apply(lambda x: x.replace('â\x80\x93','-'))

        sf_df_match['Company'] = '-'
        new_idf['Company'] = '-'
        log.info(new_idf)
        # sys.exit()

        # populate the company fields for sf_df_match and new_idf by comparing email addresses
        if 'Company' in idf.columns:
            log.info("company exist, using it")
            # comparison for sf_df_match

            sf_df_match_comp_ind = sf_df_match.columns.to_list().index('Company')
            for ind,val in enumerate(idf['Email'].to_list()):
                if val in sf_df_match['Email'].to_list():
                    sf_df_match_ind = sf_df_match['Email'].to_list().index(val)
                    sf_df_match.iloc[sf_df_match_ind,sf_df_match_comp_ind] = deepcopy(idf['Company'].to_list()[ind])

            # sf_df_match sf_indexer if not available
            sf_df_match_sf_ind = sf_df_match.columns.to_list().index('SF_indexer__c')
            new_max_ind = int(max_ind+new_idf.shape[0])
            for ind,val in enumerate(sf_df_match['SF_indexer__c'].to_list()):
                if type(val) != int and type(val) != float:
                    sf_df_match.iloc[ind,sf_df_match_sf_ind] = new_max_ind+1
                    new_max_ind += 1

            new_idf_comp_ind = new_idf.columns.to_list().index('Company')
            for ind,val in enumerate(idf['Email'].to_list()):
                if val in new_idf['Email'].to_list():
                    new_idf_ind = new_idf['Email'].to_list().index(val)
                    new_idf.iloc[new_idf_ind,new_idf_comp_ind] = deepcopy(idf['Company'].to_list()[ind])
        else:
            log.info('no company in import list, set to -')    
        
        if sf_df_match.shape[0] == 0 and new_idf.shape[0] == 0: 
            log.info('no leads to import, they were all contacts')
        else:
            sf_df_match = sf_df_match.fillna('-')
            new_idf = new_idf.fillna('-')

            try:
                new_idf = new_idf.rename(columns={'First Name':'firstname','Last Name':'lastname'})
            except:
                log.info('names are good')          

            try:
                new_idf = new_idf[['Email','firstname','lastname','SF_indexer__c','Venue__c']]
                new_idf['Company'] = '-'
            except: # names are not present
                try:
                    tempnames = new_idf['Faculty'].apply(lambda x: x.split(' '))
                    tf_names = [i[0] for i in tempnames]
                    tl_names = [i[-1] if len(i[-1]) > 0 else '-' for i in tempnames]
                    new_idf['firstname'] = tf_names
                    new_idf['lastname'] = tl_names
                    new_idf = new_idf[['Email','firstname','lastname','SF_indexer__c','Venue__c']]
                    new_idf['Company'] = '-'    
                except:
                    tempnames = new_idf['Name'].to_list()
                    temp_fname = []
                    temp_lname = []
                    for t_ind,t_val in enumerate(tempnames):
                        t_val = t_val.split(' ')
                        temp_fname.append(t_val[0])
                        if len(t_val[-1]) > 0:
                            temp_lname.append(t_val[-1])
                        else:
                            temp_lname.append('-')

                    new_idf['firstname'] = temp_fname
                    new_idf['lastname'] = temp_lname
                    new_idf = new_idf[['Email','firstname','lastname','SF_indexer__c','Venue__c']]
                    new_idf['Company'] = '-'

            #drop duplicate rows
            sf_df_match = sf_df_match.drop_duplicates(subset='SF_indexer__c')
            new_idf = new_idf.drop_duplicates()    

            sf_df_match['Company'] = sf_df_match['Company'].replace('  ','-')
            sf_df_match = sf_df_match.drop(columns='SF_indexer__c')

            #send the matching ones
            db_s_l1 = deepcopy(db_s)

            # send data to SF
            db_s_l1.object_id = 'Lead'
            # db_s_l1.external_id = 'SF_indexer__c'
            db_s_l1.external_id = 'Id'

            db_s_l1.send_data(sf_df_match)

            #send the new ones
            db_s_l2 = deepcopy(db_s)

            # send data to SF
            db_s_l2.object_id = 'Lead'
            db_s_l2.external_id = 'SF_indexer__c'

            db_s_l2.send_data(new_idf)

        success_files.append(file)
    except Exception as e:
        log.info("Error with file: " + str(e))
        fail_files.append(file)

[INFO] [790826062 - nanoHUB:google_imports]: Reading ML Webinar 11 03 21 - R.csv [790826062.<module>:3]
[INFO] [790826062 - nanoHUB:google_imports]: File type csv [790826062.<module>:6]
[INFO] [790826062 - nanoHUB:google_imports]: Loaded csv with encoding cp1252 now. [790826062.<module>:15]
[INFO] [790826062 - nanoHUB:google_imports]: Index(['FirstName  ', 'LastName  ', 'Email  ', 'Invited  ', 'Registered  ',
       'Attended  ', 'Registration Status  ', 'Lead Source ID  ',
       'Registration Date/Time  ', 'Registration ID  ', 'Registration Score  ',
       'Okay to send email  ', 'Title  ', 'Number of Employees  ', 'Company  ',
       'Phone  ', 'Address 1  ', 'Address 2  ', 'City  ', 'State/Province  ',
       'Postal/Zip Code  ', 'Country/Region  ',
       'How did you hear about this webinar?  ',
       'What is your primary motivation for attending this tutorial?  ',
       'Are you running nanoHUB in a classroom?  ',
       'We are looking for ways to continue discussion after 

In [19]:
## move success files from To_Import to Imported on Google Drive 
## move failure files from To_Import to Import_Issues on GDrive 

In [20]:
log.info("success files: ")
success_files

[INFO] [2124149726 - nanoHUB:google_imports]: success files:  [2124149726.<module>:1]


['ML Webinar 11 03 21 - R.csv']

In [21]:
log.info("fail files: ")
fail_files


[INFO] [1178869520 - nanoHUB:google_imports]: fail files:  [1178869520.<module>:1]


[]

In [22]:
tbd_imp_names = file_names
tbd_imp_ids = file_ids

In [23]:
## match success and failures to their appropriate ids
success_fids = []
failure_fids = []

for i in success_files:
    t_index = tbd_imp_names.index(i) #np.where(i in tbd_imp_names)
    success_fids.append(tbd_imp_ids[t_index])#[0][0]])

for i in fail_files:
    t_index = tbd_imp_names.index(i) #np.where(i in tbd_imp_names)
    failure_fids.append(tbd_imp_ids[t_index])#[0][0]])

In [24]:
log.info("success file ids: ")
success_fids

[INFO] [3776719133 - nanoHUB:google_imports]: success file ids:  [3776719133.<module>:1]


['1AlQZK8mRPRDX-WuvzwMM58-9aTPatl7r']

In [25]:
log.info("failure file ids: ")
failure_fids

[INFO] [2311033698 - nanoHUB:google_imports]: failure file ids:  [2311033698.<module>:1]


[]

In [26]:
## start with success (To_Import -> Imported)
imported_folder_id = get_subfolder_id(service, folder_id, IMPORTED_FOLDER_NAME)
log.info('Found imported sub-folder: %s (%s)' % (IMPORTED_FOLDER_NAME, imported_folder_id))

for sid in success_fids:
    log.info('Deleting success file with id: ' + sid)
    change_folder_for_file(sid, subfolder_id, imported_folder_id)


[INFO] [753651360 - nanoHUB:google_imports]: Found imported sub-folder: Imported (1ujcpSpj_Uj00VAENMU4OEm5KJiszBCb5) [753651360.<module>:3]
[INFO] [753651360 - nanoHUB:google_imports]: Deleting success file with id: 1AlQZK8mRPRDX-WuvzwMM58-9aTPatl7r [753651360.<module>:6]


In [27]:
## now do failures (To_Import -> Import_Issues)
# failure_folder_id = get_subfolder_id(service, folder_id, FAILURES_FOLDER_NAME)
# log.info('Found failure subfolder: %s (%s)' % (FAILURES_FOLDER_NAME, failure_folder_id))
#
# for fid in failure_fids:
#     log.info('Deleting failed file with id: ' + fid)
#     change_folder_for_file(fid, subfolder_id, failure_folder_id)

In [28]:
# remove files from local directory
for file in list_files:
    os.remove(import_dir + '/' +file)

IsADirectoryError: [Errno 21] Is a directory: '/home/saxenap/nanoHUB/.cache/SF_Imports/Import_Issues'