In [None]:
## to list files in a folder of google drive

In [3]:
from __future__ import print_function

import pickle
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import argparse
import sys
import datetime
import time

In [4]:
def setup_drive_service():
    try:
        # If modifying these scopes, delete the file token.pickle.
        SCOPES = ['https://www.googleapis.com/auth/drive']

        creds = None
        gdrive_creds_json_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/credentials_pil_thesis_work_oauth_client_id_20200916.json'

        CREDS_PICKLE_TOP_FOLDER = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/'
        creds_pickle_file_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/token_pil_thesis_work_oauth_client_id_20200916.pickle'
        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        if os.path.exists(creds_pickle_file_loc):
            with open(creds_pickle_file_loc, 'rb') as token:
                creds = pickle.load(token)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    gdrive_creds_json_loc, SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            with open(creds_pickle_file_loc, 'wb') as token:
                pickle.dump(creds, token)

        drive_serviceservice = build('drive', 'v3', credentials=creds)
        print(f"\n\ntype of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =\n{type(drive_serviceservice)}\n\n")
        
        return drive_serviceservice
    except:
        return None

In [5]:
def list_files_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 2
    count_pages = 0
    count_files = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='image/jpeg'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFiles for PAGE{count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of files found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [6]:
def list_folders_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 10
    count_pages = 0
    count_folders = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='application/vnd.google-apps.folder'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFolders for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found folder: {( file.get('name'), file.get('id') )}")
            count_folders += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of folders found = {count_folder}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [8]:
for_files_only = "mimeType='image/jpeg'"
for_folders_only = "mimeType='application/vnd.google-apps.folder'"
query = "'" + 'FOLDERID' + "'" + ' in parents' + ' and ' + for_files_only
print(query)

'FOLDERID' in parents and mimeType='image/jpeg'


In [9]:
def list_items_in_gdrive_target_folder(_service, _folder_id, _files_or_folders = 'both'):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 5
    count_pages = 0
    count_files = 0
    page_token = None
    if _files_or_folders == 'folders':
        query_only_folders = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType='application/vnd.google-apps.folder'"
        query = query_only_folders
    elif _files_or_folders == 'files':
        query_only_files = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType != 'application/vnd.google-apps.folder'"
        query = query_only_files
    elif _files_or_folders == 'both': ## should be defaut is both
        query_folders_and_files = "'" + _folder_id + "'" + ' in parents'
        query = query_folders_and_files
    else: ## invalid value sent
        print(f"\n\nERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:")
        print(f"both     OR    files    OR    folders")
        print(f"Your query parater=\n{_files_or_folders}\n")
        return None
    print(f"\nQuery parameter built as:{query}\n")
    while True:
        #response = _service.files().list(q="'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents",
        #                                      spaces='drive',
        #                                      fields='nextPageToken, files(id, name)',
        #                                      pageToken=page_token).execute()
        response = _service.files().list(q=query,
                                      spaces='drive',
                                      fields='nextPageToken, files(id, name)',
                                      pageToken=page_token).execute()
        print(f"\nItems for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found folder/file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of itmes found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [16]:
def retrieve_items_in_gdrive_target_folder(_service, _folder_id, _files_or_folders = 'both', _MAX_PAGES_LIMIT = 3):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = _MAX_PAGES_LIMIT
    count_pages = 0
    count_files = 0
    page_token = None
    if _files_or_folders == 'folders':
        query_only_folders = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType='application/vnd.google-apps.folder'"
        query = query_only_folders
    elif _files_or_folders == 'files':
        query_only_files = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType != 'application/vnd.google-apps.folder'"
        query = query_only_files
    elif _files_or_folders == 'both': ## should be defaut is both
        query_folders_and_files = "'" + _folder_id + "'" + ' in parents'
        query = query_folders_and_files
    else: ## invalid value sent
        print(f"\n\nERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:")
        print(f"both     OR    files    OR    folders")
        print(f"Your query parater=\n{_files_or_folders}\n")
        return None
    print(f"\nQuery parameter built as:{query}\n")
    items_result = []
    while True:
        #response = _service.files().list(q="'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents",
        #                                      spaces='drive',
        #                                      fields='nextPageToken, files(id, name)',
        #                                      pageToken=page_token).execute()
        response = _service.files().list(q=query,
                                      spaces='drive',
                                      fields='nextPageToken, files(id, name)',
                                      pageToken=page_token).execute()
        #print(f"\nItems for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            #print(f"Found folder/file: {( file.get('name'), file.get('id') )}")
            items_result.append( {'name': file.get('name') , 'id': file.get('id')} )
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of itmes found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break
    return items_result

In [11]:
## make the service
service = setup_drive_service()
if service is None:
    print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
else:

    print(f"\n\nService setup SUCCESS\n\n")

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=55352638995-5rjg28ma1ishr098v884vgc2cpdhf08a.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A46869%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&state=e25txhhk8emzFwlAx40C9TiT7jupHJ&access_type=offline


type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS




In [12]:
## my funcs
#list_files_in_gdrive_overall  -- display the info, does not return
#list_folders_in_gdrive_overall  -- display the info, does not return

## -- use the target folder querying functions  1) only files, 2) only folders 3) both (default)
#list_items_in_gdrive_target_folder  -- display the info, does not return
#retrieve_items_in_gdrive_target_folder  -- return the info , do not display

In [20]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete

results_BOTH = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
print(f"\nresults for query with BOTH=\n{results_BOTH}")

results_FILES = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')
print(f"\nresults for query with FILES=\n{results_FILES}")

results_FOLDERS = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'folders')
print(f"\nresults for query with FOLDERS=\n{results_FOLDERS}")


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents


results for query with BOTH=
[{'name': 'subfolder', 'id': '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w'}, {'name': 'file_4.deletefile', 'id': '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY'}, {'name': 'file_3.deletefile', 'id': '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk'}, {'name': 'file_2.deletefile', 'id': '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO'}, {'name': 'file_1.deletefile', 'id': '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux'}, {'name': 'file_0.deletefile', 'id': '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj'}]

Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType != 'application/vnd.google-apps.folder'


results for query with FILES=
[{'name': 'file_4.deletefile', 'id': '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY'}, {'name': 'file_3.deletefile', 'id': '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk'}, {'name': 'file_2.deletefile', 'id': '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO'}, {'name': 'file_1.deletefile', 'id': '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux'}, {'na

In [27]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents


Items for PAGE 1:

Found folder/file: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')
Found folder/file: ('file_4.deletefile', '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY')
Found folder/file: ('file_3.deletefile', '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk')
Found folder/file: ('file_2.deletefile', '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO')
Found folder/file: ('file_1.deletefile', '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux')
Found folder/file: ('file_0.deletefile', '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj')


In [32]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType != 'application/vnd.google-apps.folder'


Items for PAGE 1:

Found folder/file: ('file_4.deletefile', '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY')
Found folder/file: ('file_3.deletefile', '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk')
Found folder/file: ('file_2.deletefile', '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO')
Found folder/file: ('file_1.deletefile', '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux')
Found folder/file: ('file_0.deletefile', '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj')


In [29]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'folders')


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType='application/vnd.google-apps.folder'


Items for PAGE 1:

Found folder/file: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')


In [30]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'wrongValue')



ERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:
both     OR    files    OR    folders
Your query parater=
wrongValue



In [9]:
## list all the folders only - WORKS  - but also seems to find from shared drives also
list_folders_in_gdrive_overall(service)


Folders for PAGE 1:

Found folder: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')
Found folder: ('coco_test2017_subset_95000_100000', '16QGxlnEKbGz9RcN3Yb_JrPGtaLISJSAY')
Found folder: ('coco_test2017_subset_90000_95000', '1kjHtuE5WXGZGO3ZndR3g9rjxzXukKNm7')
Found folder: ('coco_test2017_subset_85000_90000', '1qV5nlbjOwYhzqLnRb4W8_jbIL34i-EJb')
Found folder: ('coco_test2017_subset_80000_85000', '1X7JxVvDZu9khxALLoAA0ELEkFjT9Z06C')
Found folder: ('coco_test2017_subset_75000_80000', '1a8BKBcdLj0jGZyyUetvAzNJt4nk-DKwr')
Found folder: ('coco_test2017_subset_70000_75000', '1-ipE7onHkLHM-LGeQdcqUABPzl281QuN')
Found folder: ('coco_test2017_subset_65000_70000', '1-v5t6pTfRC9Y0EAsdJU2rMFZnWHAVt9P')
Found folder: ('coco_test2017_subset_60000_65000', '10EVf2GlWh1c9gKMmcJAXI_z0_7KDSrUM')
Found folder: ('coco_test2017_subset_55000_60000', '10Ghi7gP_GY21uVp15DxkJ3a4AgLWwyFr')
Found folder: ('coco_test2017_subset_50000_55000', '1-zVNYILy7vinMW31RRzp8Dodi2kbQBPl')
Found folder: ('coco_test2017_su

In [104]:
def _main_():

    service = setup_drive_service()
    if service is None:
        print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
        return
    else:
        print(f"\n\nService setup SUCCESS\n\n")
    
    GDRIVE_TARGET_FOLDER = '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji'  ## has three .pkl files
    
    
    #list_files_in_gdrive_overall(service)
    #list_folders_in_gdrive_overall(service)
    list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
    
    return

if __name__ == '__main__':
    _main_()



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS




Attempting to list file in this folder: 1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji




HttpError: <HttpError 404 when requesting https://www.googleapis.com/drive/v3/files?q=mimeType%3D%27image%2Fjpeg%27&driveId=1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji&corpora=drive&supportsAllDrives=true&includeItemsFromAllDrives=true&fields=nextPageToken%2C+files%28id%2C+name%29&alt=json returned "Shared drive not found: 1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji">

In [100]:
## list all the folders only

def _main_():

    service = setup_drive_service()
    if service is None:
        print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
        return
    else:
        print(f"\n\nService setup SUCCESS\n\n")
    
    GDRIVE_TARGET_FOLDER = '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji'  ## has three .pkl files
    
    
    #list_files_in_gdrive_overall(service)
    list_folders_in_gdrive_overall(service)
    #list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
    
    return

if __name__ == '__main__':
    _main_()



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS



Folders for PAGE 1:

Found folder: ('CocoTrain2017_1', '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji')
Found folder: ('Rishabh_DO_NOT_TOUCH_PLS_1', '1n9jymR4BvQ-qsMLLM8Hcqj5xjcMU4Su6')
Found folder: ('COCO_train2017_118k', '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv')
Found folder: ('Checkpoints', '1OsHFXIJu2UUOr9MLC2l9nhMXV5wbBMFS')
Found folder: ('coco_train2017_wget_1', '1B6H0GTD_LglDGiD9kBQbxNKp9kevP1bi')
Found folder: ('Expose', '1Z_MAC9ypyK0fvJSGjt-kRBw11umcMyjY')
Found folder: ('Data', '1evkBGhjVDq9BlMNMUvnXLiHQNok3TY9g')
Found folder: ('credsFolder', '1AsTd9uwPYijC9vhLnbYtkEg7SSKOA7LA')
Found folder: ('ThesisStoryGen', '1RmmzWkzJa6pnqu8G9KOGYlY8rR-3kQQL')
Found folder: ('testdir', '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi')
Found folder: ('WordEmbeddings', '1fEMCZmr_AO7O9CLgsB-1TD9fJMMXDLni')
Found folder: ('CocoVal2017_1', '1jpriQmU2GFFzoXf