In [None]:
## to list files in a folder of google drive

In [1]:
from __future__ import print_function

import pickle
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import argparse
import sys
import datetime
import time

In [2]:
def setup_drive_service():
    try:
        # If modifying these scopes, delete the file token.pickle.
        SCOPES = ['https://www.googleapis.com/auth/drive']

        creds = None
        gdrive_creds_json_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/credentials_pil_thesis_work_oauth_client_id_20200916.json'

        CREDS_PICKLE_TOP_FOLDER = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/'
        creds_pickle_file_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/token_pil_thesis_work_oauth_client_id_20200916.pickle'
        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        if os.path.exists(creds_pickle_file_loc):
            with open(creds_pickle_file_loc, 'rb') as token:
                creds = pickle.load(token)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    gdrive_creds_json_loc, SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            with open(creds_pickle_file_loc, 'wb') as token:
                pickle.dump(creds, token)

        drive_serviceservice = build('drive', 'v3', credentials=creds)
        print(f"\n\ntype of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =\n{type(drive_serviceservice)}\n\n")
        
        return drive_serviceservice
    except:
        return None

In [3]:
def list_files_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 2
    count_pages = 0
    count_files = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='image/jpeg'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFiles for PAGE{count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of files found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [4]:
def list_folders_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 10
    count_pages = 0
    count_folders = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='application/vnd.google-apps.folder'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFolders for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found folder: {( file.get('name'), file.get('id') )}")
            count_folders += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of folders found = {count_folder}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [5]:
def list_files_in_gdrive_target_folder(_service, _folder_id):
    ## NOTEs:
    ## 1) To restrict the search to folders, use the query string to set the MIME type to 
    ##        q: mimeType = 'application/vnd.google-apps.folder'
    ## 2) The driveId parameter must be specified if and only if corpora is set to drive.
    ## 3) The includeItemsFromAllDrives parameter must be set to true when driveId is 
    ##        specified or corpora contains drive or allDrives."
    MAX_PAGES_LIMIT = 2
    count_pages = 0
    count_files = 0
    page_token = None
    print(f"\n\nAttempting to list file in this folder: {_folder_id}\n\n")
    while True:
        response = _service.files().list(q="mimeType='image/jpeg'",
                                              #spaces='drive',
                                              driveId=_folder_id,
                                              corpora='drive',
                                              supportsAllDrives=True,
                                              includeItemsFromAllDrives=True,
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFiles for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of files found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [6]:
## make the service
service = setup_drive_service()
if service is None:
    print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
else:

    print(f"\n\nService setup SUCCESS\n\n")



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS




In [7]:
## my funcs
#GDRIVE_TARGET_FOLDER = '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji'  ## has three .pkl files
#list_files_in_gdrive_overall(service)
#list_folders_in_gdrive_overall(service)
#list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)

In [9]:
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
#GDRIVE_TARGET_FOLDER = 'my-drive' ## top level last part of url
list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)



Attempting to list file in this folder: 1RmmzWkzJa6pnqu8G9KOGYlY8rR-3kQQL




HttpError: <HttpError 404 when requesting https://www.googleapis.com/drive/v3/files?q=mimeType%3D%27image%2Fjpeg%27&driveId=1RmmzWkzJa6pnqu8G9KOGYlY8rR-3kQQL&corpora=drive&supportsAllDrives=true&includeItemsFromAllDrives=true&fields=nextPageToken%2C+files%28id%2C+name%29&alt=json returned "Shared drive not found: 1RmmzWkzJa6pnqu8G9KOGYlY8rR-3kQQL">

In [8]:
## list all the folders only - WORKS  - but also seems to find from shared drives also
list_folders_in_gdrive_overall(service)


Folders for PAGE 1:

Found folder: ('coco_test2017_subset_70000_75000', '1-ipE7onHkLHM-LGeQdcqUABPzl281QuN')
Found folder: ('coco_test2017_subset_65000_70000', '1-v5t6pTfRC9Y0EAsdJU2rMFZnWHAVt9P')
Found folder: ('coco_test2017_subset_60000_65000', '10EVf2GlWh1c9gKMmcJAXI_z0_7KDSrUM')
Found folder: ('coco_test2017_subset_55000_60000', '10Ghi7gP_GY21uVp15DxkJ3a4AgLWwyFr')
Found folder: ('coco_test2017_subset_50000_55000', '1-zVNYILy7vinMW31RRzp8Dodi2kbQBPl')
Found folder: ('coco_test2017_subset_45000_50000', '1-h_nEnDf9IAuwZEMlM_gI4iaEsjoqd44')
Found folder: ('coco_test2017_subset_40000_45000', '1-OtyhCHzxNRwdxOG9-nu25jWK_aona3y')
Found folder: ('coco_test2017_subset_35000_40000', '1-pVi5g0eeis1FvLPzz-K_KaYa_DuNF4N')
Found folder: ('coco_test2017_subset_30000_35000', '1-ST34eZTk5-ymbFcCXxpUmzNyzI1pNMI')
Found folder: ('coco_test2017_subset_25000_30000', '1-OGOHBj_I2ZKWixrG2HQgPchCkTof_ex')
Found folder: ('coco_test2017_subset_20000_25000', '1-4IyQ5Qkc5fbXKXRQiNLOgHrktDuTmwr')
Found fold

In [104]:
def _main_():

    service = setup_drive_service()
    if service is None:
        print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
        return
    else:
        print(f"\n\nService setup SUCCESS\n\n")
    
    GDRIVE_TARGET_FOLDER = '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji'  ## has three .pkl files
    
    
    #list_files_in_gdrive_overall(service)
    #list_folders_in_gdrive_overall(service)
    list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
    
    return

if __name__ == '__main__':
    _main_()



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS




Attempting to list file in this folder: 1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji




HttpError: <HttpError 404 when requesting https://www.googleapis.com/drive/v3/files?q=mimeType%3D%27image%2Fjpeg%27&driveId=1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji&corpora=drive&supportsAllDrives=true&includeItemsFromAllDrives=true&fields=nextPageToken%2C+files%28id%2C+name%29&alt=json returned "Shared drive not found: 1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji">

In [100]:
## list all the folders only

def _main_():

    service = setup_drive_service()
    if service is None:
        print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
        return
    else:
        print(f"\n\nService setup SUCCESS\n\n")
    
    GDRIVE_TARGET_FOLDER = '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji'  ## has three .pkl files
    
    
    #list_files_in_gdrive_overall(service)
    list_folders_in_gdrive_overall(service)
    #list_files_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
    
    return

if __name__ == '__main__':
    _main_()



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS



Folders for PAGE 1:

Found folder: ('CocoTrain2017_1', '1FOxs0xhSEWcsfww3jbyd_uoRHQ4tH-ji')
Found folder: ('Rishabh_DO_NOT_TOUCH_PLS_1', '1n9jymR4BvQ-qsMLLM8Hcqj5xjcMU4Su6')
Found folder: ('COCO_train2017_118k', '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv')
Found folder: ('Checkpoints', '1OsHFXIJu2UUOr9MLC2l9nhMXV5wbBMFS')
Found folder: ('coco_train2017_wget_1', '1B6H0GTD_LglDGiD9kBQbxNKp9kevP1bi')
Found folder: ('Expose', '1Z_MAC9ypyK0fvJSGjt-kRBw11umcMyjY')
Found folder: ('Data', '1evkBGhjVDq9BlMNMUvnXLiHQNok3TY9g')
Found folder: ('credsFolder', '1AsTd9uwPYijC9vhLnbYtkEg7SSKOA7LA')
Found folder: ('ThesisStoryGen', '1RmmzWkzJa6pnqu8G9KOGYlY8rR-3kQQL')
Found folder: ('testdir', '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi')
Found folder: ('WordEmbeddings', '1fEMCZmr_AO7O9CLgsB-1TD9fJMMXDLni')
Found folder: ('CocoVal2017_1', '1jpriQmU2GFFzoXf