In [1]:
## to list files in a folder of google drive

In [1]:
from __future__ import print_function

import pickle
import os
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

import argparse
import sys
import datetime
import time

In [2]:
def setup_drive_service():
    try:
        # If modifying these scopes, delete the file token.pickle.
        SCOPES = ['https://www.googleapis.com/auth/drive']

        creds = None
        gdrive_creds_json_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/credentials_pil_thesis_work_oauth_client_id_20200916.json'

        CREDS_PICKLE_TOP_FOLDER = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/'
        creds_pickle_file_loc = r'/home/rohit/PyWDUbuntu/thesis/gdrive_pil_20200701/token_pil_thesis_work_oauth_client_id_20200916.pickle'
        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        if os.path.exists(creds_pickle_file_loc):
            with open(creds_pickle_file_loc, 'rb') as token:
                creds = pickle.load(token)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    gdrive_creds_json_loc, SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            with open(creds_pickle_file_loc, 'wb') as token:
                pickle.dump(creds, token)

        drive_serviceservice = build('drive', 'v3', credentials=creds)
        print(f"\n\ntype of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =\n{type(drive_serviceservice)}\n\n")
        
        return drive_serviceservice
    except:
        return None

In [3]:
def list_files_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 2
    count_pages = 0
    count_files = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='image/jpeg'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFiles for PAGE{count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of files found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [4]:
def list_folders_in_gdrive_overall(_service):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 10
    count_pages = 0
    count_folders = 0
    page_token = None
    while True:
        response = _service.files().list(q="mimeType='application/vnd.google-apps.folder'",
                                              spaces='drive',
                                              fields='nextPageToken, files(id, name)',
                                              pageToken=page_token).execute()
        print(f"\nFolders for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found folder: {( file.get('name'), file.get('id') )}")
            count_folders += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of folders found = {count_folder}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [5]:
def list_items_in_gdrive_target_folder(_service, _folder_id, _files_or_folders = 'both'):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = 5
    count_pages = 0
    count_files = 0
    page_token = None
    if _files_or_folders == 'folders':
        query_only_folders = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType='application/vnd.google-apps.folder'"
        query = query_only_folders
    elif _files_or_folders == 'files':
        query_only_files = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType != 'application/vnd.google-apps.folder'"
        query = query_only_files
    elif _files_or_folders == 'both': ## should be defaut is both
        query_folders_and_files = "'" + _folder_id + "'" + ' in parents'
        query = query_folders_and_files
    else: ## invalid value sent
        print(f"\n\nERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:")
        print(f"both     OR    files    OR    folders")
        print(f"Your query parater=\n{_files_or_folders}\n")
        return None
    print(f"\nQuery parameter built as:{query}\n")
    while True:
        #response = _service.files().list(q="'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents",
        #                                      spaces='drive',
        #                                      fields='nextPageToken, files(id, name)',
        #                                      pageToken=page_token).execute()
        response = _service.files().list(q=query,
                                      spaces='drive',
                                      fields='nextPageToken, files(id, name)',
                                      pageToken=page_token).execute()
        print(f"\nItems for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            print(f"Found folder/file: {( file.get('name'), file.get('id') )}")
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of itmes found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break

In [6]:
def retrieve_items_in_gdrive_target_folder(_service, _folder_id, _files_or_folders = 'both', _MAX_PAGES_LIMIT = 3):
    ## NOTE: To restrict the search to folders, use the query string to set the MIME type to 
    ##       q: mimeType = 'application/vnd.google-apps.folder'
    MAX_PAGES_LIMIT = _MAX_PAGES_LIMIT
    count_pages = 0
    count_files = 0
    page_token = None
    if _files_or_folders == 'folders':
        query_only_folders = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType='application/vnd.google-apps.folder'"
        query = query_only_folders
    elif _files_or_folders == 'files':
        query_only_files = "'" + _folder_id + "'" + ' in parents'+ ' and ' + "mimeType != 'application/vnd.google-apps.folder'"
        query = query_only_files
    elif _files_or_folders == 'both': ## should be defaut is both
        query_folders_and_files = "'" + _folder_id + "'" + ' in parents'
        query = query_folders_and_files
    else: ## invalid value sent
        print(f"\n\nERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:")
        print(f"both     OR    files    OR    folders")
        print(f"Your query parater=\n{_files_or_folders}\n")
        return None
    print(f"\nQuery parameter built as:{query}\n")
    items_result = []
    while True:
        #response = _service.files().list(q="'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents",
        #                                      spaces='drive',
        #                                      fields='nextPageToken, files(id, name)',
        #                                      pageToken=page_token).execute()
        response = _service.files().list(q=query,
                                      spaces='drive',
                                      fields='nextPageToken, files(id, name)',
                                      pageToken=page_token).execute()
        #print(f"\nItems for PAGE {count_pages+1}:\n")
        for file in response.get('files', []):
            # Process change
            #print(f"Found folder/file: {( file.get('name'), file.get('id') )}")
            items_result.append( {'name': file.get('name') , 'id': file.get('id')} )
            count_files += 1
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
        count_pages += 1
        if count_pages > MAX_PAGES_LIMIT:
            print(f"\n\nNumber of itmes found = {count_files}\n\n")
            print(f"\n\nMaximum page limit reached {MAX_PAGES_LIMIT} and breaking\n\n")
            break
    return items_result

In [7]:
def delete_file_items(_service, _file_id_arr, _status_freq = 100):
    count_files = len(_file_id_arr)
    count_deleted , count_problem = 0, 0
    problem_files = []
    #print(f"{_file_id_arr}")
    print(f"\n\nInput array has {count_files} files to delete")
    
    for idx, finfo_d in enumerate(_file_id_arr):
        #print(f"\nFile {idx+1} : {finfo_d.get('name')}")
        if idx % _status_freq == 0:
            print(f"At {datetime.datetime.now().strftime('%H:%M:%S')} :: idx = {idx+1} , Total = {count_files} , Deleted = {count_deleted} , Problem = {count_problem}")
        try:
            response = _service.files().delete(fileId=finfo_d.get('id'),
                                                              supportsAllDrives='False').execute()
            count_deleted += 1
        except Exception as e:
            problem_files.append(finfo_d)
            print(f"Exception =\n{e}")
            count_problem += 1
    print(f"\n\nSummary :: Total = {count_files} , Deleted = {count_deleted} , Problem = {count_problem}")
    return problem_files, count_files, count_deleted, count_problem

In [8]:
## make the service
service = setup_drive_service()
if service is None:
    print(f"\n\nFATAL ERROR: Could not setup gdrive service\n\n")
else:

    print(f"\n\nService setup SUCCESS\n\n")



type of drive_serviceservice should say    <class 'googleapiclient.discovery.Resource'> =
<class 'googleapiclient.discovery.Resource'>




Service setup SUCCESS




In [20]:
## my funcs
#list_files_in_gdrive_overall  -- display the info, does not return
#list_folders_in_gdrive_overall  -- display the info, does not return

## -- use the target folder querying functions  1) only files, 2) only folders 3) both (default)
#list_items_in_gdrive_target_folder  -- display the info, does not return
#retrieve_items_in_gdrive_target_folder  -- return the info , do not display

#delete_file_items  --  send an array of dict info (filename and id) to delete all those files

In [None]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 50
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")

## DELETE RUN 5 - RETRIEVED WITH MAX PAGES LIMIT = 100

In [None]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv'  ## coco_train2017_118k  folder


MAX_PAGES_LIMIT = 200
files_to_delete_arr = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files', _MAX_PAGES_LIMIT = MAX_PAGES_LIMIT)
print(f"\nFew Results for query with FILES=\n{files_to_delete_arr[:10]}")

In [None]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 125
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")

## DELETE RUN 4 - RETRIEVED WITH MAX PAGES LIMIT = 100

In [18]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv'  ## coco_train2017_118k  folder


MAX_PAGES_LIMIT = 150
files_to_delete_arr = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files', _MAX_PAGES_LIMIT = MAX_PAGES_LIMIT)
print(f"\nFew Results for query with FILES=\n{files_to_delete_arr[:10]}")


Query parameter built as:'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents and mimeType != 'application/vnd.google-apps.folder'



Number of itmes found = 15100




Maximum page limit reached 150 and breaking



Few Results for query with FILES=
[{'name': '000000206731.jpg', 'id': '1XjZcf-JzqFU24K-33FeK68V-yabhAAsk'}, {'name': '000000498265.jpg', 'id': '1l1lQpGxGpBCyXuR3pOcF6gJGyXZ1Sid3'}, {'name': '000000361381.jpg', 'id': '1ToC2DyxYrGtOvsJTYAl-WSL2ewADamwE'}, {'name': '000000458208.jpg', 'id': '15czg6a7IWpOH7UfgUSFptK1b0g-b1Sep'}, {'name': '000000398260.jpg', 'id': '17keFJ8o8hezHAW205h49kSKCb9ilGkT3'}, {'name': '000000068752.jpg', 'id': '1abZm-NcY_s53pvKTtEEszvY1q83G5zna'}, {'name': '000000303126.jpg', 'id': '1dQwn-sbHBPcJ4d-m6abIhHbXg8_dWPF7'}, {'name': '000000523718.jpg', 'id': '19tDIxqmMz5WNfaYhWNw9aC8ZQfAjH0Jk'}, {'name': '000000399499.jpg', 'id': '1ltqMTK69kYCOijE0ymwhaJLQ0RT5bzS5'}, {'name': '000000000491.jpg', 'id': '1HDHQjACiOg7x5MtQiaj6qJyu7DZthtaC'}]


In [None]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 75
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")



Input array has 15100 files to delete
At 19:35:02 :: idx = 1 , Total = 15100 , Deleted = 0 , Problem = 0
At 19:35:59 :: idx = 76 , Total = 15100 , Deleted = 75 , Problem = 0
At 19:36:51 :: idx = 151 , Total = 15100 , Deleted = 150 , Problem = 0
At 19:37:38 :: idx = 226 , Total = 15100 , Deleted = 225 , Problem = 0
At 19:38:27 :: idx = 301 , Total = 15100 , Deleted = 300 , Problem = 0
At 19:39:21 :: idx = 376 , Total = 15100 , Deleted = 375 , Problem = 0
At 19:40:06 :: idx = 451 , Total = 15100 , Deleted = 450 , Problem = 0
At 19:40:52 :: idx = 526 , Total = 15100 , Deleted = 525 , Problem = 0
At 19:41:39 :: idx = 601 , Total = 15100 , Deleted = 600 , Problem = 0
At 19:42:26 :: idx = 676 , Total = 15100 , Deleted = 675 , Problem = 0
At 19:43:15 :: idx = 751 , Total = 15100 , Deleted = 750 , Problem = 0
At 19:44:06 :: idx = 826 , Total = 15100 , Deleted = 825 , Problem = 0
At 19:45:01 :: idx = 901 , Total = 15100 , Deleted = 900 , Problem = 0
At 19:45:49 :: idx = 976 , Total = 15100 , 

## DELETE RUN 3 - RETRIEVED WITH MAX PAGES LIMIT = 100

In [15]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv'  ## coco_train2017_118k  folder


MAX_PAGES_LIMIT = 100
files_to_delete_arr = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files', _MAX_PAGES_LIMIT = MAX_PAGES_LIMIT)
print(f"\nFew Results for query with FILES=\n{files_to_delete_arr[:10]}")


Query parameter built as:'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents and mimeType != 'application/vnd.google-apps.folder'



Number of itmes found = 10100




Maximum page limit reached 100 and breaking



Few Results for query with FILES=
[{'name': '000000036382.jpg', 'id': '1HUBnYuneSn4VP8gx36KovEZ3jKCQgvoU'}, {'name': '000000375552.jpg', 'id': '1bwbCPXl1hYrHh8vJUEzauImmwpKNDsej'}, {'name': '000000282603.jpg', 'id': '1O_ai3zB8tQY7EBP2Thk1dAAvsapqKUQs'}, {'name': '000000362795.jpg', 'id': '116TUzpq6DuKOxhckqaypxR09EI1i1k5a'}, {'name': '000000318183.jpg', 'id': '1vg0uEBx3oOrDA6hYkcF_cG3OxUC7wCew'}, {'name': '000000009479.jpg', 'id': '1d6oYaPBITe0zOvjgkxeq4t_FqnbFHQvo'}, {'name': '000000497542.jpg', 'id': '14TyY9gV5UVwaKDPIkr2L7ihR4rAlYqfH'}, {'name': '000000021294.jpg', 'id': '1OffnDBhpyoh14DHlTOgwkBZykh7Fm3c3'}, {'name': '000000308775.jpg', 'id': '1iG3u9PWoDU0QNSyvg7kHdJmiVCUZVvmU'}, {'name': '000000355578.jpg', 'id': '1oPJqkyaiCB3iOv3QZe59ZKx92PQUMD8n'}]


In [16]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 50
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")



Input array has 10100 files to delete
At 17:12:45 :: idx = 1 , Total = 10100 , Deleted = 0 , Problem = 0
At 17:13:22 :: idx = 51 , Total = 10100 , Deleted = 50 , Problem = 0
At 17:14:02 :: idx = 101 , Total = 10100 , Deleted = 100 , Problem = 0
At 17:14:34 :: idx = 151 , Total = 10100 , Deleted = 150 , Problem = 0
At 17:15:04 :: idx = 201 , Total = 10100 , Deleted = 200 , Problem = 0
At 17:15:36 :: idx = 251 , Total = 10100 , Deleted = 250 , Problem = 0
At 17:16:08 :: idx = 301 , Total = 10100 , Deleted = 300 , Problem = 0
At 17:16:40 :: idx = 351 , Total = 10100 , Deleted = 350 , Problem = 0
At 17:17:13 :: idx = 401 , Total = 10100 , Deleted = 400 , Problem = 0
At 17:17:43 :: idx = 451 , Total = 10100 , Deleted = 450 , Problem = 0
At 17:18:13 :: idx = 501 , Total = 10100 , Deleted = 500 , Problem = 0
At 17:18:42 :: idx = 551 , Total = 10100 , Deleted = 550 , Problem = 0
At 17:19:13 :: idx = 601 , Total = 10100 , Deleted = 600 , Problem = 0
At 17:19:41 :: idx = 651 , Total = 10100 , 

At 18:10:40 :: idx = 5651 , Total = 10100 , Deleted = 5650 , Problem = 0
At 18:11:08 :: idx = 5701 , Total = 10100 , Deleted = 5700 , Problem = 0
At 18:11:34 :: idx = 5751 , Total = 10100 , Deleted = 5750 , Problem = 0
At 18:12:06 :: idx = 5801 , Total = 10100 , Deleted = 5800 , Problem = 0
At 18:12:32 :: idx = 5851 , Total = 10100 , Deleted = 5850 , Problem = 0
At 18:12:58 :: idx = 5901 , Total = 10100 , Deleted = 5900 , Problem = 0
At 18:13:30 :: idx = 5951 , Total = 10100 , Deleted = 5950 , Problem = 0
At 18:13:55 :: idx = 6001 , Total = 10100 , Deleted = 6000 , Problem = 0
At 18:14:21 :: idx = 6051 , Total = 10100 , Deleted = 6050 , Problem = 0
At 18:14:48 :: idx = 6101 , Total = 10100 , Deleted = 6100 , Problem = 0
At 18:15:17 :: idx = 6151 , Total = 10100 , Deleted = 6150 , Problem = 0
At 18:15:46 :: idx = 6201 , Total = 10100 , Deleted = 6200 , Problem = 0
At 18:16:24 :: idx = 6251 , Total = 10100 , Deleted = 6250 , Problem = 0
At 18:16:51 :: idx = 6301 , Total = 10100 , Deleted

## DELETE RUN 2 - RETRIEVED WITH MAX PAGES LIMIT = 20

In [13]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv'  ## coco_train2017_118k  folder


MAX_PAGES_LIMIT = 20
files_to_delete_arr = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files', _MAX_PAGES_LIMIT = MAX_PAGES_LIMIT)
print(f"\nFew Results for query with FILES=\n{files_to_delete_arr[:10]}")


Query parameter built as:'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents and mimeType != 'application/vnd.google-apps.folder'



Number of itmes found = 2100




Maximum page limit reached 20 and breaking



Few Results for query with FILES=
[{'name': '000000081971.jpg', 'id': '1nsL_AhK6imbhNvuJPd280t4I9y5Mi4F8'}, {'name': '000000471698.jpg', 'id': '1ZlvB4x7bMoqBfwFWA4AE5H04cU1yWkX1'}, {'name': '000000488897.jpg', 'id': '1dl0c6YDV6LtvUF0iusjZJHaFceQQxmja'}, {'name': '000000138999.jpg', 'id': '1MOeFOVTjqHm5SRRN9k2sIZLcyTkVymBF'}, {'name': '000000101369.jpg', 'id': '1bFVJj-QqrR5lSjZY6lBp8O9ks-uRFY3Y'}, {'name': '000000345265.jpg', 'id': '1wFEa3l_zjpUVBTw9uASIclJs15VJT1-k'}, {'name': '000000333775.jpg', 'id': '1EHPI-Gg52PI2alcioG23ztSmL5O6lRoc'}, {'name': '000000558166.jpg', 'id': '1CpQ88VzQu4xS1ChD42ap5KpXm9zMwGaR'}, {'name': '000000341455.jpg', 'id': '12HH4QA1ME2n3kRYyeHI5OFiHOqKzqE4m'}, {'name': '000000463175.jpg', 'id': '1pmUdQjogOpW1Z5jmqsVaPxh4ITE2cMFi'}]


In [14]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 20
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")



Input array has 2100 files to delete
At 16:44:07 :: idx = 1 , Total = 2100 , Deleted = 0 , Problem = 0
At 16:44:21 :: idx = 21 , Total = 2100 , Deleted = 20 , Problem = 0
At 16:44:34 :: idx = 41 , Total = 2100 , Deleted = 40 , Problem = 0
At 16:44:49 :: idx = 61 , Total = 2100 , Deleted = 60 , Problem = 0
At 16:45:02 :: idx = 81 , Total = 2100 , Deleted = 80 , Problem = 0
At 16:45:18 :: idx = 101 , Total = 2100 , Deleted = 100 , Problem = 0
At 16:45:31 :: idx = 121 , Total = 2100 , Deleted = 120 , Problem = 0
At 16:45:46 :: idx = 141 , Total = 2100 , Deleted = 140 , Problem = 0
At 16:46:00 :: idx = 161 , Total = 2100 , Deleted = 160 , Problem = 0
At 16:46:13 :: idx = 181 , Total = 2100 , Deleted = 180 , Problem = 0
At 16:46:31 :: idx = 201 , Total = 2100 , Deleted = 200 , Problem = 0
At 16:46:49 :: idx = 221 , Total = 2100 , Deleted = 220 , Problem = 0
At 16:47:05 :: idx = 241 , Total = 2100 , Deleted = 240 , Problem = 0
At 16:47:17 :: idx = 261 , Total = 2100 , Deleted = 260 , Probl

## DELETE RUN 1 - RETRIEVED WITH MAX PAGES LIMIT = 3 (DEFAULT VALUE WAS USED)

In [11]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv'  ## coco_train2017_118k  folder

files_to_delete_arr = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')
print(f"\nFew Results for query with FILES=\n{files_to_delete_arr[:10]}")


Query parameter built as:'1H76HF9yRRRdffGkzdKExaMWjMXV4YGcv' in parents and mimeType != 'application/vnd.google-apps.folder'



Number of itmes found = 400




Maximum page limit reached 3 and breaking



Few Results for query with FILES=
[{'name': '000000081971.jpg', 'id': '1nsL_AhK6imbhNvuJPd280t4I9y5Mi4F8'}, {'name': '000000471698.jpg', 'id': '1ZlvB4x7bMoqBfwFWA4AE5H04cU1yWkX1'}, {'name': '000000488897.jpg', 'id': '1dl0c6YDV6LtvUF0iusjZJHaFceQQxmja'}, {'name': '000000138999.jpg', 'id': '1MOeFOVTjqHm5SRRN9k2sIZLcyTkVymBF'}, {'name': '000000101369.jpg', 'id': '1bFVJj-QqrR5lSjZY6lBp8O9ks-uRFY3Y'}, {'name': '000000345265.jpg', 'id': '1wFEa3l_zjpUVBTw9uASIclJs15VJT1-k'}, {'name': '000000333775.jpg', 'id': '1EHPI-Gg52PI2alcioG23ztSmL5O6lRoc'}, {'name': '000000558166.jpg', 'id': '1CpQ88VzQu4xS1ChD42ap5KpXm9zMwGaR'}, {'name': '000000341455.jpg', 'id': '12HH4QA1ME2n3kRYyeHI5OFiHOqKzqE4m'}, {'name': '000000463175.jpg', 'id': '1pmUdQjogOpW1Z5jmqsVaPxh4ITE2cMFi'}]


In [10]:
## DELETE THE FILES using the dict info retrieved earlier
status_freq = 50
prob_files , cnt_total, cnt_del, cnt_prob = delete_file_items(service, files_to_delete_arr, status_freq)
print(f"len(prob_files) = {len(prob_files)}")



Input array has 400 files to delete
At 16:25:48 :: idx = 1 , Total = 400 , Deleted = 0 , Problem = 0
At 16:26:38 :: idx = 51 , Total = 400 , Deleted = 50 , Problem = 0
At 16:27:28 :: idx = 101 , Total = 400 , Deleted = 100 , Problem = 0
At 16:28:14 :: idx = 151 , Total = 400 , Deleted = 150 , Problem = 0
At 16:28:56 :: idx = 201 , Total = 400 , Deleted = 200 , Problem = 0
At 16:29:33 :: idx = 251 , Total = 400 , Deleted = 250 , Problem = 0
At 16:30:09 :: idx = 301 , Total = 400 , Deleted = 300 , Problem = 0
At 16:30:46 :: idx = 351 , Total = 400 , Deleted = 350 , Problem = 0


Summary :: Total = 400 , Deleted = 400 , Problem = 0
len(prob_files) = 0


In [10]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete

results_BOTH = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)
print(f"\nresults for query with BOTH=\n{results_BOTH}")

results_FILES = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')
print(f"\nresults for query with FILES=\n{results_FILES}")

results_FOLDERS = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'folders')
print(f"\nresults for query with FOLDERS=\n{results_FOLDERS}")


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents


results for query with BOTH=
[{'name': 'subfolder333', 'id': '1GB2rl3avy8KUoxDN8B-y-THJWSHRmgVc'}, {'name': 'subfolder222', 'id': '1dHXCcWJekNCT40xY5SZYL_MZON6PABgb'}, {'name': 'subfolder111', 'id': '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w'}, {'name': 'file_4.deletefile', 'id': '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY'}, {'name': 'file_3.deletefile', 'id': '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk'}, {'name': 'file_2.deletefile', 'id': '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO'}, {'name': 'file_1.deletefile', 'id': '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux'}, {'name': 'file_0.deletefile', 'id': '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj'}]

Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType != 'application/vnd.google-apps.folder'


results for query with FILES=
[{'name': 'file_4.deletefile', 'id': '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY'}, {'name': 'file_3.deletefile', 'id': '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk'}, {'name': 'f

In [11]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete

results_FILES = retrieve_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')
print(f"\nresults for query with FILES=\n{results_FILES}")


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType != 'application/vnd.google-apps.folder'


results for query with FILES=
[{'name': 'file_4.deletefile', 'id': '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY'}, {'name': 'file_3.deletefile', 'id': '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk'}, {'name': 'file_2.deletefile', 'id': '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO'}, {'name': 'file_1.deletefile', 'id': '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux'}, {'name': 'file_0.deletefile', 'id': '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj'}]


In [27]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER)


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents


Items for PAGE 1:

Found folder/file: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')
Found folder/file: ('file_4.deletefile', '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY')
Found folder/file: ('file_3.deletefile', '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk')
Found folder/file: ('file_2.deletefile', '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO')
Found folder/file: ('file_1.deletefile', '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux')
Found folder/file: ('file_0.deletefile', '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj')


In [32]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'files')


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType != 'application/vnd.google-apps.folder'


Items for PAGE 1:

Found folder/file: ('file_4.deletefile', '1-UUVxiNCWqZJZInnC8ZSW-a0i0IZFPkY')
Found folder/file: ('file_3.deletefile', '1-ZFvLpchsZ8XMLm_3cw1tFOlwNK5SPtk')
Found folder/file: ('file_2.deletefile', '1-axDYnX6l6hq60nGPpBy5afdgyE9kYuO')
Found folder/file: ('file_1.deletefile', '1-g1ZlBsvKIP_crCdfiQl6jm7GyEi6jux')
Found folder/file: ('file_0.deletefile', '1-TGlQjfW6seI3SgKV4VUqXKC43KH1Htj')


In [29]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'folders')


Query parameter built as:'1H9Be89645jTimwXPYPVMHeMQeqhEFjGi' in parents and mimeType='application/vnd.google-apps.folder'


Items for PAGE 1:

Found folder/file: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')


In [30]:
## list all the files AND folders in the target folder - default is both
## for only files    use   _files_or_folders='files'
## for only folders  use   _files_or_folders='folders'
GDRIVE_TARGET_FOLDER = '1H9Be89645jTimwXPYPVMHeMQeqhEFjGi'  ## has 5 temp files to delete
list_items_in_gdrive_target_folder(service, GDRIVE_TARGET_FOLDER, _files_or_folders = 'wrongValue')



ERROR : invalid value for the '_files_or_folders' parameter. Expected one of these three options:
both     OR    files    OR    folders
Your query parater=
wrongValue



In [9]:
## list all the folders only - WORKS  - but also seems to find from shared drives also
list_folders_in_gdrive_overall(service)


Folders for PAGE 1:

Found folder: ('subfolder', '1BkFpEENiVuJ9Xq7jnp6Gc98bCEW0mJ2w')
Found folder: ('coco_test2017_subset_95000_100000', '16QGxlnEKbGz9RcN3Yb_JrPGtaLISJSAY')
Found folder: ('coco_test2017_subset_90000_95000', '1kjHtuE5WXGZGO3ZndR3g9rjxzXukKNm7')
Found folder: ('coco_test2017_subset_85000_90000', '1qV5nlbjOwYhzqLnRb4W8_jbIL34i-EJb')
Found folder: ('coco_test2017_subset_80000_85000', '1X7JxVvDZu9khxALLoAA0ELEkFjT9Z06C')
Found folder: ('coco_test2017_subset_75000_80000', '1a8BKBcdLj0jGZyyUetvAzNJt4nk-DKwr')
Found folder: ('coco_test2017_subset_70000_75000', '1-ipE7onHkLHM-LGeQdcqUABPzl281QuN')
Found folder: ('coco_test2017_subset_65000_70000', '1-v5t6pTfRC9Y0EAsdJU2rMFZnWHAVt9P')
Found folder: ('coco_test2017_subset_60000_65000', '10EVf2GlWh1c9gKMmcJAXI_z0_7KDSrUM')
Found folder: ('coco_test2017_subset_55000_60000', '10Ghi7gP_GY21uVp15DxkJ3a4AgLWwyFr')
Found folder: ('coco_test2017_subset_50000_55000', '1-zVNYILy7vinMW31RRzp8Dodi2kbQBPl')
Found folder: ('coco_test2017_su