In [1]:
import io
import os
import oauth2client
from oauth2client import file, client, tools
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

In [2]:
import ipywidgets as widgets

## Class to wrap Google Drive API 

In [3]:
class DriveDownloader():
    """Thin wrapper to automate Google Drive authentication, searching and downloading.

    Typical use: call ``authenticate_gdrive`` once to populate ``self.service``,
    then hand that service to the static ``search_gdrive`` and
    ``download_gdrive`` helpers.

    Google Drive API v3 tutorial: https://developers.google.com/drive/api/v3/quickstart/python
    """

    def __init__(self):
        """Set attribute defaults (they point at the public demo gdrive folder)."""

        self.folder_name = 'test1'  # sub-directory of `path` the downloads are saved into
        self.folder_id = '17VB4dXCU-FhW_qNoUphmrjjGJ2brx3f1'  # https://drive.google.com/drive/u/0/folders/17VB4dXCU-FhW_qNoUphmrjjGJ2brx3f1
        self.search_key = '.md'  # substring the file names must contain
        self.path = './data/'  # base directory for downloads
        self.service = None  # Drive API client; set by authenticate_gdrive()
        self.items = None  # search results; not assigned by this class itself

    def authenticate_gdrive(self, path_to_credentials):
        """Build the Drive v3 client and store it in ``self.service``.

        Does nothing when ``self.service`` is already set. Otherwise the
        credentials directory is created if missing, a cached ``token.json``
        is read from it and, when that token is absent or invalid, the OAuth
        flow is run from ``client_id.json`` in the same directory (this prints
        a link through which the user grants the app permission).

        Args:
            path_to_credentials: directory that holds ``client_id.json`` and
                where ``token.json`` is stored. A trailing path separator is
                optional.

        Reference:
        https://medium.com/@umdfirecoml/a-step-by-step-guide-on-how-to-download-your-google-drive-data-to-your-jupyter-notebook-using-the-52f4ce63c66c
        """

        if self.service is not None:
            return

        import argparse  # local import: only needed to build the flags object

        os.makedirs(path_to_credentials, exist_ok=True)

        # Flags object handed to tools.run_flow in place of parsed CLI arguments.
        # NOTE(review): 'store_true' looks like an argparse action name that was
        # pasted as a value; it is kept unchanged (a non-empty string is truthy).
        flags = argparse.Namespace(
            auth_host_name='localhost',
            noauth_local_webserver='store_true',
            auth_host_port=[8080, 8090],
            logging_level='ERROR',
        )

        # authorization boilerplate code
        SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
        store = file.Storage(os.path.join(path_to_credentials, 'token.json'))
        creds = store.get()
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets(
                os.path.join(path_to_credentials, 'client_id.json'), SCOPES)
            creds = tools.run_flow(flow, store, flags)

        self.service = build('drive', 'v3', credentials=creds)

    @staticmethod
    def search_gdrive(service, folder_id, search_key):
        """Search a parent folder for files whose name contains ``search_key``.

        ``search_key`` is a plain substring (not a regex); single quotes and
        backslashes in it are escaped so they cannot break the query. All
        result pages are followed, so more than 50 matches are returned in
        full. The query string and every match are printed.

        Args:
            service: Drive API client, or None if not authenticated yet.
            folder_id: ID of the Drive folder to search in.
            search_key: text that the file names must contain.

        Returns:
            The list of ``{'id', 'name'}`` dicts matching the criteria (empty
            when nothing matches), or None when ``service`` is None.
        """

        if service is None:
            print("[error in search_gdrive]: no service. run authenticate_gdrive() first")
            return None

        # Escape backslashes first so the escapes added for quotes are not doubled.
        escaped_key = str(search_key).replace('\\', '\\\\').replace("'", "\\'")
        # NOTE(review): the Drive documentation spells the folder filter as
        # "'<id>' in parents"; the original operand order is kept here - confirm.
        query_string = f"name contains '{escaped_key}' and parents in '{folder_id}'"
        print(query_string)

        items = []
        page_token = None  # None requests the first page
        while True:
            results = service.files().list(
                q=query_string,
                spaces='drive',
                pageSize=50,
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            ).execute()
            items.extend(results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break

        if not items:
            print('No files found.')
        else:
            print('Files:')
            for i, item in enumerate(items):
                print(f"item_id: {i}, filename: {item['name']} , file_id: {item['id']}")

        return items

    @staticmethod
    def download_gdrive(service, items, path, folder_name):
        """Download every file of an items list into ``path/folder_name``.

        Prints the percentage completion while each file downloads. The target
        directory is created only once the inputs have been validated, and
        each local file is closed as soon as its download finishes.

        Args:
            service: Drive API client, or None if not authenticated yet.
            items: list of ``{'id', 'name'}`` dicts as returned by search_gdrive.
            path: base directory for downloads (trailing separator optional).
            folder_name: sub-directory of ``path`` that receives the files.
        """

        if service is None:
            print("[error in download_gdrive]: no service. run authenticate_gdrive() first")
            return
        if not items:
            print("[error in download_gdrive]: no items to download. Run search_gdrive() first")
            return

        path_to_save = os.path.join(path, folder_name)
        os.makedirs(path_to_save, exist_ok=True)

        for i, item in enumerate(items):
            file_id = item['id']
            # Path separators in a Drive name would otherwise point outside
            # (or into a missing sub-directory of) the target folder.
            filename = item['name']
            for separator in filter(None, (os.sep, os.altsep)):
                filename = filename.replace(separator, '_')

            request = service.files().get_media(fileId=file_id)
            with io.FileIO(os.path.join(path_to_save, filename), mode='w') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    print(f"[{i+1}/{len(items)}] Download {int(status.progress() * 100)}%")
        print(f"Download of {len(items)} files complete")

## Class to combine Google Drive API with ipywidgets

In [4]:
class WidgetDriveDownloader(DriveDownloader):
    """DriveDownloader with an ipywidgets front end.

    Textboxes are pre-filled with the inherited defaults for searching and
    downloading, and two buttons trigger the search and the download. This
    makes the repetitive (but not quite scriptable) process of downloading
    particular files painless.
    """

    def __init__(self):
        """Initialise the inherited defaults, then build the UI."""

        super().__init__()
        self.setup_widgets()

    def setup_widgets(self):
        """Create the input textboxes, the two buttons and the output areas."""

        # One row per textbox:
        # (set_inputs argument name, label, default taken from __init__)
        field_specs = (
            ('folder_name', 'Folder name', self.folder_name),
            ('folder_id', 'Folder ID', self.folder_id),
            ('search_key', 'Search Key', self.search_key),
            ('path', 'Path to save', self.path),
        )
        self.ui = widgets.VBox([
            widgets.Text(
                description=label,
                placeholder=f'eg: {default}',
                value=f"{default}",
            )
            for _, label, default in field_specs
        ])

        # Wire each textbox to the matching set_inputs argument.
        controls = {
            spec[0]: textbox
            for spec, textbox in zip(field_specs, self.ui.children)
        }
        self.out = widgets.interactive_output(self.set_inputs, controls)

        # Buttons that start the search and the download.
        search_button = widgets.Button(
            button_style='success',
            tooltip='Click to search gdrive folder ID for files matching Search Key',
            description='Search gdrive',
        )
        search_button.on_click(self.search)
        self.button_search = search_button

        download_button = widgets.Button(
            button_style='success',
            tooltip='Click to download the files to the Path to save',
            description='Download files',
        )
        download_button.on_click(self.download)
        self.button_download = download_button

        # Shared area that both button callbacks print into.
        self.output = widgets.Output()

    def set_inputs(self, folder_name, folder_id, search_key, path):
        """Callback copying the textbox values onto the object attributes.

        The resulting tuple of values is printed as confirmation.
        """

        self.folder_name, self.folder_id = folder_name, folder_id
        self.search_key, self.path = search_key, path
        current = (self.folder_name, self.folder_id, self.search_key, self.path)
        print(current)

    def search(self, b):
        """Button callback: run the gdrive search and keep the hits in self.items."""

        with self.output:
            self.output.clear_output()
            print("searching...")
            hits = self.search_gdrive(self.service, self.folder_id, self.search_key)
            self.items = hits

    def download(self, b):
        """Button callback: download the files found by the last search."""

        with self.output:
            self.output.clear_output()
            print("downloading...")
            self.download_gdrive(
                self.service, self.items, self.path, self.folder_name)

## Dashboard to search for files in a gdrive Folder ID, download those that match a Search Key and save them to a Path

In [5]:
# Create the widget-enabled downloader; its constructor also builds the textboxes and buttons displayed further below
downloader = WidgetDriveDownloader()

In [6]:
# Follow the instructions below to get the client_id.json credentials
# reference: https://medium.com/@umdfirecoml/a-step-by-step-guide-on-how-to-download-your-google-drive-data-to-your-jupyter-notebook-using-the-52f4ce63c66c
# The folder where the credentials are stored is passed as "path_to_credentials".
# It defaults to the original location and can be overridden per machine with the
# GDRIVE_CREDENTIALS_DIR environment variable instead of editing this cell.
CREDENTIALS_DIR = os.path.join(
    os.environ.get(
        'GDRIVE_CREDENTIALS_DIR',
        '/home/ubuntu/utilities/GoogleDrive_downloader_ipywidgets/credentials/',
    ),
    '',  # guarantees the trailing separator the path is later concatenated with
)
downloader.authenticate_gdrive(path_to_credentials=CREDENTIALS_DIR)

In [7]:
# Show the input textboxes plus the output area in which set_inputs echoes the current values
print("Type your search query into the input boxes below:")
display(downloader.ui, downloader.out)
# Show both buttons plus the shared output area the search/download callbacks print into
print("Then click the buttons to search and then download:")
display(downloader.button_search, downloader.button_download, downloader.output)

Type your search query into the input boxes below:


VBox(children=(Text(value='test1', description='Folder name', placeholder='eg: test1'), Text(value='17VB4dXCU-…

Output()

Then click the buttons to search and then download:


Button(button_style='success', description='Search gdrive', style=ButtonStyle(), tooltip='Click to search gdri…

Button(button_style='success', description='Download files', style=ButtonStyle(), tooltip='Click to download t…

Output()

In [8]:
# Check the names and sizes of the downloaded files.
# The folder is created by the download step, so `ls` reports an error until "Download files" has been used.
!ls -lath {downloader.path + downloader.folder_name}

ls: cannot access './data/test1': No such file or directory


In [9]:
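# Optional shell one-liner (kept commented out): runs `unar -d` on every *.zip found below the current directory
# find . -name "*.zip" -exec unar -d {} \;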

In [10]:
#### Useful folder ID / folder name combinations for our project

# folder_id = '1k_VWJF7XEdJlrxgRqOh5nZKusbpTdUJe'  # TsushimaKatuki_sampleTasking
# folder_id = '192J6teGXae-BnM57r9GNUadeBf-amnAS'  # shimane_sampleTasking
# folder_id = '1TLLxCSAepRXM3PDtUoVMDNtdBzHBjkxz'  # yonaguni_SampleTasking
# folder_id = '1sw9xgzmVULTJVcVHrf_xpq7i5o9Qy3EN'  # Shimane-28052020-1_PAN
# folder_id = '1cTpBmP3vrZ8sfxzZSsIo1TUamTPUzuFg'  # Shimane-28052020-1_MS
# folder_id = '1GSBv7LlQCvv0Nt9HqgF6Pd77oeCRUM2z'  # Shimane-28052020-2_PAN
# folder_id = '1T-CHk7eUarJVqJDSg06OIvKtRIPiv25W'  # Shimane-28052020-2_MS


# search_key = '.tif'
# search_key = '.xlsx'