In [1]:
import io
import os
from types import SimpleNamespace

import oauth2client
from oauth2client import file, client, tools
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

In [2]:
import ipywidgets as widgets

## Class to wrap Google Drive API 

In [3]:
class DriveDownloader():
    """Small wrapper around the Google Drive v3 API: authenticate, search a folder, download.

    Attributes (``__init__`` sets demo defaults; callers overwrite them):
        folder_name: sub-folder of ``path`` that downloads are written into.
        folder_id: ID of the Drive folder that is searched.
        search_key: substring that file names must contain.
        path: local base directory for downloads.
        service: Drive API client; ``None`` until ``authenticate_gdrive`` has run.
        items: result of the latest successful ``search_gdrive``; ``None`` before that.
    """

    def __init__(self):
        # set demo defaults
        self.folder_name = 'test1'
        self.folder_id = '17VB4dXCU-FhW_qNoUphmrjjGJ2brx3f1'
        self.search_key = '.md'
        self.path = './data/'
        self.service = None
        self.items = None

    def authenticate_gdrive(self, path_to_credentials):
        """Build the Drive client and store it in ``self.service`` (no-op if already set).

        Args:
            path_to_credentials: directory holding ``client_id.json``; the cached
                ``token.json`` is read from / written to the same directory. The
                directory is created if missing. A trailing slash is optional.

        Sources:
            https://developers.google.com/drive/api/v3/quickstart/python?authuser=1
            https://medium.com/@umdfirecoml/a-step-by-step-guide-on-how-to-download-your-google-drive-data-to-your-jupyter-notebook-using-the-52f4ce63c66c
        """
        if self.service is not None:
            return

        os.makedirs(path_to_credentials, exist_ok=True)

        # Flags object handed to tools.run_flow in place of parsed command-line
        # arguments. The original passed the truthy string 'store_true' for
        # noauth_local_webserver; True keeps the same truthiness and states the intent.
        flags = SimpleNamespace(
            auth_host_name='localhost',
            noauth_local_webserver=True,
            auth_host_port=[8080, 8090],
            logging_level='ERROR',
        )

        # authorization boilerplate code
        SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
        store = file.Storage(os.path.join(path_to_credentials, 'token.json'))
        creds = store.get()
        # The following will give you a link if token.json does not exist,
        # the link allows the user to give this app permission
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets(
                os.path.join(path_to_credentials, 'client_id.json'), SCOPES)
            creds = tools.run_flow(flow, store, flags)

        self.service = build('drive', 'v3', credentials=creds)

    @staticmethod
    def _escape_query_value(value):
        """Escape backslashes and single quotes so ``value`` can sit inside a quoted query term."""
        return str(value).replace('\\', '\\\\').replace("'", "\\'")

    def search_gdrive(self, service, folder_id, search_key, page_size=10):
        """List the files in ``folder_id`` whose name contains ``search_key``.

        All result pages are followed, so the returned list is not capped at
        ``page_size`` entries.

        Args:
            service: Drive API client; falls back to ``self.service`` when ``None``.
            folder_id: ID of the Drive folder to search.
            search_key: substring to look for in file names.
            page_size: number of results requested per API call.

        Returns:
            A list of ``{'id': ..., 'name': ...}`` dicts (possibly empty), also stored
            in ``self.items``; ``None`` (with ``self.items`` untouched) when no
            service is available.
        """
        if service is None:
            service = self.service
        if service is None:
            print("[error in search_gdrive]: no service. run authenticate_gdrive() first")
            return None

        query_string = (
            f"name contains '{self._escape_query_value(search_key)}' "
            f"and parents in '{self._escape_query_value(folder_id)}'"
        )
        print(query_string)

        items = []
        page_token = None  # None on the first request; afterwards the token of the next page
        while True:
            results = service.files().list(
                q=query_string,
                spaces='drive',
                pageSize=page_size,
                pageToken=page_token,
                fields="nextPageToken, files(id, name)",
            ).execute()
            items.extend(results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break

        if not items:
            print('No files found.')
        else:
            print('Files:')
            for i, item in enumerate(items):
                print(f"item_id: {i}, filename: {item['name']} , file_id: {item['id']}")

        self.items = items
        return items

    def download_gdrive(self, service, items, path, folder_name):
        """Download every entry of ``items`` into ``<path>/<folder_name>/``.

        Args:
            service: Drive API client; falls back to ``self.service`` when ``None``.
            items: dicts with ``'id'`` and ``'name'`` keys, as returned by ``search_gdrive``.
            path: local base directory (trailing slash optional).
            folder_name: sub-folder of ``path`` that receives the files; created if missing.

        Nothing is created on disk when there is no service or nothing to download;
        an error message is printed instead.
        """
        if service is None:
            service = self.service
        if service is None:
            print("[error in download_gdrive]: no service. run authenticate_gdrive() first")
            return
        if not items:
            print("[error in download_gdrive]: no items to download. Run search_gdrive() first")
            return

        path_to_save = os.path.join(path, folder_name)
        os.makedirs(path_to_save, exist_ok=True)

        for item in items:
            # Drive names may contain path separators; flatten them so every file
            # lands directly inside path_to_save.
            local_name = item['name'].replace('/', '_')
            if os.sep != '/':
                local_name = local_name.replace(os.sep, '_')

            request = service.files().get_media(fileId=item['id'])
            # The context manager closes (and thereby flushes) the file even if a chunk fails.
            with io.FileIO(os.path.join(path_to_save, local_name), mode='w') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    print(f"Download {int(status.progress() * 100)}%")

## Class to combine Google Drive API with ipywidgets

In [4]:
class WidgetDriveDownloader(DriveDownloader):
    """DriveDownloader operated through an ipywidgets form and two buttons.

    Widgets created by ``setup_widgets``:
        ui: VBox with the four text inputs (folder name, folder ID, search key, path).
        out: interactive output that calls ``set_inputs`` with the text box values.
        button_search / button_download: buttons wired to ``search`` / ``download``.
        output: Output area that ``search`` and ``download`` print into.
    """

    def __init__(self):
        super().__init__()
        self.setup_widgets()

    @staticmethod
    def _text_box(label, default):
        """Create a text input pre-filled with ``default`` (also shown as the example hint)."""
        return widgets.Text(
            description=label,
            placeholder=f'eg: {default}',
            value=f"{default}",  # use the __init__ values as defaults
        )

    @staticmethod
    def _action_button(label, hint, handler):
        """Create a green button that invokes ``handler`` on click."""
        button = widgets.Button(
            button_style='success',
            tooltip=hint,
            description=label,
        )
        button.on_click(handler)
        return button

    # prepare User Interface with ipywidgets
    def setup_widgets(self):
        """Build the input form, its echo output, the two buttons and the log area."""
        # input text boxes for user defined parameters
        name_box = self._text_box('Folder name', self.folder_name)
        id_box = self._text_box('Folder ID', self.folder_id)
        key_box = self._text_box('Search Key', self.search_key)
        path_box = self._text_box('Path to save', self.path)

        self.ui = widgets.VBox([name_box, id_box, key_box, path_box])
        # map each set_inputs argument to the text box that supplies it
        self.out = widgets.interactive_output(
            self.set_inputs,
            dict(
                folder_name=name_box,
                folder_id=id_box,
                search_key=key_box,
                path=path_box,
            ),
        )

        # buttons for starting the search and starting the download
        self.button_search = self._action_button(
            'Search gdrive',
            'Click to search gdrive folder ID for files matching Search Key',
            self.search,
        )
        self.button_download = self._action_button(
            'Download files',
            'Click to download the files to the Path to save',
            self.download,
        )
        self.output = widgets.Output()

    def set_inputs(self, folder_name, folder_id, search_key, path):
        """Copy the form values onto the instance and echo them as a tuple."""
        self.folder_name, self.folder_id = folder_name, folder_id
        self.search_key, self.path = search_key, path
        current = (self.folder_name, self.folder_id, self.search_key, self.path)
        print(current)

    def search(self, b):
        """Button callback: clear the log area and run ``search_gdrive`` with the form values."""
        log = self.output
        with log:
            log.clear_output()
            print("searching...")
            self.search_gdrive(self.service, self.folder_id, self.search_key)

    def download(self, b):
        """Button callback: clear the log area and download the last search results."""
        log = self.output
        with log:
            log.clear_output()
            print("downloading...")
            self.download_gdrive(self.service, self.items, self.path, self.folder_name)

## Dashboard to search for files in a gdrive Folder ID, download those that match a Search Key and save them to a Path

In [5]:
# Create the widget-backed downloader; its constructor also builds the input form and buttons.
downloader = WidgetDriveDownloader()

In [6]:
# Follow the instructions in the reference below to obtain the client_id.json credentials:
# reference: https://medium.com/@umdfirecoml/a-step-by-step-guide-on-how-to-download-your-google-drive-data-to-your-jupyter-notebook-using-the-52f4ce63c66c
# Then pass the folder where you stored the credentials as "path_to_credentials".
downloader.authenticate_gdrive(path_to_credentials='./credentials/')

In [7]:
# Render the dashboard: first the input form together with the echo of its current values,
# then the search/download buttons and the output area their messages are printed into.
print("Type your search query into the input boxes below:")
display(downloader.ui, downloader.out)
print("Then click the buttons to search and then download:")
display(downloader.button_search, downloader.button_download, downloader.output)

Type your search query into the input boxes below:


VBox(children=(Text(value='test1', description='Folder name', placeholder='eg: test1'), Text(value='17VB4dXCU-…

Output()

Then click the buttons to search and then download:


Button(button_style='success', description='Search gdrive', style=ButtonStyle(), tooltip='Click to search gdri…

Button(button_style='success', description='Download files', style=ButtonStyle(), tooltip='Click to download t…

Output()

In [8]:
# Check the names and sizes of the files that were downloaded.
# The {...} expression is evaluated in Python and substituted into the shell command.
!ls -lath {downloader.path + downloader.folder_name}

total 16K
-rw-rw-r-- 1 ljb ljb 1.3K  6月 29 22:34 markdown.md
drwxrwxr-x 2 ljb ljb 4.0K  6月 29 22:34 .
-rw-rw-r-- 1 ljb ljb 1.3K  6月 29 22:34 test_markdown.md
drwxrwxr-x 3 ljb ljb 4.0K  6月 29 22:28 ..
