In [1]:
using PyCall

In [2]:
#include("../deps/build.jl")

In [3]:
# PyDrive submodules, imported through PyCall (requesting the "pydrive" package from the
# "conda-forge" channel) and wrapped with `pywrap` so members are reachable with dot syntax.
const pydrive_auth = pywrap(pyimport_conda("pydrive.auth", "pydrive", "conda-forge"))
const pydrive_drive = pywrap(pyimport_conda("pydrive.drive", "pydrive", "conda-forge"))

__anon__

## Getting client secrets etc
URL https://console.developers.google.com/apis/credentials/wizard?api=drive.googleapis.com
DOCS https://pythonhosted.org/PyDrive/quickstart.html#authentication

In [4]:

"""
    get_auth()

Build a PyDrive `GoogleAuth` object, authenticate it, and return it.

If `client_secrets.json` is not present in the current directory, instructions for
generating it are printed (execution then continues). Saved credentials are loaded from
`creds.json`: when none are available the user is asked to visit an authorisation URL and
type in the verification code; when the access token has expired it is refreshed; otherwise
the saved credentials are used to authorise. The credentials are written back to
`creds.json` before returning.
"""
function get_auth()
    secrets_file = "client_secrets.json"
    creds_file = "creds.json"

    if !isfile(secrets_file)
        println("Please go to https://console.developers.google.com/apis/credentials/wizard?api=drive.googleapis.com")
        println("and generate the credentials. for more info see https://pythonhosted.org/PyDrive/quickstart.html#authentication")
    end

    auth = pydrive_auth.GoogleAuth()
    auth[:LoadCredentialsFile](creds_file)

    if auth[:credentials] !== nothing
        if auth[:access_token_expired]
            # Saved credentials exist but the token is stale.
            auth[:Refresh]()
        else
            # Saved credentials are still usable: initialise from them.
            auth[:Authorize]()
        end
    else
        # No saved credentials: interactive flow through the browser.
        println("Please go to the following URL")
        println(auth[:GetAuthUrl]())
        println("and paste the verification code")
        verification_code = readline()
        auth[:Auth](verification_code)
    end

    # Persist whatever credentials are now current for the next session.
    auth[:SaveCredentialsFile](creds_file)
    return auth
end

get_auth (generic function with 1 method)

In [5]:
# Construct a PyDrive `GoogleDrive` for `auth` (by default a freshly obtained `get_auth()`)
# and wrap it with `pywrap`.
function Drive(auth = get_auth())
    gdrive = pydrive_drive.GoogleDrive(auth)
    return pywrap(gdrive)
end

Drive (generic function with 2 methods)

In [6]:
# Julia-side handle around a single Python file object (held in `py`).
struct GDFile
    py::PyObject
end

# Indexing a `GDFile` is forwarded unchanged to the wrapped `PyObject`.
function Base.getindex(gd::GDFile, args...)
    return gd.py[args...]
end

# Read one metadata entry by subscripting the wrapped object with `field` on the Python side.
function metadata(gd::GDFile, field)
    return py"$(gd.py)[$field]"
end

# The "title" metadata entry.
function filename(gd::GDFile)
    return metadata(gd, "title")
end

# The "mimeType" metadata entry.
function mimetype(gd::GDFile)
    return metadata(gd, "mimeType")
end

# Compact display: a fixed prefix followed by the file's title.
function Base.show(io::IO, gd::GDFile)
    return print(io, "Google Drive file: ", filename(gd))
end


# Run the Drive search `query` (default: non-trashed items whose parent is 'root') on `drive`
# and return the results as an array of `GDFile`.
function list_files(
        query = "'root' in parents and trashed=false",
        drive=Drive()
    )
    params = Dict("q"=>query)
    listing = pycall(drive.ListFile, PyObject, params)
    # Keep the result as a raw PyObject (`o` suffix) and index it from the Python side.
    entries = py"$listing.GetList()"o
    nfiles = py"len($entries)"
    # Python indices are zero-based.
    return [GDFile(py"$entries[$idx]"o) for idx in 0:(nfiles - 1)]
end

list_files (generic function with 3 methods)

In [7]:
"""
    list_files_in_folder(foldername, drive=Drive())

List the non-trashed files whose parent is the Google Drive folder titled `foldername`.

The folder is looked up by title, restricted to items whose MIME type is the Google Drive
folder type, so a regular file that happens to share the title is never selected. When
several folders share the title, the first one returned by the query is used.

Throws an `ArgumentError` when no such folder exists.
"""
function list_files_in_folder(foldername, drive=Drive())
    # Backslashes and single quotes must be backslash-escaped inside a quoted query value;
    # otherwise a name such as "Valentine's Day" terminates the literal early.
    escaped = join((c == '\\' || c == '\'' ? string('\\', c) : string(c))
                   for c in string(foldername))
    folder_mime = "application/vnd.google-apps.folder"

    matches = list_files(
        "title='$escaped' and mimeType='$folder_mime' and trashed=false", drive)
    if isempty(matches)
        throw(ArgumentError("No Google Drive folder named \"$foldername\" was found."))
    end

    folderid = metadata(first(matches), "id")
    return list_files("'$(folderid)' in parents and trashed=false", drive)
end



list_files_in_folder (generic function with 2 methods)

### Mappings for mimetypes for Google Apps

Files in Google Apps formats (e.g. Google Docs, Google Sheets, etc.)
can't be downloaded from Drive directly.
But they can be exported into any of the formats listed at
https://developers.google.com/drive/api/v3/manage-downloads
Their reported MIME types are listed at: https://developers.google.com/drive/api/v3/mime-types
We define a mapping for what to do: from the latter (the Google Apps MIME type) to the former (the export format).

The following map just uses plain formats,
but open-office formats work,
or ms-office formats,
or html or ...

In [8]:
# No MIME type means no extension. `typeof(nothing)` is used instead of naming the type
# directly because its name differs between Julia versions (`Void` before 0.7, `Nothing` after).
mime_to_extension(::typeof(nothing)) = ""

# Google Apps MIME type => (MIME type to export as, file extension to append).
const _plain_mimetype_map = Dict{String,Tuple{String,String}}(
    # exported as plain text
    "application/vnd.google-apps.document"     => ("text/plain", ".txt"),
    "application/vnd.google-apps.presentation" => ("text/plain", ".txt"),
    # exported as comma-separated values
    "application/vnd.google-apps.spreadsheet"  => ("text/csv", ".csv"),
    # exported as SVG
    "application/vnd.google-apps.drawing"      => ("image/svg+xml", ".svg"),
)

Dict{String,Tuple{String,String}} with 4 entries:
  "application/vnd.google-apps.drawing"      => ("image/svg+xml", ".svg")
  "application/vnd.google-apps.spreadsheet"  => ("text/csv", ".csv")
  "application/vnd.google-apps.presentation" => ("text/plain", ".txt")
  "application/vnd.google-apps.document"     => ("text/plain", ".txt")

In [9]:
"""
    safer_joinpath(dirpart, relparts...)

Join `relparts` onto `dirpart` like `joinpath`, but throw a `DomainError` if the resulting
path does not lie inside `dirpart` (for example because of `..` components or an absolute
component in `relparts`).

Returns the joined (un-normalised) path, exactly as `joinpath` produced it.
"""
function safer_joinpath(dirpart, relparts...)
    path = joinpath(dirpart, relparts...)

    # Compare on resolved paths rather than raw string prefixes: a plain `startswith`
    # check accepts sibling directories ("/tmp/foo" vs "/tmp/foobar") and rejects valid
    # paths whenever `dirpart` is not already normalised ("./data").
    # `relpath` refuses empty strings, so treat those as the current directory.
    base = isempty(dirpart) ? "." : dirpart
    rel = relpath(isempty(path) ? "." : path, base)

    # An absolute result means there is no relative route from `base` at all
    # (e.g. a different drive on Windows); a leading ".." component means we left `base`.
    escapes = isabspath(rel) || rel == ".." ||
              startswith(rel, "../") || startswith(rel, "..\\")
    if escapes
        throw(DomainError(relparts, "Relative filepath ($relparts) escapes directory. Possible directory traversal attack."))
    end
    return path
end

"""
    drive_download(remote::GDFile, localdir; mimetype_map=_plain_mimetype_map)

Download the given `remote` Google Drive file into the local directory `localdir`,
and return the path of the local file.

Files whose MIME type is a key of `mimetype_map` are exported (converted): the map's value
is a tuple `(export_mimetype, extension)`, and `extension` is appended to the local
filename. For any other MIME type, `nothing` is passed as the export type and the filename
is left as-is.

The local path is built with `safer_joinpath`, so a remote title that would resolve
outside `localdir` raises an error instead of being written.
"""
function drive_download(remote::GDFile, localdir; mimetype_map=_plain_mimetype_map)
    # Fallback for unmapped MIME types: no export type and no extra extension.
    dest_mimetype, ext = get(mimetype_map, mimetype(remote), (nothing, ""))

    # The extension is appended after the containment check on the joined path.
    localpath = safer_joinpath(localdir, filename(remote)) * ext
    remote[:GetContentFile](localpath, dest_mimetype)
    return localpath
end


drive_download

## DEMO

In [10]:
using DataDeps

# Register a data dependency named "GoogleDriveDemo": its remote is the list of files in the
# Drive folder "Demo", and `drive_download` is passed as the fetch method.
register(DataDep("GoogleDriveDemo",
        "Demonstration of google drive",
         list_files_in_folder("Demo"), # Issue: the Drive query runs eagerly, as soon as this `register` call is evaluated. TODO: replace with a lazy vector.
         fetch_method = drive_download));

In [11]:
# Read all lines of text1.txt from the "GoogleDriveDemo" data dependency into an array.
# The data dependency is not installed yet at this point, so the download prompt shown
# in the output below appears first.
collect(eachline(datadep"GoogleDriveDemo/text1.txt"))

[1m[36mINFO: [39m[22m[36mThis program has requested access to the data dependency GoogleDriveDemo.
[39m[1m[36mINFO: [39m[22m[36mwhich is not currently installed. It can be installed automatically, and you will not see this message again.
[39m[1m[36mINFO: [39m[22m[36m
Demonstration of google drive

[39m[1m[36mINFO: [39m[22m[36mDo you want to download the dataset from GDFile[Google Drive file: text1.txt, Google Drive file: text2.txt, Google Drive file: image.bmp] to "/home/wheel/oxinabox/.julia/datadeps/GoogleDriveDemo"?
[39m[1m[36mINFO: [39m[22m[36m[y/n]
[39m

STDIN> y




1-element Array{String,1}:
 "this is the content of the first text file"

In [12]:
# List the files now present in the "GoogleDriveDemo" data dependency directory.
readdir(datadep"GoogleDriveDemo")

3-element Array{String,1}:
 "image.bmp"
 "text1.txt"
 "text2.txt"

In [13]:
using DataDeps

# Register a second data dependency, "GoogleDriveDemoHard", from the Drive folder "DemoHard",
# again passing `drive_download` as the fetch method.
register(DataDep("GoogleDriveDemoHard",
        "Demonstration of google drive, with Google Apps file types",
         list_files_in_folder("DemoHard"), # Issue: the Drive query runs eagerly, as soon as this `register` call is evaluated. TODO: replace with a lazy vector.
         fetch_method = drive_download));

In [14]:
# List the files of the "GoogleDriveDemoHard" data dependency; in the output below the
# downloaded files carry the extensions ".csv" and ".txt".
readdir(datadep"GoogleDriveDemoHard")

[1m[36mINFO: [39m[22m[36mThis program has requested access to the data dependency GoogleDriveDemoHard.
[39m[1m[36mINFO: [39m[22m[36mwhich is not currently installed. It can be installed automatically, and you will not see this message again.
[39m[1m[36mINFO: [39m[22m[36m
Demonstration of google drive, with Google Apps file types

[39m[1m[36mINFO: [39m[22m[36mDo you want to download the dataset from GDFile[Google Drive file: Math, Google Drive file: Confusion] to "/home/wheel/oxinabox/.julia/datadeps/GoogleDriveDemoHard"?
[39m[1m[36mINFO: [39m[22m[36m[y/n]
[39m

STDIN> y




2-element Array{String,1}:
 "Confusion.txt"
 "Math.csv"     

In [22]:
# Parse the exported Math.csv as comma-delimited data, treating the first row as a header.
readdlm(datadep"GoogleDriveDemoHard/Math.csv", ',', header=true)

(Any[1 1.5 "abc"; 10 1.22 "efg"; 100 0.1 "apples"], AbstractString["Int" "Float" "String"])

In [25]:
# Read the exported Confusion.txt as one string and print it.
readstring(datadep"GoogleDriveDemoHard/Confusion.txt") |> println

﻿This is the world we live in.
wo-oh-oh-oh
These are the hands we’re given


In [15]:
# Clean up: recursively delete the local directories of both demo data dependencies.
rm(datadep"GoogleDriveDemo", recursive=true)
rm(datadep"GoogleDriveDemoHard", recursive=true)