In [68]:
import os
import sys
from tqdm import tqdm

from panopto_dl.PanoptoDownloader import PanoptoDownloader

# Google Drive
from __future__ import print_function
import requests
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.http import MediaFileUpload, MediaInMemoryUpload
from google.oauth2.credentials import Credentials
from tabulate import tabulate

In [76]:
# Direct link to the recording used for the single-file download test.
URL = (
    "https://d2y36twrtb17ty.cloudfront.net/sessions/"
    "9ff73153-b779-496c-9887-aef6017f9eeb/"
    "1d06229d-06cb-4851-a3cb-aef6017f9ef8-8c3f5279-5816-498a-ad5e-af640185811c.mp4"
)
# Local file the download is written to.
PATH = "./output.mp4"


def callback(progress: int):
    """
    Progress hook handed to the downloader; deliberately does nothing.

    :param progress: Downloading progress. From 0 to 100
    """
    # Silent on purpose. Add e.g. print(f"{progress} / 100") here to trace progress.
    return None

In [42]:
# Fetch URL into the local file PATH; callback receives the progress updates.
PanoptoDownloader.download(URL, PATH, callback)

#### Google Drive API

In [5]:
# OAuth scopes requested when signing in to Google Drive.
# If you modify this list, delete the file token.pickle.
SCOPES = [
    "https://www.googleapis.com/auth/drive.metadata.readonly",
]

def get_gdrive_service(scopes=None, token_path='token.pickle',
                       client_secrets_path='credentials.json'):
    """Return a Google Drive v3 API service built from cached or fresh credentials.

    Credentials are loaded from ``token_path`` when that file exists. If they
    are missing or not valid they are refreshed (when expired and a refresh
    token is present) or obtained through the local-server OAuth flow, and the
    result is written back to ``token_path``.

    Called with no arguments the function behaves exactly as before: it uses
    the module-level ``SCOPES``, ``token.pickle`` and ``credentials.json``.

    :param scopes: OAuth scopes for the interactive flow; ``None`` means the
        module-level ``SCOPES``.
    :param token_path: pickle file used to cache the credentials.
    :param client_secrets_path: client-secrets JSON file for the OAuth flow.
    :return: the object returned by ``build('drive', 'v3', credentials=...)``.
    """
    creds = None
    # The token file stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the
    # first time.
    if os.path.exists(token_path):
        # NOTE(review): pickle.load runs arbitrary code embedded in the file.
        # Only point token_path at a file written by this function.
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            # SCOPES is looked up only here, so it is needed only when the
            # interactive flow actually runs.
            flow = InstalledAppFlow.from_client_secrets_file(
                client_secrets_path, SCOPES if scopes is None else scopes)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open(token_path, 'wb') as token:
            pickle.dump(creds, token)
    # return Google Drive API service
    return build('drive', 'v3', credentials=creds)

In [7]:
def _format_size(num_bytes, factor=1024, suffix="B"):
    """Scale a byte count to a short human-readable string, e.g. 2048 -> '2.00KB'."""
    for unit in ("", "K", "M", "G", "T", "P", "E", "Z"):
        if num_bytes < factor:
            return f"{num_bytes:.2f}{unit}{suffix}"
        num_bytes /= factor
    return f"{num_bytes:.2f}Y{suffix}"


def list_files(items):
    """given items returned by Google Drive API, prints them in a tabular way

    Each item must provide ``id``, ``name``, ``mimeType`` and ``modifiedTime``.
    ``parents`` and ``size`` are optional and shown as "N/A" when absent (or,
    for ``size``, when the value is not an integer).

    Sizes are formatted by the private ``_format_size`` helper. The previous
    code called ``get_size_format``, which this notebook never defines, and its
    bare ``except`` hid the resulting NameError, so every size printed as "N/A".

    :param items: list of file-resource dicts; an empty or ``None`` value
        prints 'No files found.'.
    """
    if not items:
        # empty drive
        print('No files found.')
        return

    rows = []
    for item in items:
        # parent directory IDs; absent for items without a parent
        parents = item.get("parents", "N/A")
        try:
            # size in nice bytes format (KB, MB, etc.)
            size = _format_size(int(item["size"]))
        except (KeyError, TypeError, ValueError):
            # no usable size on this item, may be a folder
            size = "N/A"
        rows.append((
            item["id"],
            item["name"],
            parents,
            size,
            item["mimeType"],
            item["modifiedTime"],
        ))
    print("Files:")
    # convert to a human readable table and print it
    print(tabulate(rows, headers=["ID", "Name", "Parents", "Size", "Type", "Modified Time"]))

In [11]:
# Authenticate (or reuse the cached token) and obtain the Drive client.
service = get_gdrive_service()
# Ask the Drive v3 API for one page of at most 5 entries, limited to the
# fields that list_files() reads.
results = (
    service.files()
    .list(
        pageSize=5,
        fields="nextPageToken, files(id, name, mimeType, size, parents, modifiedTime)",
    )
    .execute()
)
# A response without a 'files' key is treated as an empty listing.
items = results.get('files', [])
# Print the returned files & folders as a table.
list_files(items)

Files:
ID                                            Name                                   Parents                  Size    Type                                  Modified Time
--------------------------------------------  -------------------------------------  -----------------------  ------  ------------------------------------  ------------------------
10hWm--LYvYOnsRsN46-R6xDAToAQGy--             MIT Canvas                             ['0AL8n4UP6vUEiUk9PVA']  N/A     application/vnd.google-apps.folder    2023-08-13T23:51:20.756Z
1nR8sIPhx1AJRX3NjJL6P07XnTqZKCQnP             MIT Panopto                            ['0AL8n4UP6vUEiUk9PVA']  N/A     application/vnd.google-apps.folder    2023-08-13T23:51:13.849Z
1acF7EezYl69Qqmkefuag-Hjrtdqulgqv             ADIA Labs Competition                  ['0AL8n4UP6vUEiUk9PVA']  N/A     application/vnd.google-apps.folder    2023-08-12T17:10:29.232Z
13_PWdhThMr6roxb-UFiJj4YAFOj8e_bv3Vx9UHQdyBQ  hashirshoaeb resume -Updated 12.08.23  N/A           

#### Upload Files To Google Drive

In [12]:
# OAuth client-secrets JSON downloaded from the API Console.
CLIENT_SECRET_FILE = "credentials.json"
# JSON file in which the user's access and refresh tokens are cached.
TOKEN_FILE = "token.json"
# ID of the Drive folder that uploaded files are placed in.
FOLDER_ID = "1nR8sIPhx1AJRX3NjJL6P07XnTqZKCQnP"

In [None]:
# Load cached credentials from TOKEN_FILE when it exists; otherwise start with none.
creds = (
    Credentials.from_authorized_user_file(TOKEN_FILE)
    if os.path.exists(TOKEN_FILE)
    else None
)
if not (creds and creds.valid):
    if creds and creds.expired and creds.refresh_token:
        # Expired but refreshable: renew without user interaction.
        creds.refresh(Request())
    else:
        # No usable credentials: run the local-server OAuth flow for the drive.file scope.
        flow = InstalledAppFlow.from_client_secrets_file(
            CLIENT_SECRET_FILE,
            ['https://www.googleapis.com/auth/drive.file'],
        )
        creds = flow.run_local_server(port=0)
    # Cache the new or refreshed credentials as JSON for the next run.
    with open(TOKEN_FILE, 'w') as token:
        token.write(creds.to_json())

# Drive v3 client used by the upload cells.
drive_service = build('drive', 'v3', credentials=creds)

In [21]:
# Upload the local file 'test.txt' into the FOLDER_ID folder as a resumable
# upload (no explicit mimetype is passed).
file_metadata = {'name': 'test.txt', 'parents': [FOLDER_ID]}
media = MediaFileUpload('test.txt', resumable=True)
file = drive_service.files().create(
    body=file_metadata,
    media_body=media,
    fields='id',
).execute()
print('File ID:', file.get('id'))

File ID: 1oGfWdI5HsPcKqhHKbzVFqXJI4YhTZ-To


In [25]:
# Scratch check: the last "/"-separated piece of a path is the bare file name.
"./recordings/test.mp4".split("/")[-1]

'test.mp4'

### Upload Panopto to Google Drive

In [55]:
# Single-file trial: the recording to fetch.
URL = (
    "https://d2y36twrtb17ty.cloudfront.net/sessions/"
    "ad81e068-1e34-45ba-8f40-afe10106296b/"
    "1fcfa88f-ec37-4c5f-a97a-afe101062977-138b778d-7fc3-422e-8e94-aff20108f2ea.mp4"
)
# Local file name = last "/"-separated component of the URL.
PATH = URL.split("/")[-1]

# Drive folder that receives the upload.
FOLDER_ID = "1nR8sIPhx1AJRX3NjJL6P07XnTqZKCQnP"

In [56]:
# Fetch URL into the local file PATH before uploading it.
PanoptoDownloader.download(URL, PATH, callback)

In [57]:
# Upload the downloaded file into FOLDER_ID, keeping its local name, as a
# resumable upload (no explicit mimetype is passed).
file_metadata = {'name': PATH, 'parents': [FOLDER_ID]}
media = MediaFileUpload(PATH, resumable=True)

file = drive_service.files().create(
    body=file_metadata,
    media_body=media,
    fields='id',
).execute()

In [58]:
# Delete the local copy at PATH (the file the previous cell uploaded).
os.remove(PATH)

#### Fast Download
Downloading and saving mp4 files locally and then uploading them to Google Drive takes a very long time. This is because upload speed is typically much slower than download speed. Uploading takes significantly longer than downloading, especially for long videos (e.g. over an hour).

We can transfer mp4 links directly to Google Drive without first saving them as mp4 files and then uploading those files (the video is held in memory instead). This will significantly speed up the process.

In [69]:
# URL of the MP4 file you want to download
URL = "https://d2y36twrtb17ty.cloudfront.net/sessions/ad81e068-1e34-45ba-8f40-afe10106296b/1fcfa88f-ec37-4c5f-a97a-afe101062977-138b778d-7fc3-422e-8e94-aff20108f2ea.mp4"
# Name given to the file in Drive: last "/"-separated component of the URL.
PATH = URL.split("/")[-1]
# Drive folder that receives the upload.
FOLDER_ID = '1nR8sIPhx1AJRX3NjJL6P07XnTqZKCQnP'

# Fetch the whole video into memory. The timeout is (connect, read) seconds so
# a stalled connection cannot hang the cell indefinitely.
response = requests.get(URL, timeout=(10, 120))
# Stop here on a 4xx/5xx answer; otherwise the error page's bytes would be
# uploaded to Drive as if they were the video.
response.raise_for_status()
content = response.content

In [70]:
# Create a media object with the downloaded content.
# resumable=True matches the other upload cells and sends the video with the
# resumable upload protocol rather than as one single request, which suits
# large recordings better.
media = MediaInMemoryUpload(content, mimetype='video/mp4', resumable=True)

# Create a file in Google Drive with the downloaded content
file_metadata = {'name': PATH, 'parents': [FOLDER_ID]}
file = drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()

print('File ID:', file.get('id'))

File ID: 1YDT5O4LvQviHq0t40GjbUJtSEBvXXBqj


#### Bulk Download & Upload

In [71]:
# Read test.txt into memory, then echo every line.
with open("test.txt", "r") as f:
    lines = f.readlines()

total_lines = len(lines)
# Each line keeps its trailing newline, so print() leaves a blank line between entries.
for line in lines:
    print(line)

https://d2y36twrtb17ty.cloudfront.net/sessions/f3634154-fe0f-4307-95fb-b01200eceb13/0f72a810-ff67-4e67-9b28-b01200eceb1c-4d190a5c-6efa-4b57-86c8-b0130093ff51.mp4

https://d2y36twrtb17ty.cloudfront.net/sessions/add26c14-41de-497c-964b-b01200eba395/f4110f91-2b3f-4138-a79b-b01200eba39f-712a493d-ed10-430c-a46e-b01300950919.mp4

https://d2y36twrtb17ty.cloudfront.net/sessions/fd4246ee-4b2b-4ec3-9b45-b01200d706cb/cd597a92-9756-4bf0-a5ec-b01200d706e2-5031e44c-405a-4471-b336-b013009630a6.mp4

https://d2y36twrtb17ty.cloudfront.net/sessions/cadaea1e-9d77-47c2-af08-afac0119c1f4/82fda9f6-2167-47b8-a441-afac0119c1fd-4201bf31-cfac-4db0-a9d1-b007014a1c2c.mp4

https://d2y36twrtb17ty.cloudfront.net/sessions/54fd24ce-d00b-4084-ac5d-afac01190fc9/1344212d-6b1f-499b-ad3a-afac01190fd2-460fa2ee-6768-47db-9710-b00701315170.mp4

https://d2y36twrtb17ty.cloudfront.net/sessions/583d73bd-3c34-4ed0-9d81-afac011a2f15/9dd07100-8ee7-4f9a-bbfa-afac011a2f1f-9a2eca01-fafb-463f-be15-b00700f67eed.mp4

https://d2y36twrtb17ty

In [78]:
# save to local
LOCAL_FOLDER = "./test"
# Make sure the target folder exists, in case the downloader does not create it.
os.makedirs(LOCAL_FOLDER, exist_ok=True)

with open("test.txt", "r") as f:
    # One URL per line. Blank lines are dropped so they are neither attempted
    # nor counted in the total.
    lines = [line.strip() for line in f if line.strip()]
total_lines = len(lines)

# Number of downloads that actually succeeded.
counter = 0
for url in tqdm(lines, total=total_lines, position=0):
    try:
        # Local file name = last "/"-separated component of the URL.
        PATH = os.path.join(LOCAL_FOLDER, url.split("/")[-1])
        PanoptoDownloader.download(url, PATH, callback)
    except Exception as e:
        # Best-effort: report the problem and carry on with the next URL,
        # without counting this one as completed.
        print(e)
        continue

    counter += 1
    print("Completed: {}/{}".format(counter, total_lines))

 10%|████▍                                       | 1/10 [00:25<03:45, 25.06s/it]

Completed: 1/10


 20%|████████▊                                   | 2/10 [01:08<04:47, 35.90s/it]

Completed: 2/10


 20%|████████▊                                   | 2/10 [01:16<05:06, 38.31s/it]


KeyboardInterrupt: 

In [None]:
# save to Google Drive
with open("test.txt", "r") as f:
    # One URL per line. Strip the trailing newline (it previously ended up in
    # both the request URL and the file name) and skip blank lines.
    lines = [line.strip() for line in f if line.strip()]
total_lines = len(lines)

for url in tqdm(lines, total=total_lines):
    # Local/Drive file name = last "/"-separated component of the URL.
    PATH = url.split("/")[-1]
    try:
        try:
            PanoptoDownloader.download(url, PATH, callback)

            file_metadata = {'name': PATH, 'parents': [FOLDER_ID]}
            media = MediaFileUpload(PATH, resumable=True)

            file = drive_service.files().create(
                body=file_metadata, media_body=media, fields='id').execute()
        finally:
            # Always remove the temporary local copy, including when the
            # download or upload failed part-way.
            if os.path.exists(PATH):
                os.remove(PATH)
    except Exception as e:
        # Best-effort: report the problem and carry on with the next URL.
        print(e)