# To run this script, you will need your user token (generate one if you don't have it).
https://github.com/settings/tokens

Save it as a file called _github_personal_token.txt_ in a folder called _reusable_code_ in the Jupyter home directory (`/home/jupyter/`).

In [11]:
from nbformat import read, write
import github
import requests
import re
import os
import pathlib
from datetime import datetime

In [12]:
# Pick up the GitHub token for authentication purposes.
# The file is opened in a `with` block so the handle is closed straight away, and the
# contents are stripped because a trailing newline (added by most editors) would
# otherwise become part of the token that is sent to GitHub.
with open('/home/jupyter/reusable_code/github_personal_token.txt', "r") as token_file:
    mytoken = token_file.read().strip()


In [13]:
# Create the GitHub connection/instance, authenticated with the token read above
g = github.Github(mytoken)

# Get the user object. get_user() is called without arguments, so no username needs
# to be filled in here -- presumably this resolves to the owner of the token (verify
# against the PyGithub docs).
myuser=g.get_user()
# Print the login name as a quick sanity check that authentication worked
print(myuser.login)

# Subset to just the ITV repo (disabled: organisation-level listing is not used below)
#ITV_org=g.get_organization('ITV').get_repos()



In [14]:
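# One-time setup: uncomment and run this once to create the backup repository, then comment it out again
#myuser.create_repo(name='backup_ITV')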

In [15]:
# Fetch the 'backup_ITV' repository from the user object; the backup loops below
# read from and write to this repo
backup_repo=myuser.get_repo(name='backup_ITV')

In [16]:
# Define a function to remove outputs from notebooks to make the file sizes smaller. Later on, the API struggles to download 
# contents of files greater than 1MB so you don't want visuals etc.

def strip_output(nb):
    """Remove cell outputs and prompt numbers from a notebook, in place.

    Every cell in ``nb.cells`` that has an ``outputs`` attribute gets it
    replaced by an empty list, and every cell that has a ``prompt_number``
    attribute has that entry deleted. The notebook object is modified
    directly and nothing is returned.
    """
    for notebook_cell in nb.cells:
        has_outputs = hasattr(notebook_cell, "outputs")
        if has_outputs:
            # Drop rendered results (plots, tables, ...) to keep the file small
            notebook_cell.outputs = []

        has_prompt = hasattr(notebook_cell, "prompt_number")
        if has_prompt:
            del notebook_cell["prompt_number"]

#nb = read(open(filepath), 4)
#strip_output(nb)
#write(nb, open("my_notebook_cleaned.ipynb", "w"), 4)

In [17]:
# Folder that is scanned recursively for files to back up
directory=r'/home/jupyter/'
# Paths of all files that pass the filters below
myfiles=[]

# File names that are always skipped (judging by their names these hold credentials -- keep this list up to date)
file_exceptions=['client_secrets.json','TalonOneAPIKey.txt','token.pickle','trellocreds.pickle','github_token.txt','github_personal_token.txt']
# File extensions to include; normalised to lower case just below
extensions=['.ipynb','.py','.txt','.sql']

extensions=[ext.lower() for ext in extensions]
for root, dirs, files in os.walk(directory): # Visit every folder and subfolder
    # Skip folders that sit anywhere underneath a dot-folder (e.g. checkpoint folders)
    in_hidden_folder = any(part.startswith('.') for part in root.split('/'))
    if in_hidden_folder:
        continue

    for name in files:
        # Dot-files are system checkpoints, not user content
        if name.startswith('.'):
            continue
        if name in file_exceptions:
            continue
        if pathlib.Path(name).suffix.lower() not in extensions:
            continue

        filepath=os.path.join(root, name)
        myfiles.append(filepath)

In [18]:
# Manual alternative: to back up only a hand-picked list of files, uncomment the next line
# and comment out the filter further down. (It used to be a live assignment that was
# immediately overwritten, which made it look as if it had an effect.)
#subset=['/home/jupyter/reusable_code/trello_generic.py','/home/jupyter/BritBox Admin/Export Insight Workstack.ipynb']

def _top_level_entry(path):
    """Return the first path component of `path` below `directory`.

    Derived from the path relative to `directory` instead of a fixed split index, so the
    filter keeps working if `directory` is changed to a folder at a different depth.
    """
    return os.path.relpath(path, directory).split(os.sep)[0]

# Back up everything except top-level entries starting with 'bbdig' and the 'dataserv' entry
subset=[i for i in myfiles if not _top_level_entry(i).startswith('bbdig') and _top_level_entry(i)!='dataserv']

In [19]:
# Back up every file in `subset` to the GitHub backup repo.
# For each file: notebooks are stripped of their outputs first, then the file is either
# updated (if it already exists in the repo) or created. Files that could not be pushed
# are collected in `failed_updates` so they can be inspected / retried afterwards.
failed_updates=[]
#for filepath in myfiles: # Loop through files in local directory
for filepath in subset: # Loop through files in local directory

    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S") # Store timestamp (used in the commit message)

    # Path inside the repo = local path without the leading `directory`.
    # Only the prefix is removed; str.replace would also cut out any later occurrence of it.
    destPath=filepath[len(directory):] if filepath.startswith(directory) else filepath

    # Before backing up, we need to sanitise the notebook file(s) and remove outputs, which make the files massive, because you cannot then later update them
    if pathlib.Path(filepath).suffix.lower()=='.ipynb':
        with open(filepath, encoding='utf-8') as nb_in: # Notebooks are UTF-8 encoded JSON
            nb = read(nb_in, 4)
        strip_output(nb) # Run function defined above to remove outputs
        # Write to a temporary location; the `with` block guarantees the data is flushed
        # and the handle closed before the file is read back below
        with open("my_notebook_cleaned.ipynb", "w", encoding='utf-8') as nb_out:
            write(nb, nb_out, 4)
        with open("my_notebook_cleaned.ipynb", 'rb') as cleaned: # Re-read as bytes
            filecontents=cleaned.read()
    else:
        with open(filepath, 'rb') as raw:
            filecontents=raw.read()

    commit_message='Lazy backup at {}'.format(now)

    try:
        # Check if the file exists already. Only a "not found" answer means we should
        # create it; any other error (rate limit, network, oversized file, ...) is a real
        # failure and must not be papered over by a create call that cannot succeed.
        try:
            existing_content=backup_repo.get_contents(path=destPath)
        except github.UnknownObjectException:
            existing_content=None

        if existing_content is None:
            # If file doesn't exist, create it
            backup_repo.create_file(path=destPath,message=commit_message,content=filecontents)
            print('Loaded {}'.format(filepath))
        else:
            # If file exists, update it
            backup_repo.update_file(path=destPath,message=commit_message,content=filecontents,sha=existing_content.sha)
            print('Updated {}'.format(filepath))
    except Exception as ex:
        # Deliberately broad: one bad file must not abort the whole backup run
        print(ex)
        print ('Could not update or create file: {}'.format(filepath))
        failed_updates.append(filepath)
    


In [10]:
# Display the paths that the backup loop could neither update nor create
failed_updates

In [66]:

# Retry the first 15 files that failed in the main backup loop.
# Files that succeed this time are removed from `failed_updates`; files that fail again are
# moved to the end of the list, so re-running this cell works through the remaining entries
# instead of growing the list with duplicates.
retry_batch=failed_updates[:15]
still_failing=[] # Files from this batch that failed again

for filepath in retry_batch: # Loop through the files being retried
    now = datetime.now().strftime("%d/%m/%Y %H:%M:%S") # Store timestamp (used in the commit message)

    # Path inside the repo = local path without the leading `directory`.
    # Only the prefix is removed; str.replace would also cut out any later occurrence of it.
    destPath=filepath[len(directory):] if filepath.startswith(directory) else filepath

    # Before backing up, we need to sanitise the notebook file(s) and remove outputs, which make the files massive, because you cannot then later update them
    if pathlib.Path(filepath).suffix.lower()=='.ipynb':
        with open(filepath, encoding='utf-8') as nb_in: # Notebooks are UTF-8 encoded JSON
            nb = read(nb_in, 4)
        strip_output(nb) # Run function defined above to remove outputs
        # Write to a temporary location; the `with` block guarantees the data is flushed
        # and the handle closed before the file is read back below
        with open("my_notebook_cleaned.ipynb", "w", encoding='utf-8') as nb_out:
            write(nb, nb_out, 4)
        with open("my_notebook_cleaned.ipynb", 'rb') as cleaned: # Re-read as bytes
            filecontents=cleaned.read()
    else:
        with open(filepath, 'rb') as raw:
            filecontents=raw.read()

    commit_message='Lazy backup at {}'.format(now)

    try:
        # Check if the file exists already. Only a "not found" answer means we should
        # create it; any other error is a real failure and is reported as such.
        try:
            existing_content=backup_repo.get_contents(path=destPath)
        except github.UnknownObjectException:
            existing_content=None

        if existing_content is None:
            # If file doesn't exist, create it
            backup_repo.create_file(path=destPath,message=commit_message,content=filecontents)
            print('Loaded {}'.format(filepath))
        else:
            # If file exists, update it
            backup_repo.update_file(path=destPath,message=commit_message,content=filecontents,sha=existing_content.sha)
            print('Updated {}'.format(filepath))
    except Exception as ex:
        # Deliberately broad: one bad file must not abort the whole retry run
        print(ex)
        print ('Could not update or create file: {}'.format(filepath))
        still_failing.append(filepath)

# Drop the entries that were just retried and queue the ones that failed again at the end
failed_updates=failed_updates[len(retry_batch):]+still_failing
    
