<a href="https://colab.research.google.com/github/stmulugheta/AI-Projects-2021/blob/main/Copy_directories_from_Google_Drive_to_DagsHub_storage.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<center> <a href="https://dagshub.com"><img alt="DAGsHub" width="500px" src="https://raw.githubusercontent.com/DAGsHub/client/master/dagshub_github.png"></a> </center>

<center><h1><b>The Dag Walker</b></h1></center>

<center><h3>Transfer directories from Google Drive to DagsHub storage with a click of a button</h3></center>

This notebook is designed to automate the process of copying directories from Google Drive to DagsHub storage. **All you need to do is check some boxes, fill in your details, run the notebook, and you're set to go!**

Behind the scenes, the directory is copied to the Colab runtime, versioned by DVC and Git, and pushed to DagsHub storage. This notebook is part of DagsHub's added-value session, which shows how to use it.

<iframe width="560" height="315" src="https://www.youtube.com/embed/f08v0ulIJYM" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

<img src="https://upload.wikimedia.org/wikipedia/commons/c/c9/Discord-New-Logo.png" height="23"/> [Discord Channel](https://discord.com/channels/698874030052212737/698874030572437526) | <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/c/c9/Linkedin.svg/1200px-Linkedin.svg.png" height="23"/> [LinkedIn](https://www.linkedin.com/in/nir-barazida/) | <img src="https://help.twitter.com/content/dam/help-twitter/brand/logo.png" height="25"/> [Twitter](https://twitter.com/barazida) | <img src="https://res-2.cloudinary.com/crunchbase-production/image/upload/c_lpad,f_auto,q_auto:eco/plwmuai9t3okgwbuhkho" height="30"/> [DAGsHub](https://dagshub.com) | <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Octicons-mark-github.svg/1200px-Octicons-mark-github.svg.png" height="25"/> [GitHub](https://github.com/DAGsHub) 

In [None]:
from IPython.display import HTML

# Keep the YouTube player markup in a named string so the display call stays short;
# the bare HTML(...) expression on the last line is what the cell renders.
_VIDEO_IFRAME = '<iframe width="600" height="450" src="https://www.youtube.com/embed/f08v0ulIJYM" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
HTML(_VIDEO_IFRAME)

In [None]:
#@title Notebook configurations  🏗

#@markdown Is the repository mirrored from GitHub to DagsHub? 
MIRROR = False #@param {type:"boolean"}

#@markdown Initialize DVC in this repository (should only be done once **per repository**)
INIT_DVC = True #@param {type:"boolean"}

#@markdown Set DagsHub storage as DVC's remote (should only be done once **per repository**)
SET_DVC_REMOTE = True #@param {type:"boolean"}

#@markdown Set DVC’s user configurations for the DagsHub user (stored locally - should be done once **per runtime**)
SET_DVC_USER = True #@param {type:"boolean"}

#@markdown Pull the changes from the DagsHub storage to the Colab runtime
PULL_DVC = True #@param {type:"boolean"}

#@markdown ---


In [None]:
#@title DagsHub Configurations 🐶

# The owner, repository name and user name below are interpolated into the Git
# clone/push URL and the DVC remote settings built in later cells; the email and
# user name are also written to the global Git identity.

#@markdown Enter the DAGsHub repository owner name:
DAGSHUB_REPO_OWNER= "nirbarazida" #@param {type:"string"} 

#@markdown Enter the DAGsHub repository name:
DAGSHUB_REPO_NAME= "test-migrate-data" #@param {type:"string"}

#@markdown Enter the username of your DAGsHub account:
DAGSHUB_USER_NAME = "nirbarazida" #@param {type:"string"}

#@markdown Enter the email for your DAGsHub account:
DAGSHUB_EMAIL = "nirbarazida@gmail.com" #@param {type:"string"}

#@markdown Enter the branch name:
BRANCH= "master" #@param {type:"string"}

In [None]:
#@title Target directory configurations 🎯

# DATA_DIR_PATH is joined onto the current working directory and GDRIVE_DATA_PATH
# onto /content/drive/MyDrive by the copy cell. Leaving DATA_DIR_PATH empty targets
# the working directory itself, which already exists, so the copy is refused with a
# "File already exists" error.

#@markdown Repository directory path (e.g. task-2-data-processing/raw-data) :
DATA_DIR_PATH= "" #@param {type:"string"} 

#@markdown Google Drive directory path (e.g. project-name/task-2-data-labeling/labeled-data) :
GDRIVE_DATA_PATH= "" #@param {type:"string"}

# Additional information 💡 

**DagsHub**

In [None]:
import getpass

# Ask for the secret interactively (getpass does not echo the input) instead of
# storing it in the notebook.
_DAGSHUB_SECRET_PROMPT = 'Please enter your DAGsHub token or password: '
DAGSHUB_TOKEN = getpass.getpass(_DAGSHUB_SECRET_PROMPT)

**GitHub**

In [None]:
# GitHub details are requested only when the MIRROR flag is set; otherwise this
# cell does nothing and the GITHUB_* names stay undefined.
if MIRROR:
  # Set GitHub variables
  GITHUB_REPO_OWNER = input("What is the repository owner username?")
  GITHUB_REPO_NAME = input("What is your GitHub repository name?")
  GITHUB_USER_NAME = input("What is your GitHub username?")
  GITHUB_EMAIL = input("What is the email for your GitHub account:")
  # Relies on `getpass` having been imported by the DagsHub token cell.
  # NOTE(review): no later cell visible in this notebook reads the GITHUB_* values —
  # confirm whether they are still needed.
  GITHUB_TOKEN = getpass.getpass('Please enter your GitHub token or password: ')

# Mount Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the copy cell reads the source directory
# from beneath /content/drive/MyDrive.
drive.mount('/content/drive')

# Black Magic 🪄

In [None]:
import requests
import datetime
import os

# When true, the clone cell runs `git clone` and changes into the repository directory.
CLONE = True
# When true, the clone cell runs `git pull` afterwards.
PULL_GIT = True

**Configure Git**

In [None]:
# Set the global Git identity used for the commit created later in this notebook.
# The values are quoted so that a name containing spaces reaches git as a single
# argument instead of being split into a value plus an extra positional argument.
!git config --global user.email "{DAGSHUB_EMAIL}"
!git config --global user.name "{DAGSHUB_USER_NAME}"

**Clone the Repository**

In [None]:
from urllib.parse import quote

if CLONE:
  # Percent-encode the credentials: a password containing characters such as
  # '@', ':' or '/' would otherwise be parsed as part of the URL structure.
  # Plain alphanumeric tokens are left unchanged by the encoding.
  clone_url = (
      f"https://{quote(DAGSHUB_USER_NAME, safe='')}:{quote(DAGSHUB_TOKEN, safe='')}"
      f"@dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.git"
  )
  # NOTE: git stores this URL, token included, as the `origin` remote of the clone.
  !git clone -b "{BRANCH}" "{clone_url}"
  # Later cells (dvc init, dvc add, git add) run relative to the repository root.
  %cd {DAGSHUB_REPO_NAME}
if PULL_GIT:
  !git pull

**Install Requirements**

In [None]:
from pathlib import Path

!pip install --upgrade pip --quiet

# The requirement must be quoted: unquoted, the shell treats `>` as output
# redirection, so pip only receives `dvc` (no minimum version) and a stray file
# named `=2.8.1` is created in the working directory.
!pip install "dvc>=2.8.1" --quiet

**Configure DVC**

In [None]:
# Import DVC package - relevant only when working in a Colab environment
import dvc
import shlex

if INIT_DVC:
  # initialize DVC
  !dvc init

if SET_DVC_REMOTE:
  # Set DVC remote storage as 'DAGsHub storage'
  !dvc remote add origin --local https://dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.dvc

if SET_DVC_USER:
  # General DVC user configuration.
  # The user name and token are shell-quoted so that special characters
  # (spaces, `$`, `&`, quotes, ...) reach DVC unchanged.
  dvc_user_arg = shlex.quote(DAGSHUB_USER_NAME)
  dvc_password_arg = shlex.quote(DAGSHUB_TOKEN)
  !dvc remote modify --local origin auth basic
  !dvc remote modify --local origin user {dvc_user_arg}
  !dvc remote modify --local origin password {dvc_password_arg}

if PULL_DVC:
  # First pass: pull with all output discarded. The original `<& dev_null` was a
  # malformed redirection; this is the portable form of "send stdout and stderr
  # to /dev/null".
  !dvc pull -r origin > /dev/null 2>&1

  # Make sure that all files were pulled
  !dvc pull -r origin
  

In [None]:
def git_push():
  """Push files to the remote Git server on DAGsHub.

  Reads DAGSHUB_USER_NAME, DAGSHUB_TOKEN, DAGSHUB_REPO_OWNER and
  DAGSHUB_REPO_NAME from the notebook namespace and pushes over HTTPS with the
  credentials embedded in the URL. The user name and token are percent-encoded
  so that characters such as '@', ':' or '/' in a password do not corrupt the URL.
  """
  from urllib.parse import quote

  push_url = (
      f"https://{quote(DAGSHUB_USER_NAME, safe='')}:{quote(DAGSHUB_TOKEN, safe='')}"
      f"@dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.git"
  )
  !git push "{push_url}"

In [None]:
from pathlib import Path
import shutil

# normpath drops a trailing slash (e.g. "data/raw/"), so that appending ".dvc" to
# dest_dir_path in a later cell yields ".../data/raw.dvc" rather than ".../data/raw/.dvc".
dest_dir_path = os.path.normpath(os.path.join(os.getcwd(), DATA_DIR_PATH))
gdrive_dir_path = os.path.join("/content/drive/MyDrive", GDRIVE_DATA_PATH)

try:
  # copytree creates dest_dir_path itself and raises FileExistsError if it is
  # already present; any other failure (e.g. a missing source directory) is left
  # to propagate so the run stops before the DVC/Git steps.
  shutil.copytree(gdrive_dir_path, dest_dir_path)
except FileExistsError as e:
  print(f"Error: File already exists at destination directory: {e.filename}")

# Add files to DVC tracking



In [None]:
# Show the working tree state before the copied directory is added to DVC.
!git status

In [None]:
import shlex

# Quote the path for the shell so that spaces and any other special characters
# are passed to DVC intact.
!dvc add {shlex.quote(dest_dir_path)}

**WARNING: Only use `git add .` if you want to track all the new changes with Git! Otherwise, specify all the files to add to Git tracking.**

In [None]:
import os
import shlex

# Stage the "<dir>.dvc" file that sits next to the tracked directory.
# normpath removes any trailing slash so the suffix lands on the directory name,
# and shlex.quote protects spaces and other shell-special characters.
# NOTE(review): `dvc add` normally also updates a .gitignore beside the directory;
# it is not staged here — confirm whether it should be.
dvc_pointer_path = os.path.normpath(dest_dir_path) + ".dvc"
!git add {shlex.quote(dvc_pointer_path)}

In [None]:
# Commit whatever is currently staged (the .dvc file staged by the previous cell).
!git commit -m "Move the data dir to DagsHub storage"

In [None]:
# Re-check the working tree after the commit.
!git status

# Push the files to the remotes

In [None]:
# Both push commands below are commented out, so running this cell does nothing.
# Uncomment them to push the Git commit (git_push) and the DVC-tracked data (dvc push).
# git_push() 

# !dvc push -r origin