# Git Parser

In [91]:
import os
import git
import shutil
import tldextract

from git import Repo
from contextlib import suppress

In [137]:
# Shared configuration for the cells below.
# Directory that receives the cloned / downloaded files.
dst = "data/files"
# Hosting domains accepted by dir_from_repo.
domain_list = [
    "github.com",
    "gitlab.com",
    "bitbucket.org",
]

In [132]:
def copydata(src, dst, symlinks=False, ignore=None):
    """
    Copy every entry of directory 'src' into directory 'dst'.

    Sub-directories are copied recursively with shutil.copytree, files are
    copied with shutil.copy2. 'dst' (and any missing parents) is created
    first, so copying into a not-yet-existing destination works even when
    'src' is empty or contains only files.

    :param src : string path to the directory whose contents are copied
    :param dst : string path to the destination directory
    :param symlinks : passed to shutil.copytree for each sub-directory
    :param ignore : passed to shutil.copytree for each sub-directory; it is
                    not applied to the top-level entries of 'src'
    :raises FileExistsError : from shutil.copytree when a sub-directory with
                              the same name already exists in 'dst'
    """
    # copy2 needs an existing target directory; copytree only creates the
    # directory for its own sub-tree, so make sure dst itself exists.
    os.makedirs(dst, exist_ok=True)

    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks=symlinks, ignore=ignore)
        else:
            shutil.copy2(s, d)

            
def dir_from_repo(url, dst):
    """
    Download a single directory of a remote repository into 'dst'.

    'url' is split on "/" and is expected to have the form
    https://<host>/<owner>/<repo>/<keyword>/<branch>/<path/to/dir>.
    The repository is cloned at <branch> into a temporary '_tmp' folder
    inside 'dst', the contents of <path/to/dir> are copied into 'dst', and
    the temporary clone is removed again (also when cloning or copying fails).

    :param url : string with the url to a directory inside a repository
    :param dst : string path to the destination directory
    :raises ValueError : if the url's domain is not listed in domain_list
    :raises IndexError : if the url has too few "/"-separated segments to
                         contain a branch and a directory path
    """
    # check if domain is correct
    extracted = tldextract.extract(url)
    domain = f"{extracted.domain}.{extracted.suffix}"
    if domain not in domain_list:
        # Fail loudly: returning silently would let callers believe the
        # directory was downloaded although nothing happened.
        raise ValueError(f"Unsupported repository host: {domain!r}")

    # get repo data
    # NOTE(review): the fixed indices assume the branch is exactly one path
    # segment and sits at position 6; branch names containing "/" or hosts
    # with a different directory-URL layout would be parsed incorrectly -
    # confirm per host before relying on it.
    url_split = url.split("/", 7)
    dir_path = url_split[7]
    branch = url_split[6]
    repo_url = "/".join(url_split[:5])

    tmp_dir = os.path.join(dst, '_tmp')
    try:
        # clone repo
        Repo.clone_from(repo_url, tmp_dir, branch=branch)

        # copy data
        copydata(os.path.join(tmp_dir, dir_path), dst)
    finally:
        # Always remove the temporary clone so a failed run does not leave
        # a stale '_tmp' behind that would break the next clone.
        # ignore_errors avoids masking the original exception when the
        # clone never created the folder.
        shutil.rmtree(tmp_dir, ignore_errors=True)


def parse_repo(url, dst_path):
    """
    Parse repository 'url' and save files in 'dst_path'.

    The url is first treated as a whole repository and cloned. If that
    fails, it is treated as a link to a directory inside a repository and
    only that directory is downloaded.

    :param url : string with the url to the repository
    :param dst_path : string path to the destination directory where files should be stored
    :return : string status message - "Repo cloned.", "Repo with specified
              directory downloaded", or "Please provide a correct url" when
              both attempts fail
    """
    # Clone if Repo
    # NOTE(review): any exception is deliberately swallowed so the next
    # strategy can be tried; a more specific error (or errors) would be better.
    with suppress(Exception):
        Repo.clone_from(url, dst_path)
        return "Repo cloned."

    # Download if directory
    with suppress(Exception):
        dir_from_repo(url, dst_path)
        return "Repo with specified directory downloaded"

    return "Please provide a correct url"

In [138]:
# CASE 1: url of a whole repository (ends in ".git").
# The direct clone attempt in parse_repo succeeds (see the output below).
url = 'https://github.com/vercel/next.js.git'
parse_repo(url, dst)

'Repo cloned.'

In [140]:
# CASE 2: url of a directory inside a branch (.../tree/<branch>/<dir>).
# The direct clone fails, so parse_repo falls back to dir_from_repo
# (see the output below).
url = 'https://github.com/vercel/next.js/tree/alpha/examples/basic-css'
parse_repo(url, dst)

clone


'Repo with specified directory downloaded'

In [141]:
# CASE 3: url with only an owner path segment - no repository or directory.
# Both attempts in parse_repo fail and its error message is returned
# (see the output below).
url = 'https://github.com/vercel'
parse_repo(url, dst)

'Please provide a correct url'