## How to Download a File from a URL

In [None]:
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm

# Directory that the downloaded parameter file is written into.
resource_dir = 'resources/'
# Fail early if the target directory is missing, and name the actual path
# being checked so the error is actionable.
if not isdir(resource_dir):
    raise FileNotFoundError(
        "Resource directory {!r} doesn't exist!".format(resource_dir))

class DLProgress(tqdm):
    """tqdm progress bar whose ``hook`` method can be passed to ``urlretrieve``."""

    # Block count seen on the previous ``hook`` call; used to convert the
    # cumulative block number into a per-call increment.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the data transferred since the previous call.

        Args:
            block_num: Cumulative number of blocks transferred so far.
            block_size: Size of one block, in bytes.
            total_size: Total size of the download, if known.
        """
        # Only adopt a known, positive size. A call with the default ``None``
        # (or an unknown-size value such as -1) must not clobber a total
        # that was already set.
        if total_size is not None and total_size > 0:
            self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

import os  # needed only for the atomic rename below

# Final location of the parameter file inside resource_dir.
vgg_path = resource_dir + "vgg16.npy"

if not isfile(vgg_path):
    # Download to a temporary name first: if the transfer is interrupted, no
    # truncated 'vgg16.npy' is left behind for the isfile() check above to
    # mistake for a complete download on the next run.
    partial_path = vgg_path + ".part"
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='VGG16 Parameters') as pbar:
        urlretrieve(
            'https://s3.amazonaws.com/content.udacity-data.com/nd101/vgg16.npy',
            partial_path,
            pbar.hook)
    # Move the finished download into place only after urlretrieve succeeded.
    os.replace(partial_path, vgg_path)
else:
    print("Parameter file already exists!")

## How to Download and Extract a Tarball
## Please don't run this cell: the file is huge. Just learn from the code.

In [None]:
import tarfile

# Directory whose presence is checked below to decide whether the archive
# still needs to be extracted.
dataset_folder_path = 'flower_photos'

class DLProgress(tqdm):
    """tqdm progress bar whose ``hook`` method can be passed to ``urlretrieve``."""

    # Block count seen on the previous ``hook`` call; used to convert the
    # cumulative block number into a per-call increment.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the data transferred since the previous call.

        Args:
            block_num: Cumulative number of blocks transferred so far.
            block_size: Size of one block, in bytes.
            total_size: Total size of the download, if known.
        """
        # Only adopt a known, positive size. A call with the default ``None``
        # (or an unknown-size value such as -1) must not clobber a total
        # that was already set.
        if total_size is not None and total_size > 0:
            self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

import os  # needed only for the atomic rename below

# Local file name the downloaded archive is stored under.
archive_path = 'flower_photos.tar.gz'

if not isfile(archive_path):
    # Download to a temporary name first: if the transfer is interrupted, no
    # truncated archive is left behind for the isfile() check above to
    # mistake for a complete download on the next run.
    partial_path = archive_path + '.part'
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='Flowers Dataset') as pbar:
        urlretrieve(
            'http://download.tensorflow.org/example_images/flower_photos.tgz',
            partial_path,
            pbar.hook)
    # Move the finished download into place only after urlretrieve succeeded.
    os.replace(partial_path, archive_path)

if not isdir(dataset_folder_path):
    # The with-statement closes the archive; no explicit close() is needed.
    with tarfile.open(archive_path) as tar:
        # The archive comes from the network, so use the 'data' extraction
        # filter when this Python version provides it; fall back to plain
        # extraction on versions that do not.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(filter='data')
        else:
            tar.extractall()

## JSON file parsing and actions

In [None]:
import json

def create_data_pipeline():
    """Deploy a data pipeline for every DynamoDB table listed in tables.json.

    Reads ``tables.json`` from the current working directory, iterates over
    its entries, and calls ``dataPipeline.deploy_pipeline`` with the
    ``table_name`` of each entry whose ``'source type'`` is ``'dynamodb'``.

    Raises:
        SystemExit: If ``tables.json`` does not exist.
    """
    import dataPipeline

    try:
        # The mode must be the string 'r'; the bare name ``r`` is undefined
        # and raised NameError before the file was ever opened.
        with open('tables.json', 'r', encoding='utf-8') as tbls:
            tables = json.load(tbls)
    except FileNotFoundError:
        raise SystemExit("Please make sure tables.json exists")

    print("Creating data pipelines per each table")

    for table in tables:
        # NOTE(review): the key 'source type' contains a space while
        # 'table_name' uses an underscore -- confirm against the real
        # tables.json schema.
        if table['source type'] == 'dynamodb':
            table_name = table['table_name']

            # Hand the table name to the pipeline module for deployment.
            dataPipeline.deploy_pipeline(table_name)
        
            
            
            
            