# If you've already downloaded the training files:

1. Place the file, named "train.tar.gz", into the "data/dogs_and_cats" directory (create it if it does not exist). That will prevent this notebook from re-downloading it.
2. Run all of the commands in this notebook

In [12]:
# Python > 3, TensorFlow > 1.0
# Modules required for file download and extraction
import os
import sys
import tarfile
from six.moves.urllib.request import urlretrieve

In [13]:
# Create the working folders used by the rest of the notebook, skipping any
# that are already on disk.
for required_dir in ('saved_models', 'data'):
    if not os.path.exists(required_dir):
        os.mkdir(required_dir)

In [14]:
def maybe_download(filename, url, directory, force=False):
    """Download a file if not present.

    Args:
        filename: Name of the file, appended to `url` to form the remote
            address and joined onto `directory` to form the local path.
        url: Base URL (including its trailing slash) that hosts `filename`.
        directory: Local folder to store the file in. A trailing path
            separator is optional.
        force: When True, download even if the local file already exists.

    Returns:
        The local path of the (possibly pre-existing) file.
    """
    # os.path.join gives the same result as plain concatenation when
    # `directory` already ends with a separator, and also works when it
    # does not.
    target = os.path.join(directory, filename)

    if not force and os.path.exists(target):
        print('File {} already present.'.format(filename))
        return target

    # Download to a side file first and only move it into place once the
    # transfer has finished, so that an interrupted download is never
    # mistaken for a complete file on the next run.
    partial = target + '.part'
    try:
        urlretrieve(url + filename, partial)
        os.replace(partial, target)
    finally:
        # Only still present if the transfer or the move failed.
        if os.path.exists(partial):
            os.remove(partial)

    print('\nDownload complete for {}'.format(target))
    return target

def maybe_extract(filename, force=False):
    """Extract a tar archive into its own folder unless already extracted.

    The archive is expected to unpack into a directory named after the
    archive with its suffix removed (e.g. ``data/train.tar.gz`` ->
    ``data/train``). If that directory already exists the extraction is
    skipped unless `force` is True.

    Args:
        filename: Path to the tar archive.
        force: When True, extract even if the target directory exists.

    Returns:
        The archive path with its archive suffix removed.
    """
    # Strip a known archive suffix as a single unit so that dots inside the
    # stem survive (``model.v1.tgz`` -> ``model.v1``, not ``model``).
    lowered = filename.lower()
    for suffix in ('.tar.gz', '.tar.bz2', '.tar.xz', '.tgz', '.tar'):
        if lowered.endswith(suffix):
            root = filename[:-len(suffix)]
            break
    else:
        # Unknown suffix: fall back to dropping up to two extensions.
        root = os.path.splitext(os.path.splitext(filename)[0])[0]

    if os.path.isdir(root) and not force:
        # You may override by setting force=True.
        print('{} already present - don\'t need to extract {}.'.format(root, filename))
        return root

    print('Extracting data for {}. This may take a while. Please wait.'.format(root))
    # Push the message out before the slow extraction starts.
    sys.stdout.flush()

    # Extract next to the archive; dirname copes with any path separator and
    # '.' covers an archive that lives in the current directory.
    destination = os.path.dirname(root) or '.'

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(filename) as tar:
        if hasattr(tarfile, 'data_filter'):
            # Archives are fetched over the network: where the interpreter
            # supports extraction filters, refuse members that would escape
            # the destination directory.
            tar.extractall(destination, filter='data')
        else:
            tar.extractall(destination)
    return root

In [21]:
# Root folder that holds the cats/dogs dataset
DATASET_DIR = 'data/dogs_and_cats/'

# Base URL the dataset archive is fetched from
url = 'http://samabrahams.com/data/'

# One sub-folder per class
cats_dir = DATASET_DIR + 'cats'
dogs_dir = DATASET_DIR + 'dogs'

# Create the dataset root first, then the class folders inside it
for dataset_subdir in (DATASET_DIR, cats_dir, dogs_dir):
    if not os.path.exists(dataset_subdir):
        os.mkdir(dataset_subdir)

# Fetch the training archive (skipped when it is already on disk) and
# always unpack it again
train_zip_path = maybe_download('train.tar.gz', url, DATASET_DIR)
train_folder = maybe_extract(train_zip_path, force=True)

File train.tar.gz already present.
Extracting data for data/dogs_and_cats/train. This may take a while. Please wait.


In [22]:
# Display the path returned by maybe_extract for the training archive
train_folder

'data/dogs_and_cats/train'

In [23]:
def move_to_folder(train_folder, cats_dir, dogs_dir):
    """Sort the entries of `train_folder` into class folders by name.

    An entry whose name contains 'cat' is moved to `cats_dir`; otherwise,
    if the name contains 'dog', it is moved to `dogs_dir`. Entries matching
    neither keyword are left where they are.
    """
    # Checked in order, first match wins ('cat' takes precedence over 'dog').
    destinations = (('cat', cats_dir), ('dog', dogs_dir))

    for entry_name in os.listdir(train_folder):
        for keyword, target_dir in destinations:
            if keyword in entry_name:
                source_path = os.path.join(train_folder, entry_name)
                os.rename(source_path, os.path.join(target_dir, entry_name))
                break

In [24]:
# Move the extracted training files into the per-class folders created earlier
move_to_folder(train_folder, cats_dir, dogs_dir)

In [25]:
# AlexNet weights file (judging by its name) and the site that hosts it
alexnet_weights = 'bvlc_alexnet.npy'
url = 'http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/'

# Downloaded model files are kept in this folder
directory = 'saved_models/'

# Fetch the weights unless they are already on disk; the returned local path
# is shown as the cell output
maybe_download(alexnet_weights, url, directory)

File bvlc_alexnet.npy already present.


'saved_models/bvlc_alexnet.npy'

In [20]:
# Inception archive name and the TensorFlow download server hosting it
inception = 'inception-2015-12-05.tgz'
url = 'http://download.tensorflow.org/models/image/imagenet/'

# Reuses `directory` (the saved-models folder) from the previous cell.
inception_path = maybe_download(inception, url, directory)

# NOTE(review): the recorded output shows an extraction even though the
# archive was already present - presumably it does not unpack into a folder
# named after itself, so the "already extracted" check never matches; confirm.
inception_folder = maybe_extract(inception_path)

File inception-2015-12-05.tgz already present.
Extracting data for saved_models/inception-2015-12-05. This may take a while. Please wait.
