In [None]:
%%capture
# Download Dependencies
%pip install -q jmd_imagescraper
%pip install tensorflow_datasets==4.7
%pip install os

If the imports below do not work, you may need to restart the kernel. You can do this with the "RESTART KERNEL" button in the top right.

In [None]:
%%capture
# Import Dependencies
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Sequential, losses, metrics, layers
import numpy as np
import os
from PIL import Image
from jmd_imagescraper.core import * # duckduckgo_search comes from here

In [None]:
from pathlib import Path

# All downloads go under an "images" folder in the current working directory.
root = Path.cwd().joinpath("images")

# One search per class, cats first and then dogs. Each entry is
# (label, search keywords); at most 100 results are requested per search.
# NOTE(review): presumably each search stores its files under root/<label>/ and
# returns the saved file paths -- get_label relies on that parent-folder name; confirm.
cat_images, dog_images = (
    duckduckgo_search(root, label, keywords, max_results=100)
    for label, keywords in (("Cats", "cute kittens"), ("Dogs", "cute puppies"))
)

In [None]:
# Class names; a name's position in this list is its integer label.
CLASSES = ["Cats", "Dogs"]


def get_label(file_path):
    """Return the integer class index for an image path.

    The class name is the second-to-last component of the normalised path,
    i.e. the name of the folder that directly contains the file.

    Args:
        file_path: Path of an image file, e.g. ``images/Cats/001.jpg``.

    Returns:
        The position of the class name in ``CLASSES``.

    Raises:
        ValueError: If the containing folder's name is not in ``CLASSES``.
        IndexError: If the normalised path has fewer than two components.
    """
    path_parts = os.path.normpath(file_path).split(os.sep)
    class_name = path_parts[-2]  # folder holding the file
    return CLASSES.index(class_name)

def process_file(file_path, img_size=224):
    """Load an image file and return it as a scaled RGB array.

    Args:
        file_path: Path of the image file to load.
        img_size: Side length, in pixels, of the square output image.

    Returns:
        A numpy array of shape ``(img_size, img_size, 3)`` whose values lie
        between 0 and 1.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(file_path) as img:
        # Scraped images may be grayscale, palette-based or carry an alpha
        # channel; forcing RGB guarantees exactly three channels so every
        # array has the same shape and can be stacked for the model.
        img = img.convert("RGB")
        img = img.resize((img_size, img_size), Image.Resampling.BILINEAR)
        pixels = np.asarray(img)
    # Scale the 8-bit channel values into the range [0, 1].
    return pixels / 255.0

In [None]:
def create_dataset(datasets, training_proportion=0.8):
    """Split image paths into training and testing features and labels.

    Args:
        datasets: A list with one list of image paths per class,
            e.g. ``[cat_images, dog_images]``.
        training_proportion: Share of each class list, counted from its
            start, that goes to the training set; the rest goes to testing.

    Returns:
        A tuple ``(training_features, training_labels, testing_features,
        testing_labels)`` of plain lists. Features come from ``process_file``
        and labels from ``get_label``; examples stay grouped class by class,
        in the order given.
    """
    training_features, training_labels = [], []
    testing_features, testing_labels = [], []

    for class_paths in datasets:
        for position, path in enumerate(class_paths):
            # Paths before the cut-off position are used for training.
            in_training = position < (len(class_paths) * training_proportion)
            if in_training:
                features, labels = training_features, training_labels
            else:
                features, labels = testing_features, testing_labels

            features.append(process_file(path))
            labels.append(get_label(path))

    return training_features, training_labels, testing_features, testing_labels

In [None]:
# Build the train/test lists from both classes using the default split.
(
    training_features,
    training_labels,
    testing_features,
    testing_labels,
) = create_dataset(datasets=[cat_images, dog_images])

In [None]:
# Stack the per-image arrays into single float32 feature arrays.
training_features_np = np.asarray(training_features).astype('float32')
testing_features_np = np.asarray(testing_features).astype('float32')

# Labels are class indices, so keep them as integers (the form sparse
# categorical cross-entropy is documented to take) rather than floats.
training_labels_np = np.asarray(training_labels).astype('int32')
testing_labels_np = np.asarray(testing_labels).astype('int32')

# create_dataset emits the examples class by class, so without shuffling every
# training batch would contain a single class. The buffer spans the whole
# training set to get a full shuffle (re-drawn each epoch by default);
# max(..., 1) keeps the buffer size valid.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (training_features_np, training_labels_np)
).shuffle(buffer_size=max(len(training_features_np), 1))

# Evaluation does not depend on example order, so the test set is left as is.
test_dataset = tf.data.Dataset.from_tensor_slices(
    (testing_features_np, testing_labels_np)
)


# Small convolutional network: three conv layers (with max-pooling after the
# first two), then a dense head that outputs one raw score (logit) per class.
model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2),
])

# The last layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for 10 epochs on batches of 16, validating on the test split each epoch.
train_batches = train_dataset.batch(16)
validation_batches = test_dataset.batch(16)
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=validation_batches,
)