In [1]:
import requests

def extract_cifar_data(url, filename="cifar.tar.gz"):
    """Download the CIFAR-100 archive from `url` and store it as a gzipped file.

    The body is streamed to disk chunk by chunk, so the whole archive is never
    held in memory at once.

    Arguments:
    url      -- the URL where the dataset is hosted
    filename -- the full path where the dataset will be written

    Raises:
    requests.HTTPError -- when the server answers with a 4xx/5xx status; in
                          that case `filename` is not created
    """
    # The context manager releases the connection even if writing fails.
    # timeout=(connect, read) stops a stalled transfer from hanging the kernel.
    with requests.get(url, stream=True, timeout=(10, 60)) as r:
        # Fail loudly instead of saving an HTML error page under a .tar.gz name
        r.raise_for_status()

        # Open the file in binary write mode and save the content
        with open(filename, "wb") as file:
            # 1 MiB chunks: still bounded memory, far fewer write calls than 1 KiB
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    print(f"Dataset downloaded and saved as {filename}")

# Fetch the CIFAR-100 python archive into the default location (cifar.tar.gz)
extract_cifar_data(url="https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz")



KeyboardInterrupt



In [None]:
import requests

def extract_cifar_data(url, filename="cifar.tar.gz"):
    """Download the CIFAR-100 dataset and store it as a gzipped file.

    Arguments:
    url      -- the URL where the dataset is hosted
    filename -- the full path where the dataset will be written

    Raises:
    requests.HTTPError -- when the server answers with a 4xx/5xx status; in
                          that case `filename` is not created
    """
    # Stream the response so the archive is written as it arrives instead of
    # being buffered in memory first; the timeout stops a stalled connection
    # from hanging the kernel.
    with requests.get(url, stream=True, timeout=(10, 60)) as r:
        # Without this check an error page would be saved as the "archive"
        r.raise_for_status()

        with open(filename, "wb") as file_context:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                file_context.write(chunk)
    return

In [None]:
# Download the CIFAR-100 python archive to the default path (cifar.tar.gz)
extract_cifar_data(url="https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz")


In [None]:
import tarfile

with tarfile.open("cifar.tar.gz", "r:gz") as tar:
    # The archive was just fetched over the network, so treat it as untrusted:
    # the "data" extraction filter refuses absolute paths, ".." traversal,
    # links pointing outside the destination and special files.
    # Interpreters that predate extraction filters fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(filter="data")
    else:
        tar.extractall()

In [None]:
import pickle

def _load_cifar_pickle(path):
    """Unpickle one CIFAR-100 file and return the resulting object.

    encoding='bytes' leaves the pickled byte strings undecoded, which is why
    the rest of the notebook indexes the result with keys such as b'data'.

    NOTE(review): pickle.load can execute arbitrary code embedded in the file.
    Only use this on the archive fetched from the official CIFAR host.
    """
    with open(path, "rb") as f:
        return pickle.load(f, encoding='bytes')


dataset_meta = _load_cifar_pickle("./cifar-100-python/meta")
dataset_test = _load_cifar_pickle("./cifar-100-python/test")
dataset_train = _load_cifar_pickle("./cifar-100-python/train")

In [None]:
# Show which fields the training split exposes (keys are bytes because the pickle was loaded with encoding='bytes')
dataset_train.keys()


In [None]:
# Values per flattened image: 32 x 32 pixels times 3 colour channels
32*32*3


In [None]:
import numpy as np

# A flattened image stores its three colour planes back to back
# (red, then green, then blue), 1024 values each.
row = dataset_train[b'data'][0]

# Viewing the 3072 values as (3, 32, 32) gives one 32x32 plane per channel;
# unpacking along the first axis hands them out in red, green, blue order.
red, green, blue = row.reshape(3, 32, 32)

# Stack the planes depth-wise to obtain a 32x32x3 image!
combined = np.dstack((red, green, blue))

In [None]:
# Same reconstruction in one expression: view the flattened record as three
# 32x32 planes, then stack them along the depth axis.
test_image = np.dstack(tuple(row.reshape(3, 32, 32)))

In [None]:
%matplotlib inline


In [None]:
import matplotlib.pyplot as plt
# Display the reconstructed image; the trailing semicolon hides the returned object's repr
plt.imshow(test_image);

In [None]:
# Fine-label id of the first training image (not echoed: only a cell's last expression is displayed)
dataset_train[b'fine_labels'][0]
# Class name for label id 19 -- NOTE(review): presumably the id produced by the line above; confirm
print(dataset_meta[b'fine_label_names'][19])
# Same lookup, driven by the image index n instead of a hard-coded label id
n = 0
print(dataset_meta[b'fine_label_names'][dataset_train[b'fine_labels'][n]])
# Filename recorded for the first training image
print(dataset_train[b'filenames'][0])
# Write the reconstructed image to disk as file.png
plt.imsave("file.png", test_image)
import pandas as pd

# Look up the fine-label ids of the two classes of interest by their names.
# NOTE(review): `motocycle_index` is misspelled; the name is kept so that any
# cell already relying on it keeps working.
bicycle_index, motocycle_index = (
    dataset_meta[b'fine_label_names'].index(class_name)
    for class_name in (b'bicycle', b'motorcycle')
)

print(bicycle_index, motocycle_index, sep="\n")

In [None]:
def _select_labels(dataset, keep_labels=(8, 48)):
    """Build a dataframe for one CIFAR split, restricted to the given fine labels.

    Arguments:
    dataset     -- unpickled CIFAR split providing b'filenames' and b'fine_labels'
    keep_labels -- fine-label ids to keep; the default (8, 48) selects the
                   bicycle and motorcycle classes

    Returns a new dataframe with the columns:
    filenames -- image file name, decoded from bytes to a regular string
    labels    -- fine-label id
    row       -- position of the image inside the split
    """
    frame = pd.DataFrame({
        "filenames": dataset[b'filenames'],
        "labels": dataset[b'fine_labels'],
        "row": range(len(dataset[b'filenames']))
    })

    # Drop every row whose label is not requested. The explicit .copy() detaches
    # the selection from `frame`, so assigning a column below does not trigger
    # pandas' SettingWithCopyWarning.
    selected = frame.loc[frame["labels"].isin(list(keep_labels))].copy()

    # Decode the filenames so they are regular strings
    selected["filenames"] = selected["filenames"].apply(
        lambda raw_name: raw_name.decode("utf-8")
    )
    return selected


# Same filtering for both splits
df_train = _select_labels(dataset_train)
df_test = _select_labels(dataset_test)

In [None]:
# Sanity check: (rows, columns) of the filtered train and test frames
print(df_train.shape, df_test.shape, sep="\n")

In [None]:
import os

# Create the output folders from Python instead of shelling out: exist_ok=True
# makes the cell safe to re-run, and it does not depend on a `mkdir` binary.
os.makedirs("./train", exist_ok=True)
os.makedirs("./test", exist_ok=True)

In [None]:
def save_images(dataset, row, path):
    """Rebuild one CIFAR image from its flattened record and write it to disk.

    Arguments:
    dataset -- unpickled CIFAR split; its b'data' entry is indexed by image position
    row     -- record exposing `row` (position of the image inside `dataset`)
               and 'filenames' (name of the file to create)
    path    -- output directory, with or without a trailing separator

    Returns 0 once the file has been written.
    """
    import os  # local import keeps this cell self-contained

    # Grab the image data in row-major form
    img = dataset[b'data'][row.row]

    # The record holds three consecutive 32x32 planes; stack them depth-wise
    # into a single 32x32x3 image
    target = np.dstack(img.reshape(3,32,32))

    # Join instead of concatenating so a directory given without a trailing
    # slash ("./train") does not silently produce "./trainname.png"
    plt.imsave(os.path.join(path, row['filenames']), target)

    # Return any signal data you want for debugging
    return 0

## Save ALL selected images using the save_images function.
# The loop variable is deliberately not called `row`: that name already holds the
# flattened pixel array used by the image-reconstruction cells, and overwriting
# it with a dataframe record would break those cells when they are re-run.
for idx, record in df_train.iterrows():
    save_images(dataset_train, record, './train/')

for idx, record in df_test.iterrows():
    save_images(dataset_test, record, './test/')

In [None]:
python3 -m pip install tensorflow

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Small CNN for 32x32 RGB inputs: three convolution stages (the first two each
# followed by 2x2 max-pooling), then a dense head ending in a 2-way softmax.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))  # 2 classes: bicycle and motorcycle

# Adam optimiser; the sparse categorical loss takes integer class ids as targets
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Create data generators for train and test sets (pixel values rescaled by 1/255)
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# save_images writes every PNG straight into ./train and ./test, without one
# subfolder per class, so flow_from_directory would find no classes at all.
# Drive the generators from the dataframes instead. flow_from_dataframe needs
# string labels for class_mode='binary', and `classes` pins the mapping to
# bicycle (8) -> 0, motorcycle (48) -> 1 instead of alphabetical order.
label_classes = ['8', '48']

train_generator = train_datagen.flow_from_dataframe(
    df_train.assign(labels=df_train["labels"].astype(str)),
    directory='./train',
    x_col='filenames',
    y_col='labels',
    classes=label_classes,
    target_size=(32, 32),
    batch_size=32,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_dataframe(
    df_test.assign(labels=df_test["labels"].astype(str)),
    directory='./test',
    x_col='filenames',
    y_col='labels',
    classes=label_classes,
    target_size=(32, 32),
    batch_size=32,
    class_mode='binary'
)

# Train the model.
# One epoch is one full pass over each generator: len(generator) is its number
# of batches. Hard-coded step counts larger than that make Keras run out of
# data part-way through an epoch and stop training early.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=28,
    validation_data=test_generator,
    validation_steps=len(test_generator)
)

# Save the trained model
model.save('./image_classification_model.h5')

print("Model trained and saved locally.")


In [None]:
import pandas as pd

# Define the function to create metadata .lst files
def to_metadata_file(df, prefix):
    """Write a tab-separated `<prefix>.lst` metadata file for one dataset split.

    Each output line holds, in order: the image's row index, its binary label
    (0 for bicycles, i.e. label 8; 1 for everything else, i.e. motorcycles /
    label 48) and its file path.

    Arguments:
    df     -- dataframe with "row", "labels" and "filenames" columns; it is
              left untouched, so the function can be called repeatedly on the
              same frame and always produces the same file
    prefix -- output path without the ".lst" extension
    """
    # Build the output in a fresh frame instead of rewriting the caller's
    # columns: mutating `df["labels"]` in place would turn every label into 1
    # on a second call, because the labels would already be 0/1 by then.
    manifest = pd.DataFrame({
        "row": df["row"],
        "labels": df["labels"].ne(8).astype(int),
        "s3_path": df["filenames"],
    })

    # Save the dataframe to a .lst file
    manifest.to_csv(f"{prefix}.lst", sep="\t", index=False, header=False)
    print(f"{prefix}.lst file created successfully.")

# Write train.lst and test.lst; copies are passed so the notebook-level frames stay as they are
to_metadata_file(df=df_train.copy(), prefix="train")
to_metadata_file(df=df_test.copy(), prefix="test")
