In [None]:
# Install the notebook's dependencies. %pip (rather than !pip) installs into the
# environment of the running kernel; -q keeps the cell output short.
# TODO: pin versions so the notebook stays reproducible.
%pip install -q tensorflow opencv-python-headless matplotlib




In [None]:
from google.colab import files

# Upload kaggle.json (your Kaggle API token). It is a credential: do not commit
# it or leave it in shared outputs.
files.upload()

# Put the token where the kaggle CLI looks for it, readable by the owner only.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the dataset (the archive is about 11 GB)
!kaggle datasets download -d google/tinyquickdraw
# -q: do not print one line per extracted file.
# -n: never overwrite existing files, so re-running the cell does not stop at
#     an interactive "replace?" prompt.
!unzip -q -n tinyquickdraw.zip -d quickdraw_data


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/google/tinyquickdraw
License(s): other
Downloading tinyquickdraw.zip to /content
100% 11.1G/11.1G [11:44<00:00, 20.2MB/s]
100% 11.1G/11.1G [11:44<00:00, 16.9MB/s]
Archive:  tinyquickdraw.zip
  inflating: quickdraw_data/baseball bat.ndjson  
  inflating: quickdraw_data/finger.ndjson  
  inflating: quickdraw_data/helmet.ndjson  
  inflating: quickdraw_data/hexagon.ndjson  
  inflating: quickdraw_data/hockey stick.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/The Eiffel Tower.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/The Great Wall of China.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/The Mona Lisa.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/aircraft carrier.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/airplane.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/alarm clock.ndjson  
  inflating: quickdraw_data/quickdraw_simplified/ambulan

In [None]:
# ndjson is used by the preprocessing cell to parse the QuickDraw .ndjson files.
# %pip installs into the environment of the running kernel.
%pip install -q ndjson

Collecting ndjson
  Downloading ndjson-0.3.1-py2.py3-none-any.whl.metadata (3.2 kB)
Downloading ndjson-0.3.1-py2.py3-none-any.whl (5.3 kB)
Installing collected packages: ndjson
Successfully installed ndjson-0.3.1


In [None]:
import os
import numpy as np
import cv2
import ndjson

def preprocess_quickdraw_ndjson(data_path, output_path, occlusion_probability=0.3, seed=None):
    """Rasterize QuickDraw sketches into paired complete/incomplete PNG images.

    Every ``*.ndjson`` file found directly inside ``data_path`` (sub-directories
    are not searched) is read, and each record's ``'drawing'`` strokes are drawn
    as white polylines on a black 256x256 canvas. The image is written twice,
    under the same file name, to ``<output_path>/complete`` and
    ``<output_path>/incomplete``. With probability ``occlusion_probability`` the
    "incomplete" copy has each pixel independently zeroed by a random 0/1 mask;
    otherwise it is identical to the complete image.

    Args:
        data_path: Directory containing the ``.ndjson`` files.
        output_path: Directory under which ``complete/`` and ``incomplete/``
            are created.
        occlusion_probability: Chance, per sketch, of applying the random mask.
        seed: Optional integer seed for a private random generator so the
            occlusion is reproducible. When ``None`` (default) NumPy's global
            random state is used.

    Raises:
        OSError: If an image cannot be written. Failing loudly keeps the two
            output folders aligned file-for-file.
    """
    incomplete_dir = os.path.join(output_path, "incomplete")
    complete_dir = os.path.join(output_path, "complete")

    os.makedirs(incomplete_dir, exist_ok=True)
    os.makedirs(complete_dir, exist_ok=True)

    # Without a seed keep drawing from NumPy's global generator; with a seed use
    # a private RandomState so the result does not depend on other cells.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Sorted so that, with a seed, the random draws meet the sketches in a stable order.
    for filename in sorted(os.listdir(data_path)):
        if not filename.endswith('.ndjson'):
            continue

        # splitext removes only the final extension, so a category name that
        # itself contains a dot is not truncated (which could make two files
        # overwrite each other's images).
        category = os.path.splitext(filename)[0]

        with open(os.path.join(data_path, filename), encoding="utf-8") as f:
            sketches = ndjson.load(f)

        for i, sketch_data in enumerate(sketches):
            img = np.zeros((256, 256), dtype=np.uint8)

            # Each stroke is [xs, ys]; pair them up as (x, y) points and draw.
            # NOTE(review): assumes coordinates already fit the 256x256 canvas — confirm for raw (non-simplified) files.
            for stroke in sketch_data['drawing']:
                points = np.array(list(zip(stroke[0], stroke[1])), dtype=np.int32)
                if points.size == 0:
                    continue  # nothing to draw for an empty stroke
                cv2.polylines(img, [points], False, 255, 2)

            # Apply random occlusion
            if rng.rand() < occlusion_probability:
                occlusion_mask = rng.randint(0, 2, img.shape).astype(np.uint8)
                incomplete_img = img * occlusion_mask
            else:
                incomplete_img = img

            # Save both images under the same name so they can be paired later.
            image_name = f"{category}_{i}.png"
            for directory, image in ((complete_dir, img), (incomplete_dir, incomplete_img)):
                target = os.path.join(directory, image_name)
                # cv2.imwrite signals failure through its return value only.
                if not cv2.imwrite(target, image):
                    raise OSError(f"Failed to write image: {target}")

# Example usage: rasterize every .ndjson file located directly in `data_path`
# (sub-folders are not scanned) and write the paired PNGs to
# `output_path`/complete and `output_path`/incomplete.
data_path = "quickdraw_data"  # Path to your .ndjson files
output_path = "preprocessed_data"  # Root folder for the generated PNG pairs
preprocess_quickdraw_ndjson(data_path, output_path)


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def unet_model(input_shape=(256, 256, 1)):
    """Build and compile a three-level U-Net for image-to-image prediction.

    The encoder applies two 3x3 ReLU convolutions followed by 2x2 max-pooling at
    64, 128 and 256 filters; the bottleneck uses 512 filters. The decoder mirrors
    the encoder with stride-2 transposed convolutions, each concatenated with the
    matching encoder feature map. A 1x1 sigmoid convolution produces a
    single-channel output.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer, binary cross-entropy
        loss and the accuracy metric.
    """

    def conv_pair(tensor, filters):
        # Two stacked 3x3 convolutions that keep the spatial size.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def upsample_and_merge(tensor, skip, filters):
        # Double the resolution, then attach the encoder features of that level.
        upsampled = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        return layers.concatenate([upsampled, skip])

    inputs = layers.Input(shape=input_shape)

    # Encoder: remember each level's output for the skip connections.
    skip_connections = []
    features = inputs
    for filters in (64, 128, 256):
        features = conv_pair(features, filters)
        skip_connections.append(features)
        features = layers.MaxPooling2D((2, 2))(features)

    # Bottleneck
    features = conv_pair(features, 512)

    # Decoder: consume the skip connections from deepest to shallowest.
    for filters in (256, 128, 64):
        features = upsample_and_merge(features, skip_connections.pop(), filters)
        features = conv_pair(features, filters)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(features)

    model = Model(inputs=[inputs], outputs=[outputs])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the model with the default (256, 256, 1) input shape
model = unet_model()

# Summary of the model (layers, output shapes and parameter counts)
model.summary()


In [None]:
import os

complete_dir = "preprocessed_data/complete"
incomplete_dir = "preprocessed_data/incomplete"

# Sanity check: report how many files each preprocessed folder contains.
n_complete = len(os.listdir(complete_dir))
print(f"Number of images in 'complete': {n_complete}")
n_incomplete = len(os.listdir(incomplete_dir))
print(f"Number of images in 'incomplete': {n_incomplete}")


Number of images in 'complete': 686210
Number of images in 'incomplete': 686210


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

# Data augmentation configuration
datagen = ImageDataGenerator(
    # The PNGs hold 0-255 pixel values, but the model ends in a sigmoid and is
    # trained with binary cross-entropy, so inputs and targets must lie in [0, 1].
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # 20% of the data used for validation
)


def _paired_batches(data_dir, subset, batch_size, seed):
    """Return ``(generator, steps)`` yielding ``(incomplete, complete)`` batches.

    Two directory iterators are created with identical settings and the same
    seed, one over ``incomplete/`` and one over ``complete/``. Sharing the seed
    keeps their shuffling and random augmentations in step, so every input batch
    is matched with the corresponding target batch. This relies on both folders
    holding the same file names.
    """
    flow_kwargs = dict(
        target_size=(256, 256),
        batch_size=batch_size,
        class_mode=None,  # yield image batches only; the pairs are assembled below
        color_mode='grayscale',
        subset=subset,
        seed=seed,
    )
    incomplete_flow = datagen.flow_from_directory(data_dir, classes=['incomplete'], **flow_kwargs)
    complete_flow = datagen.flow_from_directory(data_dir, classes=['complete'], **flow_kwargs)

    if incomplete_flow.samples != complete_flow.samples:
        raise ValueError(
            f"'incomplete' has {incomplete_flow.samples} images but 'complete' has "
            f"{complete_flow.samples}; the folders must contain matching files."
        )

    def batches():
        # Directory iterators loop forever; the step counts passed to fit()
        # delimit the epochs.
        while True:
            yield next(incomplete_flow), next(complete_flow)

    return batches(), len(incomplete_flow)


def train_model(model, data_dir, batch_size=32, epochs=50, seed=42):
    """Train ``model`` to reconstruct complete sketches from incomplete ones.

    Args:
        model: Compiled Keras model taking and producing 256x256x1 images.
        data_dir: Directory containing the ``incomplete`` and ``complete``
            folders with identically named images.
        batch_size: Number of image pairs per batch.
        epochs: Number of training epochs.
        seed: Seed shared by the input and target iterators so both apply the
            same shuffling and augmentation.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If the two folders do not contain the same number of images.
    """
    # Inputs come from 'incomplete' and targets from 'complete'. With
    # class_mode='input' on the 'incomplete' folder the target would be the
    # input itself, and the complete images would never be used.
    train_batches, train_steps = _paired_batches(data_dir, 'training', batch_size, seed)
    # NOTE: the validation subset shares `datagen`, so it is augmented as well.
    val_batches, val_steps = _paired_batches(data_dir, 'validation', batch_size, seed)

    # Checkpoint to save the best model
    checkpoint = ModelCheckpoint("unet_model.keras", monitor='val_loss', verbose=1, save_best_only=True)

    # Training the model
    return model.fit(
        train_batches,
        steps_per_epoch=train_steps,
        validation_data=val_batches,
        validation_steps=val_steps,
        epochs=epochs,
        callbacks=[checkpoint]
    )

# Assuming 'model' is your U-Net model already defined (created by unet_model() in an earlier cell).
# Trains with the function defaults: batch_size=32, epochs=50.
data_dir = "preprocessed_data"  # Directory containing 'complete' and 'incomplete' folders
train_model(model, data_dir)


Found 548968 images belonging to 1 classes.
Found 137242 images belonging to 1 classes.
Epoch 1/50


  self._warn_if_super_not_called()


[1m  175/17156[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:17:26[0m 1s/step - accuracy: 0.5484 - loss: nan

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

def complete_curve_with_model(image_path, model_path="unet_model.keras", rescale=1.0 / 255):
    """Run a saved model on one incomplete sketch and plot input next to output.

    Args:
        image_path: Path of the image to complete; it is read as grayscale and
            resized to 256x256.
        model_path: Path of the saved Keras model. Defaults to
            ``unet_model.keras``, the file written by the training checkpoint.
        rescale: Factor applied to the 0-255 pixel values before prediction.
            The default maps them to [0, 1], the range of the model's sigmoid
            output. It must match the scaling used during training; pass ``1.0``
            for a model trained on raw 0-255 pixels.

    Returns:
        The predicted image as a 2-D float array.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    model = load_model(model_path)

    image = cv2.resize(image, (256, 256))
    # Scale to float and add the batch and channel axes: (1, 256, 256, 1).
    batch = image.astype(np.float32) * rescale
    batch = batch[np.newaxis, ..., np.newaxis]

    completed_image = model.predict(batch).squeeze()

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.title("Incomplete Image")
    plt.imshow(image, cmap='gray')

    plt.subplot(1, 2, 2)
    plt.title("Completed Image")
    plt.imshow(completed_image, cmap='gray')

    plt.show()

    return completed_image

# Example usage
# NOTE: the path below is a placeholder; point it at a real image file before running this cell.
image_path = "path_to_your_incomplete_image.png"  # Replace with your image file path
complete_curve_with_model(image_path)
