<a href="https://colab.research.google.com/github/kvcops/AI-Text-Classification/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Google Colab setup
!pip install opencv-python-headless
!pip install tensorflow
!pip install kaggle
!pip install cairosvg
!pip install pandas



In [None]:
import pandas as pd
import numpy as np
import cv2
import json
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import cairosvg
import zipfile
import tensorflow as tf

In [None]:
# Install the Kaggle API token where the kaggle CLI looks for it.
# Expects kaggle.json to be present at /content/kaggle.json before this cell runs.
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Ensure TensorFlow can see a GPU before doing any heavy work.
# tf.test.is_gpu_available() is deprecated; list_physical_devices('GPU')
# returns a (possibly empty) list of visible GPU devices.
if not tf.config.list_physical_devices('GPU'):
    raise SystemError('GPU device not found')


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


In [None]:
# Download and unzip the dataset
!kaggle datasets download -d ashishjangra27/doodle-dataset
!unzip doodle-dataset.zip -d /content/doodle_dataset


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/doodle_dataset/doodle/zebra/5237826645917696.png  
  inflating: /content/doodle_dataset/doodle/zebra/5238496203636736.png  
  inflating: /content/doodle_dataset/doodle/zebra/5239548336406528.png  
  inflating: /content/doodle_dataset/doodle/zebra/5239876129652736.png  
  inflating: /content/doodle_dataset/doodle/zebra/5241190779715584.png  
  inflating: /content/doodle_dataset/doodle/zebra/5242472437383168.png  
  inflating: /content/doodle_dataset/doodle/zebra/5242551911055360.png  
  inflating: /content/doodle_dataset/doodle/zebra/5242728591917056.png  
  inflating: /content/doodle_dataset/doodle/zebra/5244520788656128.png  
  inflating: /content/doodle_dataset/doodle/zebra/5247778051588096.png  
  inflating: /content/doodle_dataset/doodle/zebra/5248250963558400.png  
  inflating: /content/doodle_dataset/doodle/zebra/5248631722475520.png  
  inflating: /content/doodle_dataset/doodle/zebra/524933604

In [None]:
# Data Preprocessing
def json_to_image(drawing_json):
    """Rasterize a JSON-encoded list of strokes onto a 256x256 grayscale canvas.

    ``drawing_json`` is parsed with ``json.loads``. For every stroke,
    ``stroke[0]`` supplies the x coordinates and ``stroke[1]`` the matching
    y coordinates. Consecutive points are joined by line segments of value
    255 and thickness 2 on an all-zero ``uint8`` image, which is returned.
    Strokes with fewer than two points leave the canvas untouched.
    """
    canvas = np.zeros((256, 256), dtype=np.uint8)
    for stroke in json.loads(drawing_json):
        xs = stroke[0]
        # Pair each x with its successor; y values are looked up by the same index.
        for idx, (x_start, x_end) in enumerate(zip(xs, xs[1:])):
            cv2.line(canvas, (x_start, stroke[1][idx]),
                     (x_end, stroke[1][idx + 1]), 255, 2)
    return canvas

In [None]:
# Load the dataset index CSV shipped alongside the extracted images
df = pd.read_csv('/content/doodle_dataset/master_doodle_dataframe.csv')

# Empty containers for model inputs (X) and targets (y)
X, y = [], []


In [None]:
# Verify the directory structure
# Sanity check: show the top-level entries of the extraction directory.
print("Directory structure of /content/doodle_dataset:")
!ls /content/doodle_dataset


Directory structure of /content/doodle_dataset:
doodle	master_doodle_dataframe.csv


In [None]:
# List all files and directories in the dataset
# Note: despite the printed heading, the first command lists directories only
# (-type d); regular files are sampled by the second command below.
print("Listing all files and directories in /content/doodle_dataset:")
!find /content/doodle_dataset -type d  # List all directories

# List a specific number of files to see how they are named
# (only the first 20 paths are shown, via head -n 20)
print("Listing some files in /content/doodle_dataset:")
!find /content/doodle_dataset -type f | head -n 20


Listing all files and directories in /content/doodle_dataset:
/content/doodle_dataset
/content/doodle_dataset/doodle
/content/doodle_dataset/doodle/shorts
/content/doodle_dataset/doodle/string bean
/content/doodle_dataset/doodle/sink
/content/doodle_dataset/doodle/ocean
/content/doodle_dataset/doodle/car
/content/doodle_dataset/doodle/pillow
/content/doodle_dataset/doodle/police car
/content/doodle_dataset/doodle/hat
/content/doodle_dataset/doodle/star
/content/doodle_dataset/doodle/cookie
/content/doodle_dataset/doodle/lantern
/content/doodle_dataset/doodle/cell phone
/content/doodle_dataset/doodle/feather
/content/doodle_dataset/doodle/anvil
/content/doodle_dataset/doodle/church
/content/doodle_dataset/doodle/yoga
/content/doodle_dataset/doodle/castle
/content/doodle_dataset/doodle/mosquito
/content/doodle_dataset/doodle/mountain
/content/doodle_dataset/doodle/tennis racquet
/content/doodle_dataset/doodle/helicopter
/content/doodle_dataset/doodle/The Great Wall of China
/content/dood

In [None]:
import os

# Collect samples in lists owned by this cell so it can be re-run safely:
# X and y are rebound to NumPy arrays below, so appending to them directly
# would fail the second time the cell is executed.
sketch_images = []  # strokes rendered by json_to_image (model input)
target_images = []  # PNG read from disk, resized to 256x256 (model target)

# Iterate only the three columns that are needed; df.iterrows() would build a
# full Series object for every row.
for class_label, key_id, drawing_json in zip(df['word'], df['key_id'], df['drawing']):
    image_name = str(key_id) + '.png'
    image_path = f'/content/doodle_dataset/doodle/{class_label}/{image_name}'

    if not os.path.exists(image_path):
        print(f"ERROR: File not found {image_path}")
        continue

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None when the file cannot be decoded
        print(f"ERROR loading {image_name}")
        continue

    target_images.append(cv2.resize(image, (256, 256)))
    sketch_images.append(json_to_image(drawing_json))

# Fail with a clear message instead of an opaque error from train_test_split
if not sketch_images:
    raise ValueError('No images could be loaded from /content/doodle_dataset/doodle')

# Convert to NumPy arrays and normalize to [0, 1].
# float32 is requested explicitly: dividing uint8 data by 255.0 would yield
# float64 arrays at twice the memory. The division is done in place to avoid
# holding a second full-size copy.
X = np.array(sketch_images, dtype=np.float32).reshape(-1, 256, 256, 1)
X /= 255.0
y = np.array(target_images, dtype=np.float32).reshape(-1, 256, 256, 1)
y /= 255.0

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# --- 5. Create U-Net Model ---
# Convolutional encoder-decoder built with the Keras functional API.
# Note: no skip connections link the encoder to the decoder in this version.
inputs = Input(shape=(256, 256, 1))

# Encoder: two conv + 2x2 max-pool stages (256 -> 128 -> 64 spatially)
conv1 = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(inputs)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

# Decoder: two 2x upsample + conv stages (64 -> 128 -> 256 spatially)
up1 = UpSampling2D(size=(2, 2))(pool2)
conv3 = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(up1)
up2 = UpSampling2D(size=(2, 2))(conv3)
conv4 = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(up2)

# Output layer: single-channel map squashed to [0, 1] by the sigmoid
outputs = Conv2D(filters=1, kernel_size=(3, 3), padding='same', activation='sigmoid')(conv4)

model = Model(inputs=inputs, outputs=outputs)
# Per-pixel binary cross-entropy against the normalized target image
model.compile(loss='binary_crossentropy', optimizer='adam')


In [None]:
# --- 6. Train the Model ---
# Run training under the first GPU device scope, evaluating on the held-out
# split after every epoch.
with tf.device('/device:GPU:0'):
    model.fit(
        x=X_train,
        y=y_train,
        batch_size=32,
        epochs=10,
        validation_data=(X_test, y_test),
    )


In [None]:
# --- 7. Function to Process SVG and Save Output as JPG ---
def process_svg_and_save_jpg(svg_file_path, output_jpg_path):
    """Run an SVG drawing through the trained model and save the prediction.

    The SVG is rasterized with cairosvg, decoded as a grayscale image, resized
    to 256x256, scaled to [0, 1] and passed to the module-level ``model``.
    The prediction is rescaled to 0-255 ``uint8``, written to
    ``output_jpg_path`` and displayed with matplotlib.

    Args:
        svg_file_path: Location of the SVG to process (forwarded to
            ``cairosvg.svg2png`` as ``url``).
        output_jpg_path: Destination file for the predicted image.

    Raises:
        ValueError: If the rasterized SVG cannot be decoded as an image.
        OSError: If the output image cannot be written.
    """
    # Rasterize in memory rather than through a fixed "temp.png" in the
    # working directory, which was never removed and could clash between calls.
    png_bytes = cairosvg.svg2png(url=svg_file_path)
    image = cv2.imdecode(np.frombuffer(png_bytes, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Could not decode rasterized SVG: {svg_file_path}")

    # NOTE(review): json_to_image feeds the model white strokes on a black
    # background; confirm that input SVGs rasterize with the same polarity.
    image = cv2.resize(image, (256, 256))
    input_image = image.reshape(1, 256, 256, 1) / 255.0

    with tf.device('/device:GPU:0'): # Use GPU if available
        regularized = model.predict(input_image)

    # Drop the batch and channel axes and map the output back to 8-bit.
    regularized_image = (regularized[0, :, :, 0] * 255).astype(np.uint8)
    # cv2.imwrite reports failure through its return value; surface it.
    if not cv2.imwrite(output_jpg_path, regularized_image):
        raise OSError(f"Could not write output image to {output_jpg_path}")

    plt.imshow(regularized_image, cmap='gray')
    plt.title('Regularized Image')
    plt.show()

In [None]:
# --- 8. Example Usage (after the model is trained) ---
svg_file_path = '/path/to/your/input.svg'  # Update with your SVG file path
output_jpg_path = '/path/to/your/output.jpg' # Update with desired output JPG location

# The paths above are placeholders. Skip with a message instead of crashing so
# that "Run All" still completes until a real SVG path is supplied.
if os.path.isfile(svg_file_path):
    process_svg_and_save_jpg(svg_file_path, output_jpg_path)
else:
    print(f"SVG not found at {svg_file_path!r}; update svg_file_path and re-run this cell.")