In [1]:
# Import necessary modules
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import cv2

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the images
train_images = train_images / 255.0
test_images = test_images / 255.0

# One-hot encode the labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# Create the model
model = Sequential([
  Flatten(input_shape=(28, 28)),
  Dense(128, activation='relu'),
  Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=5)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('Test accuracy:', test_acc)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.974399983882904


In [2]:
# Predict on a custom image
custom_image = cv2.imread('we.png', cv2.IMREAD_GRAYSCALE)
custom_image = cv2.resize(custom_image, (28, 28))  # Make sure the image is 28x28 pixels
custom_image = custom_image / 255.0  # Normalize the image

# Add an extra dimension for batch size and predict
custom_image = tf.expand_dims(custom_image, 0)
predictions = model.predict(custom_image)

print("The model thinks the image is digit:", tf.argmax(predictions, axis=1).numpy()[0])

The model thinks the image is digit: 3


In [5]:
import os
import csv
from PIL import Image

def convert_to_yolo_label(c, size, box):
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return f"{c} {x} {y} {w} {h}/n"

# Get list of all images
all_images = os.listdir("./minst/MNIST-ObjectDetection/data/mnist_detection/train/images")
all_images = [img[:-4] for img in all_images if img.endswith(".jpg")]

for image in all_images:
    image_path = os.path.join("./minst/MNIST-ObjectDetection/data/mnist_detection/train/images", f"{image}.jpg")
    label_path = os.path.join("./minst/MNIST-ObjectDetection/data/mnist_detection/train/labels", f"{image}.txt")
    new_label_path = os.path.join("./minst/MNIST-ObjectDetection/data/mnist_detection/train/lb", f"{image}.txt")

    image_obj = Image.open(image_path)
    width, height = image_obj.size

    with open(label_path, "r") as f_in, open(new_label_path, "w") as f_out:
        reader = csv.reader(f_in, delimiter=',')
        next(reader)  # Skip the header
        for row in reader:
            cls, xmin, ymin, xmax, ymax = row
            xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(xmax), float(ymax)
            yolo_label = convert_to_yolo_label(cls, (width, height), (xmin, xmax, ymin, ymax))
            f_out.write(yolo_label)

In [8]:


# Paths to your image and annotation folders
images_folder = "./minst/MNIST-ObjectDetection/data/mnist_detection/test/images"
annotations_folder = "./minst/MNIST-ObjectDetection/data/mnist_detection/test/labels"
output_folder = "./minst/MNIST-ObjectDetection/data/mnist_detection/test/lb"

import os
import cv2

# Function to convert annotation to YOLO format
def convert_to_yolo_format(annotation_file, image_width, image_height, output_file):
    with open(annotation_file, 'r') as infile, open(output_file, 'w') as outfile:
        lines = infile.readlines()[1:]  # Skip the first line with column headers
        for line in lines:
            parts = line.strip().split(',')
            if len(parts) == 5:
                label, xmin, ymin, xmax, ymax = map(int, parts)
                width = xmax - xmin
                height = ymax - ymin
                x_center = xmin + (width / 2)
                y_center = ymin + (height / 2)

                # Normalize the values to be between 0 and 1
                x_center /= image_width
                y_center /= image_height
                width /= image_width
                height /= image_height

                outfile.write(f"{label} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")


# Create the output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Check images in the images folder and their corresponding annotations
for image_name in os.listdir(images_folder):
    if image_name.endswith(".png"):
        image_path = os.path.join(images_folder, image_name)
        annotation_name = os.path.splitext(image_name)[0] + ".txt"
        annotation_path = os.path.join(annotations_folder, annotation_name)

        # Dynamically determine the image dimensions
        image = cv2.imread(image_path)
        image_height, image_width, _ = image.shape

        if os.path.exists(annotation_path):
            output_file = os.path.join(output_folder, os.path.splitext(image_name)[0] + ".txt")
            convert_to_yolo_format(annotation_path, image_width, image_height, output_file)

print("Conversion to YOLO format complete.")


Conversion to YOLO format complete.


In [1]:
import re

# The data string provided
data_string = """
Bounding box: vertices {
  x: 80
  y: 74
}
vertices {
  x: 1775
  y: 74
}
vertices {
  x: 1775
  y: 1197
}
vertices {
  x: 80
  y: 1197
}

Text: Question
Bounding box: vertices {
  x: 80
  y: 92
}
vertices {
  x: 316
  y: 92
}
vertices {
  x: 316
  y: 143
}
vertices {
  x: 80
  y: 143
}

Text: Marks
Bounding box: vertices {
  x: 415
  y: 91
}
vertices {
  x: 577
  y: 91
}
vertices {
  x: 577
  y: 134
}
vertices {
  x: 415
  y: 134
}

Text: Question
Bounding box: vertices {
  x: 674
  y: 89
}
vertices {
  x: 903
  y: 87
}
vertices {
  x: 903
  y: 138
}
vertices {
  x: 674
  y: 140
}

Text: Marks
Bounding box: vertices {
  x: 1000
  y: 86
}
vertices {
  x: 1158
  y: 82
}
vertices {
  x: 1159
  y: 123
}
vertices {
  x: 1001
  y: 127
}

Text: Question
Bounding box: vertices {
  x: 1255
  y: 81
}
vertices {
  x: 1481
  y: 79
}
vertices {
  x: 1482
  y: 131
}
vertices {
  x: 1256
  y: 133
}

Text: Marks
Bounding box: vertices {
  x: 1581
  y: 78
}
vertices {
  x: 1738
  y: 75
}
vertices {
  x: 1739
  y: 119
}
vertices {
  x: 1582
  y: 122
}

Text: No.
Bounding box: vertices {
  x: 157
  y: 171
}
vertices {
  x: 242
  y: 169
}
vertices {
  x: 243
  y: 213
}
vertices {
  x: 158
  y: 215
}

Text: Obtained
Bounding box: vertices {
  x: 380
  y: 164
}
vertices {
  x: 613
  y: 165
}
vertices {
  x: 613
  y: 212
}
vertices {
  x: 380
  y: 211
}

Text: No.
Bounding box: vertices {
  x: 747
  y: 166
}
vertices {
  x: 830
  y: 166
}
vertices {
  x: 830
  y: 207
}
vertices {
  x: 747
  y: 207
}

Text: Obtained
Bounding box: vertices {
  x: 964
  y: 157
}
vertices {
  x: 1195
  y: 157
}
vertices {
  x: 1195
  y: 204
}
vertices {
  x: 964
  y: 204
}

Text: No.
Bounding box: vertices {
  x: 1327
  y: 158
}
vertices {
  x: 1412
  y: 158
}
vertices {
  x: 1412
  y: 201
}
vertices {
  x: 1327
  y: 201
}

Text: Obtained
Bounding box: vertices {
  x: 1544
  y: 154
}
vertices {
  x: 1774
  y: 149
}
vertices {
  x: 1775
  y: 195
}
vertices {
  x: 1545
  y: 200
}

Text: 1
Bounding box: vertices {
  x: 188
  y: 311
}
vertices {
  x: 215
  y: 311
}
vertices {
  x: 215
  y: 351
}
vertices {
  x: 188
  y: 351
}

Text: 2
Bounding box: vertices {
  x: 184
  y: 429
}
vertices {
  x: 215
  y: 429
}
vertices {
  x: 215
  y: 470
}
vertices {
  x: 184
  y: 470
}

Text: 3
Bounding box: vertices {
  x: 182
  y: 549
}
vertices {
  x: 215
  y: 549
}
vertices {
  x: 215
  y: 592
}
vertices {
  x: 182
  y: 592
}

Text: درا
Bounding box: vertices {
  x: 186
  y: 594
}
vertices {
  x: 186
  y: 551
}
vertices {
  x: 210
  y: 551
}
vertices {
  x: 210
  y: 594
}

Text: 4
Bounding box: vertices {
  x: 184
  y: 672
}
vertices {
  x: 217
  y: 672
}
vertices {
  x: 217
  y: 713
}
vertices {
  x: 184
  y: 713
}

Text: 5
Bounding box: vertices {
  x: 181
  y: 790
}
vertices {
  x: 217
  y: 790
}
vertices {
  x: 217
  y: 836
}
vertices {
  x: 181
  y: 836
}

Text: 6
Bounding box: vertices {
  x: 184
  y: 912
}
vertices {
  x: 214
  y: 912
}
vertices {
  x: 214
  y: 953
}
vertices {
  x: 184
  y: 953
}

Text: 7
Bounding box: vertices {
  x: 182
  y: 1033
}
vertices {
  x: 215
  y: 1033
}
vertices {
  x: 214
  y: 1074
}
vertices {
  x: 181
  y: 1074
}

Text: 8
Bounding box: vertices {
  x: 180
  y: 1154
}
vertices {
  x: 217
  y: 1154
}
vertices {
  x: 217
  y: 1197
}
vertices {
  x: 180
  y: 1197
}

Text: 4
Bounding box: vertices {
  x: 419
  y: 285
}
vertices {
  x: 536
  y: 286
}
vertices {
  x: 535
  y: 376
}
vertices {
  x: 418
  y: 375
}

Text: 2
Bounding box: vertices {
  x: 442
  y: 424
}
vertices {
  x: 515
  y: 424
}
vertices {
  x: 515
  y: 480
}
vertices {
  x: 442
  y: 480
}

Text: O
Bounding box: vertices {
  x: 417
  y: 530
}
vertices {
  x: 488
  y: 530
}
vertices {
  x: 488
  y: 588
}
vertices {
  x: 417
  y: 588
}

Text: 2
Bounding box: vertices {
  x: 437
  y: 642
}
vertices {
  x: 524
  y: 641
}
vertices {
  x: 524
  y: 716
}
vertices {
  x: 437
  y: 717
}

Text: 2.5
Bounding box: vertices {
  x: 393
  y: 772
}
vertices {
  x: 548
  y: 750
}
vertices {
  x: 558
  y: 824
}
vertices {
  x: 403
  y: 845
}

Text: o
Bounding box: vertices {
  x: 447
  y: 909
}
vertices {
  x: 520
  y: 907
}
vertices {
  x: 521
  y: 972
}
vertices {
  x: 448
  y: 974
}

Text: 3.
Bounding box: vertices {
  x: 410
  y: 1013
}
vertices {
  x: 501
  y: 1013
}
vertices {
  x: 501
  y: 1085
}
vertices {
  x: 410
  y: 1085
}

Text: S
Bounding box: vertices {
  x: 496
  y: 1013
}
vertices {
  x: 574
  y: 1013
}
vertices {
  x: 574
  y: 1085
}
vertices {
  x: 496
  y: 1085
}

Text: 9
Bounding box: vertices {
  x: 772
  y: 303
}
vertices {
  x: 806
  y: 302
}
vertices {
  x: 807
  y: 344
}
vertices {
  x: 773
  y: 345
}

Text: 10
Bounding box: vertices {
  x: 759
  y: 422
}
vertices {
  x: 821
  y: 422
}
vertices {
  x: 821
  y: 465
}
vertices {
  x: 759
  y: 465
}

Text: 11
Bounding box: vertices {
  x: 758
  y: 542
}
vertices {
  x: 820
  y: 543
}
vertices {
  x: 819
  y: 585
}
vertices {
  x: 757
  y: 584
}

Text: 12
Bounding box: vertices {
  x: 758
  y: 661
}
vertices {
  x: 818
  y: 661
}
vertices {
  x: 818
  y: 705
}
vertices {
  x: 758
  y: 705
}

Text: 13
Bounding box: vertices {
  x: 754
  y: 781
}
vertices {
  x: 819
  y: 781
}
vertices {
  x: 819
  y: 825
}
vertices {
  x: 754
  y: 825
}

Text: 14
Bounding box: vertices {
  x: 756
  y: 902
}
vertices {
  x: 818
  y: 901
}
vertices {
  x: 818
  y: 944
}
vertices {
  x: 756
  y: 945
}

Text: 15
Bounding box: vertices {
  x: 755
  y: 1020
}
vertices {
  x: 815
  y: 1019
}
vertices {
  x: 816
  y: 1064
}
vertices {
  x: 756
  y: 1065
}

Text: 16
Bounding box: vertices {
  x: 754
  y: 1142
}
vertices {
  x: 813
  y: 1141
}
vertices {
  x: 813
  y: 1187
}
vertices {
  x: 754
  y: 1188
}

Text: 17
Bounding box: vertices {
  x: 1338
  y: 292
}
vertices {
  x: 1404
  y: 292
}
vertices {
  x: 1404
  y: 336
}
vertices {
  x: 1338
  y: 336
}

Text: 18
Bounding box: vertices {
  x: 1338
  y: 414
}
vertices {
  x: 1404
  y: 416
}
vertices {
  x: 1403
  y: 456
}
vertices {
  x: 1337
  y: 454
}

Text: 19
Bounding box: vertices {
  x: 1336
  y: 532
}
vertices {
  x: 1401
  y: 531
}
vertices {
  x: 1401
  y: 574
}
vertices {
  x: 1336
  y: 575
}

Text: 20
Bounding box: vertices {
  x: 1337
  y: 652
}
vertices {
  x: 1398
  y: 652
}
vertices {
  x: 1398
  y: 695
}
vertices {
  x: 1337
  y: 695
}

Text: 21
Bounding box: vertices {
  x: 1336
  y: 770
}
vertices {
  x: 1402
  y: 770
}
vertices {
  x: 1402
  y: 816
}
vertices {
  x: 1336
  y: 816
}

Text: 22
Bounding box: vertices {
  x: 1336
  y: 893
}
vertices {
  x: 1402
  y: 893
}
vertices {
  x: 1402
  y: 936
}
vertices {
  x: 1336
  y: 936
}

Text: 23
Bounding box: vertices {
  x: 1335
  y: 1013
}
vertices {
  x: 1400
  y: 1013
}
vertices {
  x: 1400
  y: 1056
}
vertices {
  x: 1335
  y: 1056
}

Text: 24
Bounding box: vertices {
  x: 1335
  y: 1134
}
vertices {
  x: 1402
  y: 1134
}
vertices {
  x: 1402
  y: 1180
}
vertices {
  x: 1335
  y: 1180
}
"""

# Split the data string into separate entries
entries = data_string.split("\n\n")

data = []
for entry in entries:
    # Extract the text
    text_match = re.search(r"Text: (.*)", entry)
    if text_match:
        text = text_match.group(1)
    else:
        continue

    # Extract the bounding box coordinates
    coordinates_match = re.findall(r"x: (\d+).*y: (\d+)", entry)
    if coordinates_match:
        # We only need the top left and bottom right coordinates for the box
        top_left_x, top_left_y = map(int, coordinates_match[0])
        bottom_right_x, bottom_right_y = map(int, coordinates_match[2])
    else:
        continue

    # Add the text and coordinates to the data list
    data.append({
        "Text": text,
        "Top_left_x": top_left_x,
        "Top_left_y": top_left_y,
        "Bottom_right_x": bottom_right_x,
        "Bottom_right_y": bottom_right_y
    })

# Now, `data` is a list of dictionaries in the required format

In [2]:
data

[]