In [1]:
import numpy as np

# Load the raw grid images and the known-cell labels from the Kaggle input
# dataset. The shapes in the trailing comments are the original author's notes
# and are not verified here (TODO confirm with `.shape`). Each label entry is
# later unpacked as (x, y, value) by `prepare_labels`.
train_images = np.load('/kaggle/input/sudokotask12/imgs-001.npy')  # Shape: (50000, 252, 252)
train_labels = np.load('/kaggle/input/sudokotask12/known_values_labels.npy')  # Shape: (50000, 41, 3)
test_images = np.load('/kaggle/input/sudokotask12/imgs.npy')    # Shape: (10000, 252, 252)

In [2]:
import cv2

import numpy as np

# def preprocess_images_batch(images, batch_size):
#     num_images = images.shape[0]
#     cell_size = 28
#     for batch_start in range(0, num_images, batch_size):
#         batch_end = min(batch_start + batch_size, num_images)
#         processed_batch = np.zeros(((batch_end - batch_start) * 81, cell_size, cell_size))

#         for idx in range(batch_start, batch_end):
#             for row in range(9):
#                 for col in range(9):
#                     cell = images[idx, row * cell_size:(row + 1) * cell_size, col * cell_size:(col + 1) * cell_size]
#                     processed_cell = cell / 255.0
#                     processed_batch[(idx - batch_start) * 81 + row * 9 + col] = processed_cell

#         # Here you can save each batch to disk or yield it if using a generator
#         yield processed_batch

# # Example usage
# batch_size = 100  # Adjust this based on your system's memory capacity
# for processed_batch in preprocess_images_batch(train_images, batch_size):
#     # Save or process each batch
#     pass

# for processed_batch in preprocess_images_batch(train_images, batch_size):
#     # Save or process each batch
#     pass
# def preprocess_images_simple(images):
#     num_images = images.shape[0]
#     cell_size = 28
#     processed_images = np.zeros((num_images * 81, cell_size, cell_size))

#     for idx in range(num_images):
#         for row in range(9):
#             for col in range(9):
#                 # Extracting each cell
#                 cell = images[idx, row * cell_size:(row + 1) * cell_size, col * cell_size:(col + 1) * cell_size]
#                 # Normalize the cell
#                 processed_cell = cell / 255.0
#                 # Store in processed_images
#                 processed_images[idx * 81 + row * 9 + col] = processed_cell

#     return processed_images
def preprocess_images_simple(images, new_size=(14, 14), grid_size=9, cell_size=28):
    """Cut every grid image into cells, downscale each cell, and divide by 255.

    Args:
        images: Array indexed as ``images[idx, row, col]``. Each image is
            assumed to cover at least ``grid_size * cell_size`` pixels along
            both axes (TODO confirm against the loaded data).
        new_size: ``(rows, cols)`` of each output cell.
        grid_size: Number of cells along one side of the grid (9 for Sudoku).
        cell_size: Side length, in pixels, of one cell in the source image.

    Returns:
        Array of shape ``(num_images * grid_size**2, new_size[0], new_size[1])``
        in NumPy's default float dtype. Cells are stored image by image and,
        within an image, row by row.
    """
    out_rows, out_cols = new_size
    cells_per_image = grid_size * grid_size
    num_images = images.shape[0]
    processed_images = np.zeros((num_images * cells_per_image, out_rows, out_cols))

    for idx in range(num_images):
        base = idx * cells_per_image
        for row in range(grid_size):
            for col in range(grid_size):
                # Extract the (row, col) cell of this grid
                cell = images[idx,
                              row * cell_size:(row + 1) * cell_size,
                              col * cell_size:(col + 1) * cell_size]
                # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
                # Passing new_size through unchanged only works for square
                # sizes, so swap explicitly to match the output buffer.
                resized_cell = cv2.resize(cell, (out_cols, out_rows),
                                          interpolation=cv2.INTER_AREA)
                # Scale pixel values by 1/255 and store at the flat cell index
                processed_images[base + row * grid_size + col] = resized_cell / 255.0

    return processed_images

# Split both image sets into per-cell samples at the default reduced
# resolution (new_size=(14, 14)). Each result holds 81 rows per input image.
train_images_processed = preprocess_images_simple(train_images)
test_images_processed = preprocess_images_simple(test_images)

# # Process the entire datasets
# train_images_processed = preprocess_images_simple(train_images)
# test_images_processed = preprocess_images_simple(test_images)


# train_images_processed = preprocess_images_batch(train_images, batch_size)
# test_images_processed = preprocess_images_batch(test_images, batch_size)
# def preprocess_images(images):
#     num_images = images.shape[0]
#     # Each image is divided into 9x9 cells, each cell is approximately 28x28
#     cell_size = 28
#     processed_images = np.zeros((num_images * 81, cell_size, cell_size))

#     for idx in range(num_images):
#         for row in range(9):
#             for col in range(9):
#                 # Extracting each cell
#                 cell = images[idx, row * cell_size:(row + 1) * cell_size, col * cell_size:(col + 1) * cell_size]
#                 # Normalize the cell
#                 processed_cell = cell / 255.0
#                 # Store in processed_images
#                 processed_images[idx * 81 + row * 9 + col] = processed_cell

#     return processed_images

# train_images_processed = preprocess_images(train_images)
# test_images_processed = preprocess_images(test_images)

In [3]:
# We need to convert train_labels to a format suitable for training
def prepare_labels(labels):
    """Expand sparse (x, y, value) label triples into one flat per-cell vector.

    Every element of ``labels`` describes one 9x9 grid as an iterable of
    three-component entries. The returned 1-D integer array has 81 slots per
    grid; slot ``x * 9 + y`` of a grid receives ``value`` and all remaining
    slots stay 0. Entries whose coordinates fall outside the grid are ignored.
    """
    cells_per_grid = 81
    flat_labels = np.zeros(len(labels) * cells_per_grid, dtype=int)

    for grid_index, grid_entries in enumerate(labels):
        offset = grid_index * cells_per_grid
        for entry in grid_entries:
            # Coerce all three components to plain ints before indexing
            row, col, digit = (int(component) for component in entry)
            if not (0 <= row < 9 and 0 <= col < 9):
                continue  # coordinates outside the grid are dropped
            flat_labels[offset + row * 9 + col] = digit

    return flat_labels

# Flatten the sparse training labels into one integer per grid cell (81 per
# image), in the same cell order that preprocess_images_simple produces.
train_labels_processed = prepare_labels(train_labels)



In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# def build_model():
#     model = Sequential([
#         Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1), padding='same'),
#         MaxPooling2D(2, 2),

#         Conv2D(64, (3, 3), activation='relu', padding='same'),
#         MaxPooling2D(2, 2),

#         Conv2D(128, (3, 3), activation='relu', padding='same'),
#         MaxPooling2D(2, 2),

#         Flatten(),
#         Dense(128, activation='relu'),
#         Dropout(0.5),
#         Dense(10, activation='softmax')
#     ])
#     return model
def build_model(input_shape=(14, 14, 1), num_classes=10):
    """Build the small per-cell digit classifier (uncompiled).

    Args:
        input_shape: ``(height, width, channels)`` of one cell image. The
            default matches the 14x14 single-channel cells produced by
            ``preprocess_images_simple`` with its default ``new_size``.
        num_classes: Number of softmax outputs. The default of 10 covers the
            labels 0-9, where 0 is the value ``prepare_labels`` leaves in
            cells that have no known digit.

    Returns:
        A Keras ``Sequential`` model: two Conv2D + MaxPooling2D stages followed
        by a 64-unit dense layer with dropout and a softmax output layer.
    """
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, padding='same'),
        MaxPooling2D(2, 2),

        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(2, 2),

        Flatten(),
        Dense(64, activation='relu'),  # Reduced number of neurons
        Dropout(0.5),
        Dense(num_classes, activation='softmax')  # one output per class
    ])
    return model

# model = build_model()
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# model.summary()

# Instantiate the network and configure training. The sparse categorical
# cross-entropy loss takes integer class labels, which is the form
# prepare_labels returns.
model = build_model()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer architecture and parameter counts
model.summary()




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 14, 14, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 7, 7, 32)          0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 64)          18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 3, 3, 64)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 576)               0         
                                                                 
 dense (Dense)               (None, 64)                3

In [5]:
from sklearn.model_selection import train_test_split

# train_images_processed / train_labels_processed hold one row per grid cell
# (81 rows per image), so the slices below keep the first 50,000 cell samples,
# not the first 50,000 images.
train_images_subset = train_images_processed[:50000]
train_labels_subset = train_labels_processed[:50000]

# Hold out 20% of that subset for validation. random_state is fixed so the
# split is repeatable; adjust test_size to change the proportion.
train_images_final, val_images, train_labels_final, val_labels = train_test_split(
    train_images_subset, train_labels_subset, test_size=0.2, random_state=42)

# Append a trailing channel axis so each sample has the (height, width, 1)
# layout the CNN's first layer is declared with.
train_images_final_reshaped = np.expand_dims(train_images_final, axis=-1)
val_images_reshaped = np.expand_dims(val_images, axis=-1)


In [6]:
# Train for 10 epochs in mini-batches of 8 samples, reporting loss/accuracy on
# the held-out validation split alongside the training metrics.
model.fit(train_images_final_reshaped, train_labels_final, 
          validation_data=(val_images_reshaped, val_labels),
          epochs=10, batch_size=8)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a80c0f389a0>

In [7]:
# Persist the trained model under the Kaggle working directory. No file
# extension is given, so the on-disk format is whatever the installed Keras
# version uses for such a path (TODO confirm).
model.save('/kaggle/working/model')


In [8]:
import tensorflow as tf
import shutil

# Location the trained model was written to by the earlier model.save call
model_path = '/kaggle/working/model'

# Reload the saved model from disk (this rebinds `model` to the loaded copy)
model = tf.keras.models.load_model(model_path)

# Name of the archive to produce
zip_file_path = 'task1model.zip'

# shutil.make_archive appends the '.zip' extension itself, so strip it from the
# base name, then archive everything under model_path.
shutil.make_archive(zip_file_path.replace('.zip', ''), 'zip', model_path)


'/kaggle/working/task1model.zip'

In [1]:
import tensorflow as tf
# Load the trained model from an attached Kaggle input dataset
# (presumably the uploaded contents of task1model.zip — TODO confirm).
loaded_model = tf.keras.models.load_model('/kaggle/input/task1zipfile')



In [2]:
import cv2

import numpy as np
def preprocess_images_simple(images, new_size=(14, 14), grid_size=9, cell_size=28):
    """Cut every grid image into cells, downscale each cell, and divide by 255.

    Args:
        images: Array indexed as ``images[idx, row, col]``. Each image is
            assumed to cover at least ``grid_size * cell_size`` pixels along
            both axes (TODO confirm against the loaded data).
        new_size: ``(rows, cols)`` of each output cell.
        grid_size: Number of cells along one side of the grid (9 for Sudoku).
        cell_size: Side length, in pixels, of one cell in the source image.

    Returns:
        Array of shape ``(num_images * grid_size**2, new_size[0], new_size[1])``
        in NumPy's default float dtype. Cells are stored image by image and,
        within an image, row by row.
    """
    out_rows, out_cols = new_size
    cells_per_image = grid_size * grid_size
    num_images = images.shape[0]
    processed_images = np.zeros((num_images * cells_per_image, out_rows, out_cols))

    for idx in range(num_images):
        base = idx * cells_per_image
        for row in range(grid_size):
            for col in range(grid_size):
                # Extract the (row, col) cell of this grid
                cell = images[idx,
                              row * cell_size:(row + 1) * cell_size,
                              col * cell_size:(col + 1) * cell_size]
                # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
                # Passing new_size through unchanged only works for square
                # sizes, so swap explicitly to match the output buffer.
                resized_cell = cv2.resize(cell, (out_cols, out_rows),
                                          interpolation=cv2.INTER_AREA)
                # Scale pixel values by 1/255 and store at the flat cell index
                processed_images[base + row * grid_size + col] = resized_cell / 255.0

    return processed_images

In [3]:
# Load the grid images to predict on (shape note is the original author's and
# is not verified here), split them into 81 downscaled cells per image, and add
# the trailing channel axis used as model input.
test_images = np.load('/kaggle/input/sudokotask2-dataset/imgs-002.npy')    # Shape: (10000, 252, 252)
test_images_processed = preprocess_images_simple(test_images)
test_images_reshaped = np.expand_dims(test_images_processed, axis=-1)

In [4]:
# Run the loaded model over every cell sample, 10 samples per batch.
# `predictions` has one row of class scores per cell, in preprocessing order.
predictions = loaded_model.predict(test_images_reshaped,batch_size=10)



In [15]:
def create_submission_file(predictions, num_images, output_file='submission6.csv'):
    """Write one ``id,value`` CSV row per grid cell using the arg-max class.

    Args:
        predictions: Per-cell class scores, 81 consecutive rows per image, so
            that ``predictions[img_id * 81 + position]`` belongs to cell
            ``position`` of image ``img_id``.
        num_images: Number of images to export.
        output_file: Destination CSV path. Defaults to the file name this
            function originally hard-coded.

    Raises:
        IndexError: If ``predictions`` has fewer than ``num_images * 81`` rows.
            The check runs before the file is opened, so a too-short input
            never leaves a truncated submission behind.
    """
    cells_per_image = 81
    required_rows = num_images * cells_per_image
    if len(predictions) < required_rows:
        raise IndexError(
            f"predictions has {len(predictions)} rows but {num_images} images "
            f"need {required_rows}"
        )

    with open(output_file, 'w') as f:
        f.write('id,value\n')
        for img_id in range(num_images):
            base = img_id * cells_per_image
            for position in range(cells_per_image):
                # position -> (row, column) inside the 9x9 grid
                x, y = divmod(position, 9)
                # The most likely class for this cell is the written value
                value = np.argmax(predictions[base + position])
                f.write(f"{img_id}_{x}{y},{value}\n")

create_submission_file(predictions, num_images=10000)  # num_images must match the number of images behind `predictions`

In [5]:
def create_submission_file(predictions, output_file='submission.csv'):
    """Write an ``id,value`` CSV with one row per element of ``predictions``.

    The id is the element's position in ``predictions``. The value is every
    score of that element rounded to the nearest integer and concatenated
    without a separator.

    NOTE(review): for per-class probabilities this yields a string of 0/1
    flags per row rather than a single digit — confirm that is the format
    the consumer of this file expects.
    """
    with open(output_file, 'w') as out:
        out.write('id,value\n')

        for index, scores in enumerate(predictions):
            rounded = ''.join(str(int(round(score))) for score in scores)
            out.write(f"{index},{rounded}\n")
    
# Writes 'submission.csv' (the default output_file) using whichever
# create_submission_file definition was executed most recently in this kernel.
create_submission_file(predictions)

In [8]:
import csv

# Source CSV (Kaggle input) and the single-column CSV derived from it
input_file = '/kaggle/input/outputfile/Train Image submission.csv'
output_file = '/kaggle/working/Submission1.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # consume the input header; it is not written out

    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['QUIZZ'])  # the output has exactly one column

        for row in reader:
            # A row without a second field cannot supply a value: report it
            # and move on to the next one.
            if len(row) < 2:
                print(f"Skipping row {reader.line_num} due to insufficient columns.")
                continue
            # Copy the second field through unchanged (nothing is concatenated)
            quizz_values = row[1]
            writer.writerow([quizz_values])

In [10]:
import csv

# Source CSV (Kaggle input) and the CSV that will hold one concatenated string
input_file = '/kaggle/input/outputfile/Train Image submission.csv'
output_file = '/kaggle/working/Submission2.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # Read and skip the header

    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)

        # Single-column header for the output file
        writer.writerow(['QUIZZ'])

        # Collect the second field of every row. Blank or short rows have no
        # second field and would raise IndexError on row[1], so they are
        # reported and skipped, as in the sibling conversion cells.
        value_fields = []
        for row in reader:
            if len(row) >= 2:
                value_fields.append(row[1])
            else:
                print(f"Skipping row {reader.line_num} due to insufficient columns.")

        # Join every collected value into one string and write it as one row
        quizz_values = ''.join(value_fields)
        writer.writerow([quizz_values])


In [12]:
import csv

# Source CSV (Kaggle input) and the single-column CSV derived from it
input_file = '/kaggle/input/outputfile/Train Image submission.csv'
output_file = '/kaggle/working/Submission3.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # consume the input header; it is not written out

    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['QUIZZ'])  # the output has exactly one column

        for row in reader:
            # A row needs at least one field after the id: report and skip
            # anything shorter.
            if len(row) < 2:
                print(f"Skipping row {reader.line_num} due to insufficient columns.")
                continue
            # Glue together up to 81 fields following the first (indices 1-81)
            quizz_values = ''.join(row[1:82])
            writer.writerow([quizz_values])


NameError: name 'column' is not defined

In [14]:
import csv

# Source CSV (Kaggle input) and the single-column CSV derived from it
input_file = '/kaggle/input/outputfile/Train Image submission.csv'
output_file = '/kaggle/working/Submission4.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # consume the input header; it is not written out

    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['QUIZZ'])  # the output has exactly one column

        for row in reader:
            # A row needs at least one field after the id: report and skip
            # anything shorter.
            if len(row) < 2:
                print(f"Skipping row {reader.line_num} due to insufficient columns.")
                continue
            # Glue together every field after the first, however many exist
            quizz_values = ''.join(row[1:])
            writer.writerow([quizz_values])


In [15]:
import csv

# Source CSV (Kaggle input) and the wide, one-field-per-digit CSV derived from it.
# NOTE(review): this is the same output path as the preceding conversion cell,
# so running both overwrites that cell's result — confirm whether a distinct
# file name was intended.
input_file = '/kaggle/input/outputfile/Train Image submission.csv'
output_file = '/kaggle/working/Submission4.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # consume the input header; it is not written out

    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)

        # Header: 'QUIZZ' followed by Digit_1 ... Digit_81 (82 columns in all)
        writer.writerow(['QUIZZ'] + [f'Digit_{i+1}' for i in range(81)])

        for row in reader:
            # An id plus 81 value fields are required: report and skip
            # anything shorter.
            if len(row) < 82:
                print(f"Skipping row {reader.line_num} due to insufficient columns.")
                continue
            # Keep fields 1-81 and leave the leading 'QUIZZ' column empty
            quizz_values = row[1:82]
            writer.writerow([''] + quizz_values)


In [25]:
import pandas as pd

# Per-cell submission CSV; it must provide a 'value' column
file_path = '/kaggle/input/outputfile/Train Label submission.csv'
df = pd.read_csv(file_path)

# Walk the frame in consecutive 81-row windows (one window per grid) and glue
# each window's 'value' entries into a single string. A final window shorter
# than 81 rows simply produces a shorter string.
concatenated_values = [
    ''.join(map(str, df.iloc[start:start + 81]['value'].values))
    for start in range(0, len(df), 81)
]

# One output row per grid, written without the index column
new_df = pd.DataFrame(concatenated_values, columns=['Concatenated Values'])
new_file_path = 'Train Label concatenated_values.csv'
new_df.to_csv(new_file_path, index=False)

print(f"File saved as {new_file_path}")

File saved as Train Label concatenated_values.csv


In [28]:
import csv

# Inputs: two single-column CSVs whose rows are paired up by position.
# NOTE(review): none of the cells shown writes
# 'Train Image concatenated_values.csv' — confirm how that file is produced.
file1 = '/kaggle/working/Train Image concatenated_values.csv'
file2 = '/kaggle/working/Train Label concatenated_values.csv'
output_file = 'trained_output_1.csv'

column_name_file1 = 'Quiz'
column_name_file2 = 'Solution'

# Take the first field of every line of each file. No line is skipped, so a
# header line in either input is carried along as an ordinary value.
with open(file1, 'r') as csvfile1:
    reader1 = csv.reader(csvfile1)
    data1 = [row[0] for row in reader1]

with open(file2, 'r') as csvfile2:
    reader2 = csv.reader(csvfile2)
    data2 = [row[0] for row in reader2]

# Pair the values positionally; zip stops at the shorter of the two lists
combined_header = [column_name_file1, column_name_file2]
combined_data = list(zip(data1, data2))

# Emit the Quiz/Solution header followed by all paired rows
with open(output_file, 'w', newline='') as csvfile_combined:
    writer_combined = csv.writer(csvfile_combined)
    writer_combined.writerow(combined_header)
    writer_combined.writerows(combined_data)


In [32]:
# Preview the final table. This relies on `pd` having been imported by an
# earlier cell, and on 'trained_final.csv' already existing — presumably the
# file written by the header-stripping cell, if the working directory is
# /kaggle/working (TODO confirm; that cell carries a lower execution count
# but appears later in the notebook).
df = pd.read_csv('/kaggle/working/trained_final.csv')
df.head(5)

Unnamed: 0,Concatenated Values,Concatenated Values.1
0,0636554170018000590950008620700009000000700203...,6832954174218673597953418625721349968465771233...
1,1007540320002060070048390500010780000405903100...,1697548323582169472748396515314782967465923189...
2,7000508398000250409300782002798004000004970266...,7126548398659237419341782652798614533584971266...
3,0000930078030000290590824039003502016001007081...,2145936878637415297596824139783542616351297481...
4,6507300901986000070370002560003456020000010762...,6527348912986524374371982569713456823852619742...


In [31]:
import csv

# Copy the combined CSV to a new file with its first line removed
input_file = '/kaggle/working/trained_output_1.csv'
output_file = 'trained_final.csv'

with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    header = next(reader)  # first line is read and kept aside, not copied
    data = list(reader)    # every remaining row, in file order

with open(output_file, 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    # The saved `header` is deliberately not written back; only the remaining
    # rows go to the output file.
    writer.writerows(data)
