# <center>Artificial Neural Networks and Cognitive Models</center>

# <center>TASK - 2 Data Preparation</center>

In [1]:
import numpy as np
from PIL import Image
import pandas as pd

import warnings
# Silence all warnings for the rest of the notebook (kept from the original setup).
warnings.filterwarnings('ignore')

# Raw string literals: in a normal string, sequences such as "\S", "\A" or "\T"
# are invalid escapes (deprecated, and a syntax error in future Python versions).
# The resulting path values are unchanged.
quizzes = np.load(r"C:\Semester 2\ANN\Portfolio2\Original_Datasets\Train\imgs-002.npy")

solutions = np.load(r"C:\Semester 2\ANN\Portfolio2\Original_Datasets\Train\labels-001.npy")

In [2]:
def get_digit_image_from_grid(sudoku_image, row, column, digit_size=28, grid_size=9):
    """Crop one cell out of a Sudoku grid image and resize it to a square image.

    The grid is divided into ``grid_size`` x ``grid_size`` equally sized cells
    using integer division, so leftover pixels on the right/bottom edge (when
    the image size is not a multiple of ``grid_size``) are ignored.

    Args:
        sudoku_image: NumPy array holding the whole grid image. Only its first
            two dimensions (height, width) are used to size the cells.
        row: Zero-based row index of the cell.
        column: Zero-based column index of the cell.
        digit_size: Side length in pixels of the returned square image.
        grid_size: Number of cells along each side of the grid (default 9).

    Returns:
        tuple: ``(digit_image, digit_array)`` - the resized cell as a PIL image
        and the same pixels as a NumPy array.

    Raises:
        ValueError: If ``row`` or ``column`` lies outside the grid.
    """
    if not (0 <= row < grid_size and 0 <= column < grid_size):
        raise ValueError(
            f"row and column must be in [0, {grid_size}); got row={row}, column={column}"
        )

    # Determine the height and width of each cell in the grid
    grid_height, grid_width = sudoku_image.shape[:2]
    cell_height = grid_height // grid_size
    cell_width = grid_width // grid_size

    # Calculate the top-left corner of the specified cell
    start_y = row * cell_height
    start_x = column * cell_width

    # Crop the cell out of the grid
    cell_pixels = sudoku_image[start_y:start_y + cell_height, start_x:start_x + cell_width]
    digit_image = Image.fromarray(cell_pixels)

    # Image.ANTIALIAS was removed in Pillow 10; it was an alias of LANCZOS.
    # Newer Pillow exposes the filter on Image.Resampling, older releases on
    # Image itself, so resolve whichever is available.
    lanczos = getattr(Image, "Resampling", Image).LANCZOS
    digit_image = digit_image.resize((digit_size, digit_size), lanczos)

    # Convert Image to a NumPy array
    digit_array = np.array(digit_image)

    return digit_image, digit_array

In [3]:
import torch
import numpy as np
import csv
import time
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
class Network(nn.Module):
    """Convolutional classifier mapping a 1-channel 28x28 image to 10 class scores.

    Two convolution + ReLU + 2x2 max-pool stages feed a two-layer fully
    connected head. ``forward`` returns log-probabilities (log-softmax over
    the class dimension).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor. The 3x3 convolutions use padding=1 / stride=1, so
        # only the pooling layers shrink the spatial size: 28 -> 14 -> 7.
        # NOTE: the attribute name (including its spelling) is kept on purpose;
        # it is part of the state_dict keys of any saved checkpoint.
        feature_stages = [
            nn.Conv2d(1, 12, kernel_size=3, stride=1, padding=1),  # input: (N, 1, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(12, 24, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.convolutaional_neural_network_layers = nn.Sequential(*feature_stages)

        # Classifier head: 24 feature maps of 7x7 flattened -> 64 hidden -> 10 classes.
        head_stages = [
            nn.Linear(24 * 7 * 7, 64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, 10),
        ]
        self.linear_layers = nn.Sequential(*head_stages)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10) for input ``x``."""
        feature_maps = self.convolutaional_neural_network_layers(x)
        # Flatten everything except the batch dimension for the linear head.
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        class_scores = self.linear_layers(flat_features)
        return F.log_softmax(class_scores, dim=1)

In [4]:
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted source.
# NOTE(review): the result is used as a module below, so this is presumably a
# fully pickled `Network`; recent PyTorch releases may need `weights_only=False`
# to load such a file - confirm against the installed version.
# Load onto `device` (not a hard-coded CPU) so the model and the input tensors,
# which are moved to `device` before prediction, live on the same device.
trained_model = torch.load(r"C:\Semester 2\ANN\Portfolio2\mymodel1.pt", map_location=device)
# Inference only: eval mode disables Dropout so predictions are deterministic.
trained_model = trained_model.eval()

# Make digit predictions for Sudoku solutions

In [5]:
# Inference only: eval mode disables Dropout so the predicted digits are deterministic.
trained_model.eval()
# Feed inputs to whichever device the model's weights live on (avoids a
# CPU/GPU mismatch between the model and its inputs).
model_device = next(trained_model.parameters()).device

solutions_data = []
with torch.no_grad():
    for image_idx in range(solutions.shape[0]):
        # Crop the 81 cells in row-major order so the digit string reads
        # left-to-right, top-to-bottom.
        cell_arrays = [
            get_digit_image_from_grid(solutions[image_idx], row, col)[1]
            for row in range(9)
            for col in range(9)
        ]
        batch = torch.tensor(np.stack(cell_arrays), dtype=torch.float32, device=model_device)
        batch = batch.view(-1, 1, 28, 28)

        # One forward pass per puzzle instead of one per cell. The model emits
        # log-probabilities, so argmax can be taken directly (no extra softmax).
        predicted = trained_model(batch).argmax(dim=1)

        # A cell whose pixels are all zero is reported as digit 0, regardless
        # of what the model would predict for it.
        is_blank = (batch == 0).view(batch.size(0), -1).all(dim=1)
        predicted = predicted.masked_fill(is_blank, 0)

        solutions_data.append(''.join(str(digit) for digit in predicted.tolist()))

solutions_df = pd.DataFrame({'solutions': solutions_data})
solutions_df

Unnamed: 0,solutions
0,6832954174218673597953418625721349868465791233...
1,1697548323582169472748396515314782967465923189...
2,7126548398659237419341782652798614533584971266...
3,2145936878637415297596824139783542616351297481...
4,6527348911986524374371982569713456823852619742...
...,...
49995,1896243576279351845438172964167598239724835613...
49996,7413592689657823412386149751895364276572481933...
49997,2574639181347986256892157434219573863758264919...
49998,9382571461524639786748912537256193844693825173...


# Make digit predictions for Sudoku quizzes

In [6]:
# Inference only: eval mode disables Dropout so the predicted digits are deterministic.
trained_model.eval()
# Feed inputs to whichever device the model's weights live on (avoids a
# CPU/GPU mismatch between the model and its inputs).
model_device = next(trained_model.parameters()).device

quizzes_data = []
with torch.no_grad():
    for image_idx in range(quizzes.shape[0]):
        # Crop the 81 cells in row-major order so the digit string reads
        # left-to-right, top-to-bottom.
        cell_arrays = [
            get_digit_image_from_grid(quizzes[image_idx], row, col)[1]
            for row in range(9)
            for col in range(9)
        ]
        batch = torch.tensor(np.stack(cell_arrays), dtype=torch.float32, device=model_device)
        batch = batch.view(-1, 1, 28, 28)

        # One forward pass per puzzle instead of one per cell. The model emits
        # log-probabilities, so argmax can be taken directly (no extra softmax).
        predicted = trained_model(batch).argmax(dim=1)

        # A cell whose pixels are all zero is reported as digit 0, regardless
        # of what the model would predict for it.
        is_blank = (batch == 0).view(batch.size(0), -1).all(dim=1)
        predicted = predicted.masked_fill(is_blank, 0)

        quizzes_data.append(''.join(str(digit) for digit in predicted.tolist()))

quizzes_df = pd.DataFrame({'quizzes': quizzes_data})
quizzes_df

Unnamed: 0,quizzes
0,0832954170018000590950008620700009000000700203...
1,1007540320002060070048390500010780000405903100...
2,7000508398000230409300782002798004000004970266...
3,0000930078030000290590824039003502016001007081...
4,6507300901986000070370002560003456020000010742...
...,...
49995,0896000576200001000408102060107098239020800000...
49996,7413090080050800402300000701000360276570481903...
49997,2004639101040080000800057034200070060008064919...
49998,9082000000024609706748000507000193800000025003...


In [7]:
import pandas as pd

# Place the quiz strings and their solution strings side by side, one puzzle
# per row (rows are aligned on the frames' index).
paired_frames = (quizzes_df, solutions_df)
training_data = pd.concat(paired_frames, axis="columns")
training_data.columns = ['quizzes', 'solutions']
training_data

Unnamed: 0,quizzes,solutions
0,0832954170018000590950008620700009000000700203...,6832954174218673597953418625721349868465791233...
1,1007540320002060070048390500010780000405903100...,1697548323582169472748396515314782967465923189...
2,7000508398000230409300782002798004000004970266...,7126548398659237419341782652798614533584971266...
3,0000930078030000290590824039003502016001007081...,2145936878637415297596824139783542616351297481...
4,6507300901986000070370002560003456020000010742...,6527348911986524374371982569713456823852619742...
...,...,...
49995,0896000576200001000408102060107098239020800000...,1896243576279351845438172964167598239724835613...
49996,7413090080050800402300000701000360276570481903...,7413592689657823412386149751895364276572481933...
49997,2004639101040080000800057034200070060008064919...,2574639181347986256892157434219573863758264919...
49998,9082000000024609706748000507000193800000025003...,9382571461524639786748912537256193844693825173...


# Make digit predictions for the test set

In [9]:
test = np.load(r"C:/Semester 2/ANN/Portfolio2/Original_Datasets/Test/Test_imgs.npy")

# Inference only: eval mode disables Dropout so the predicted digits are deterministic.
trained_model.eval()
# Feed inputs to whichever device the model's weights live on (avoids a
# CPU/GPU mismatch between the model and its inputs).
model_device = next(trained_model.parameters()).device

testing_data = []
with torch.no_grad():
    for image_idx in range(test.shape[0]):
        # Crop the 81 cells in row-major order so the digit string reads
        # left-to-right, top-to-bottom.
        cell_arrays = [
            get_digit_image_from_grid(test[image_idx], row, col)[1]
            for row in range(9)
            for col in range(9)
        ]
        batch = torch.tensor(np.stack(cell_arrays), dtype=torch.float32, device=model_device)
        batch = batch.view(-1, 1, 28, 28)

        # One forward pass per puzzle instead of one per cell. The model emits
        # log-probabilities, so argmax can be taken directly (no extra softmax).
        predicted = trained_model(batch).argmax(dim=1)

        # A cell whose pixels are all zero is reported as digit 0, regardless
        # of what the model would predict for it.
        is_blank = (batch == 0).view(batch.size(0), -1).all(dim=1)
        predicted = predicted.masked_fill(is_blank, 0)

        testing_data.append(''.join(str(digit) for digit in predicted.tolist()))

testing_df = pd.DataFrame({'test': testing_data})
testing_df

Unnamed: 0,test
0,9027008137614000250800000706070950009106000542...
1,0708010200207300001060920040900754800520080038...
2,0009001650806000306012437008070950230307046009...
3,1580096070000760027264183000157040000040800510...
4,0020500096038002059700008361080005032060350800...
...,...
9995,0001050300407062100009405607510943060202507040...
9996,1058000404001300056030509170006005315060030092...
9997,0000278000703000900487500367800000040524009783...
9998,0020950000391800001050020980017200803275009164...


In [11]:
# Save both prepared datasets as CSV files (without the index column).
# Raw string literals keep the Windows backslashes literal; in a normal string
# "\S", "\A" and "\P" are invalid escape sequences. The path values are unchanged.
# NOTE(review): the digit strings can start with '0'; whoever reads these files
# back presumably needs to load the columns as strings (e.g. dtype=str) - confirm.
testing_df.to_csv(r'C:\Semester 2\ANN\Portfolio2\Prepared_datasets\testing_df.csv', index=False)
training_data.to_csv(r'C:\Semester 2\ANN\Portfolio2\Prepared_datasets\training_data.csv', index=False)

**Name: THARUN KUMAR KORINE PALLI**

**Matriculation Number: 5123708**

**Email: tharunkumar.korinepalli@study.thws.de**

# <center>THE END</center>