# Sudoku Quality Test

In [None]:
import sys
import pandas as pd
import numpy as np
sys.path.append("..")

In [None]:
# Directory that holds the input data, relative to this notebook.
path = "../data/"
# Load the sudoku data set. The cells below treat the first 81 columns as the
# quiz and all remaining columns as the solution.
data = pd.read_csv(path+"sudoku_1_mio_sequenced_columns.csv")
# Debugging aid: uncomment the next line to work on the first 10 rows only.
#data = data[:10]
display(data)

## All cells are filled => no Null values

In [None]:
# Keep only the rows in which every single column holds a value.
data = data.dropna(axis=0, how="any")

## All numbers must be between 0 and 9 for the quizzes 

In [None]:
# One range condition per quiz column (the first 81 columns). A row is kept
# only if all of its quiz values lie between 0 and 9 (inclusive).
query_conditions = [
    f"{col_name} >= 0 & {col_name} <= 9" for col_name in data.columns[:81]
]
data = data.query(" & ".join(query_conditions))

## Solutions must contain numbers between 1 and 9

In [None]:
# One range condition per solution column (everything after the first 81
# columns). A row is kept only if all of its solution values are > 0 and <= 9.
query_conditions = [
    f"{col_name} > 0 & {col_name} <=9" for col_name in data.columns[81:]
]
data = data.query(" & ".join(query_conditions))

## All quiz cells != 0 must be identical to the corresponding solution cell

In [None]:
# Build the query.
# The first half of the columns holds the quiz, the second half the solution;
# column i of the quiz belongs to column i + num_cols of the solution.
num_cols = data.columns.size // 2
# Per cell: the quiz cell is either not given (0) or it has to equal the
# solution cell. Parentheses keep each cell's condition self-contained.
query_conditions = [
    f"({quiz_col} == 0 | {quiz_col} == {solution_col})"
    for quiz_col, solution_col in zip(
        data.columns[:num_cols], data.columns[num_cols:]
    )
]
# Every cell has to satisfy its condition, so the conditions are AND-ed.
# (OR-ing them would keep a row as soon as a single given cell matches.)
data = data.query(" & ".join(query_conditions))
data

## Checking Solutions: Checking all Rows and Columns

In [None]:
# The row/column checks only need the solution part of each record, i.e.
# every column after the 81 quiz columns.
df_only_solutions = data.loc[:, data.columns[81:]]
display(df_only_solutions)

def is_in_range_and_unique(values, min_val, max_val):
    """Check that ``values`` has no duplicates and spans ``min_val``..``max_val``.

    Args:
        values: Sized collection of hashable, mutually comparable items
            (the callers in this notebook pass lists of cell values).
        min_val: Value the smallest element has to be equal to.
        max_val: Value the largest element has to be equal to.

    Returns:
        True if all elements are distinct, the smallest element equals
        ``min_val`` and the largest equals ``max_val``. False otherwise,
        including for an empty collection.
    """
    # An empty collection cannot span the range; without this guard
    # min()/max() would raise ValueError.
    if len(values) == 0:
        return False
    # Duplicates shrink the set, so differing sizes mean "not unique".
    if len(values) != len(set(values)):
        return False
    return min(values) == min_val and max(values) == max_val

def check_unique_num_in_row_and_col(row_as_series):
    """Check every grid row and grid column of one flattened solution.

    The series is read positionally: each run of 9 consecutive entries is
    treated as one grid row, and every 9th entry (starting at offsets 0..8)
    as one grid column.

    Args:
        row_as_series: pd.Series with the cell values of a single solution
            (presumably 81 values, one per cell - confirm against the data).

    Returns:
        True if every grid row and every grid column passes
        ``is_in_range_and_unique(..., 1, 9)``, otherwise False.
    """
    cell_count = row_as_series.size

    # Grid rows: consecutive chunks of 9 cells. ``.iloc`` keeps the slice
    # positional regardless of the index type of the series.
    for start in range(0, cell_count, 9):
        grid_row = row_as_series.iloc[start:start + 9].to_list()
        if not is_in_range_and_unique(grid_row, 1, 9):
            return False

    # Grid columns: every 9th cell, beginning at each of the 9 offsets.
    for start in range(9):
        grid_col = row_as_series.iloc[start::9].to_list()
        if not is_in_range_and_unique(grid_col, 1, 9):
            return False

    return True

# Keep only the records whose solution passes the row/column check.
# Filtering with the boolean result directly avoids writing a temporary
# "test_row" column into `data`, which is itself a filtered frame (such an
# assignment can trigger pandas' SettingWithCopyWarning).
data = data[df_only_solutions.apply(check_unique_num_in_row_and_col, axis=1)]
data
       

## Checking Solutions: Checking all Squares

In [None]:
# check square
# there are nine 3x3 squares 
# the first square has the indexes (S1A, S1B, S1C, 
#                                   S2A, S2B, S2C,
#                                   S3A, S3B, S3C)

def get_square_definition():
    """Return the cell names of the nine 3x3 squares of a 9x9 sudoku.

    Cell names have the form ``"S<row><col>"`` with rows 1..9 and columns
    A..I. The squares are listed band by band (top to bottom) and, inside a
    band, from left to right; the names inside a square are listed row-wise.

    Returns:
        A list of 9 lists, each holding the 9 cell names of one square.
    """
    column_letters = "ABCDEFGHI"
    row_numbers = range(1, 10)
    square_size = 3

    squares = []
    for row_offset in range(0, len(row_numbers), square_size):
        band_rows = row_numbers[row_offset:row_offset + square_size]
        for col_offset in range(0, len(column_letters), square_size):
            stack_cols = column_letters[col_offset:col_offset + square_size]
            squares.append(
                [f"S{row}{col}" for row in band_rows for col in stack_cols]
            )
    return squares

def check_squares(row_as_series:pd.Series) -> bool:
    """Check all 3x3 squares of one solution.

    Args:
        row_as_series: pd.Series of one solution, looked up by the cell
            names produced by ``get_square_definition()``.

    Returns:
        True if every square passes ``is_in_range_and_unique(..., 1, 9)``,
        otherwise False.
    """
    # all() stops at the first failing square, just like an early return.
    return all(
        is_in_range_and_unique(row_as_series.get(square_cells).to_list(), 1, 9)
        for square_cells in get_square_definition()
    )
    

# Re-select the solution columns, because `data` has been filtered since the
# previous selection was made.
df_only_solutions = data[data.columns[81:]]
# Keep only the records whose solution passes the 3x3 square check.
# Filtering with the boolean result directly avoids writing a temporary
# "test_row" column into `data` (such an assignment on a filtered frame can
# trigger pandas' SettingWithCopyWarning).
data = data[df_only_solutions.apply(check_squares, axis=1)]
data