In [52]:
import sys
sys.path.append('../src')
from sudoku import Sudoku
from time_utils import Timer
import solver
import tester
import statistics as stat
from scipy import stats as sci_stats

In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (including the project modules added via '../src') are reloaded before each
# cell executes, picking up source edits without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Widen the notebook's .container element to 95% of the page so wide output fits.
# `display`/`HTML` are imported from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated and emits a warning.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# Testing

## Test 1

We use a paired t-test to compare two solving methods and check whether the mean of a chosen metric (solve time or number of guesses) differs significantly between them.

### Example

In [193]:
# Number of timed runs per solver; each run contributes one sample to that
# solver's timer, so this is also the number of pairs fed to the t-test.
trials = 100

In [194]:
# Obtain a single 'hard' board; every trial below is run on this same board.
# NOTE(review): presumably get_sugoku_board fetches a puzzle from an external
# source, in which case results differ between notebook runs — confirm, and
# consider caching the board for reproducibility.
hard_board = tester.get_sugoku_board('hard')

In [195]:
# Build a Sudoku game from the board.
# NOTE(review): hard_game is not referenced by any cell visible in this section
# (the benchmark loop constructs its own Sudoku per run) — confirm whether this
# cell is still needed.
hard_game = Sudoku(hard_board)

In [196]:
# The two solver functions under comparison. Order matters: the t-test cell
# compares the timings at index 0 against those at index 1.
solvers = [solver.cellwise_mixed_priority_backtracking_manual, 
           solver.priority_backtracking_manual]

In [197]:
# One Timer per solver, in the same order as `solvers`, labelled with the
# solver function's name.
timers = [Timer(solve_fn.__name__) for solve_fn in solvers]

In [198]:
# Benchmark every solver on the same board: time each run with that solver's
# Timer and record the guess ratio (guesses / initially empty cells) of each
# solved run.

# Rebuild the timers in this cell so a re-run starts from empty samples.
# NOTE(review): Timer appears to keep every sample (see `.times`), so reusing
# timers created in an earlier cell would mix samples from separate runs.
timers = [Timer(f.__name__) for f in solvers]
guess_ratios = [[] for _ in solvers]    # per solver: ratios of solved runs only
unsolved_counts = [0] * len(solvers)    # per solver: runs that reported not done

for _ in range(trials):
    # Alternate the solvers inside each trial so that sample i of every timer
    # was taken back-to-back; the paired t-test pairs the samples by index.
    for slot, (timer, sudo_solver) in enumerate(zip(timers, solvers)):
        # Fresh game per run, built and inspected outside the timed region.
        game = Sudoku(hard_board)
        empty = game.number_empty()
        timer.start()
        done, guesses = sudo_solver(game)
        timer.stop(verbose=False)
        if done:
            guess_ratios[slot].append(guesses / empty)
        else:
            unsolved_counts[slot] += 1

for sudo_solver, solver_guesses, unsolved in zip(solvers, guess_ratios, unsolved_counts):
    if unsolved:
        # One summary line instead of one 'unsolved' line per failed run.
        print('unsolved: {} of {} runs'.format(unsolved, trials))
    # statistics.mean raises StatisticsError on an empty list, so report nan
    # when the solver never finished a board.
    mean_ratio = stat.mean(solver_guesses) if solver_guesses else float('nan')
    print("{:50}: Avg Number of Percentage Guess: {:.5f}".format(sudo_solver.__name__, mean_ratio))
for timer in timers:
    timer.summary()

cellwise_mixed_priority_backtracking_manual       : Avg Number of Percentage Guess: 0.21053
priority_backtracking_manual                      : Avg Number of Percentage Guess: 1.00000
------------------------------
Name               : cellwise_mixed_priority_backtracking_manual
Count              : 100
Total Time         : 0.79642s
Average Time       : 0.00796s
Standard Deviation : 0.00177s
------------------------------
------------------------------
Name               : priority_backtracking_manual  
Count              : 100
Total Time         : 0.77173s
Average Time       : 0.00772s
Standard Deviation : 0.00049s
------------------------------


In [199]:
# Paired (related-samples) t-test on the per-run times of the two solvers;
# samples are paired by run index, so both sequences must have equal length.
first_solver_times = timers[0].times
second_solver_times = timers[1].times
sci_stats.ttest_rel(first_solver_times, second_solver_times)

Ttest_relResult(statistic=1.3735876260935436, pvalue=0.17267225927746885)