In [52]:
import sys
sys.path.append('../src')
from sudoku import Sudoku
from time_utils import Timer
import solver
import tester
import statistics as stat
from scipy import stats as sci_stats

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Import from the public IPython.display module: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
# Inject CSS that sets the `.container` element to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Testing

## Test 1

We use a paired t-test to compare two solving methods and check whether the chosen metric (solve time or number of guesses) differs significantly between them.

### Example

In [158]:
# Number of times each solver is run on the board in the timing loop.
trials = 100

In [159]:
# Obtain a board from the project's tester helper, requesting the 'hard' level.
# NOTE(review): presumably fetched from an external puzzle source, in which case
# the board (and the results) may differ between runs — confirm.
hard_board = tester.get_sugoku_board('hard')

In [160]:
# Wrap the board in a Sudoku instance. The timing loop builds its own Sudoku
# from hard_board for every trial, so it does not use this object.
hard_game = Sudoku(hard_board)

In [161]:
# The two solver functions under comparison, in the order they are benchmarked.
solvers = [
    solver.cellwise_mixed_priority_backtracking_manual,
    solver.priority_backtracking_manual,
]

In [162]:
# One Timer per solver, labelled with the solver function's name.
timers = [Timer(solve_fn.__name__) for solve_fn in solvers]

In [163]:
# Benchmark every solver on the same board. A fresh Sudoku is built for every
# trial (presumably because the solvers modify the game they are given — confirm).
for timer, sudo_solver in zip(timers, solvers):
    # Guesses divided by the number of initially empty cells, one entry per
    # trial that this solver completed.
    solver_guesses = []
    for _ in range(trials):
        game = Sudoku(hard_board)
        # Count the empty cells before solving; used to normalise the guesses.
        empty = game.number_empty()
        # Only the solver call sits between start() and stop(), so building
        # the game is excluded from the measurement.
        timer.start()
        done, guesses = sudo_solver(game)
        timer.stop(verbose=False)
        if not done:
            print('unsolved')
        else:
            solver_guesses.append(guesses / empty)

    # statistics.mean raises StatisticsError on an empty sequence, which would
    # abort the cell (and skip the timer summaries) if a solver never finished
    # a trial or trials is 0; report that case instead of crashing.
    if solver_guesses:
        print("{:50}: Avg Number of Percentage Guess: {:.5f}".format(sudo_solver.__name__, stat.mean(solver_guesses)))
    else:
        print("{:50}: no solved trials, guess average unavailable".format(sudo_solver.__name__))
for timer in timers:
    timer.summary()

cellwise_mixed_priority_backtracking_manual       : Avg Number of Percentage Guess: 0.87037
priority_backtracking_manual                      : Avg Number of Percentage Guess: 3.59259
------------------------------
Name               : cellwise_mixed_priority_backtracking_manual
Count              : 100
Total Time         : 2.77990s
Average Time       : 0.02780s
Standard Deviation : 0.00323s
------------------------------
------------------------------
Name               : priority_backtracking_manual  
Count              : 100
Total Time         : 2.76731s
Average Time       : 0.02767s
Standard Deviation : 0.00086s
------------------------------


In [164]:
# Paired (related-samples) t-test between the `times` of the first and second
# timers; the returned result is displayed as the cell's output.
sci_stats.ttest_rel(timers[0].times, timers[1].times)

Ttest_relResult(statistic=0.3942341293335388, pvalue=0.6942561222316737)