In [3]:
import requests
import json
import re
from tqdm import tqdm

# API URL for Fireworks model completion endpoint
api_url = "https://api.fireworks.ai/inference/v1/completions"

# Authorization token (replace with your actual Fireworks API Key)
api_key = "fw_3ZcbBTZRScdUSrVf2NuwPRJ2"

# The deployed model identifier (from your output)
model_id = "accounts/myonlineretail2023-b215a8/deployedModels/d901106fd727401eb7855c700ea69b29-8a5acd69"

# Function to query the model
def query_model(prompt):
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    data = {
        "model": model_id,
        "prompt": prompt,
        "max_tokens": 100
    }

    response = requests.post(api_url, headers=headers, json=data)
    
    if response.status_code == 200:
        return response.json()["choices"][0]["text"]
    else:
        print(f"Error querying model: {response.status_code}, {response.text}")
        return None


def extract_number_from_output(output):
    match = re.search(r"####\s*([\d,]+(\.\d+)?)", output)
    if match:
        return float(match.group(1).replace(',', ''))
    return None

# Load the validation dataset (GSM8K) from a JSONL file
def load_validation_dataset(file_path):
    dataset = []
    with open(file_path, 'r') as f:
        for line in f:
            dataset.append(json.loads(line))
    return dataset



correct = 0
total = 0
reasoning_outputs = []

# Load the validation dataset and process it
validation_file_path = "gsm8k_val.jsonl"
reasoning_output_file = "reasoning_outputs.jsonl"

# File to save the progress
progress_file = "correct_progress_mistral.txt"

dataset = load_validation_dataset(validation_file_path)

for i, example in tqdm(enumerate(dataset)):
    example = dataset[i]
    prompt = "Give the final answer as #### <answer> where answer is the final numerical value ," + example['question']  # Use 'prompt' field to query the model
    try:
        ground_truth = float(re.search(r"####\s*(\d+(\.\d+)?)", example['answer']).group(1))
    except:
        continue

    if ground_truth is None:
        continue
    
    # Query the model with the prompt
    model_output = query_model(prompt)
    
    if model_output is None:
        continue

    # Extract the predicted final answer from the output
    predicted_number = extract_number_from_output(model_output)
    
    if predicted_number is None:
        continue
    else:
        reasoning_outputs.append({
            "question": example["question"],
            "model_output": model_output,
            "ground_truth": ground_truth
        })
        
        # Compare the predicted number with the ground truth
        if predicted_number == ground_truth:
            correct += 1
            print(f"Match: Predicted {predicted_number}, Expected {ground_truth}")
            print(f"Current correct count: {correct}")  # Print the current value of correct after each match
        else:
            print(f"Mismatch: Predicted {predicted_number}, Expected {ground_truth}")

        total += 1

    # Save progress to a file
    with open(progress_file, "w") as f:
        f.write(f"Correct: {correct}, Total: {total}\n")

# Save the reasoning outputs to a separate JSONL file
with open(progress_file, 'w') as f:
    for reasoning in reasoning_outputs:
        json.dump(reasoning, f)
        f.write("\n")

# Print final correct count before calculating accuracy
print(f"Final correct count: {correct}")  # Final correct count
accuracy = (correct / total) * 100
print(f"Accuracy on validation dataset: {accuracy}%")

1it [00:03,  3.47s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 1


2it [00:04,  1.92s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 2


5it [00:09,  1.60s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 3


13it [00:23,  1.74s/it]

Mismatch: Predicted 60.0, Expected 13.0


19it [00:33,  1.67s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 4


20it [00:35,  1.64s/it]

Mismatch: Predicted 2.0, Expected 6.0


22it [00:38,  1.68s/it]

Mismatch: Predicted 3.0, Expected 14.0


23it [00:39,  1.45s/it]

Mismatch: Predicted 5.0, Expected 7.0


29it [00:49,  1.66s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 5


30it [00:51,  1.68s/it]

Mismatch: Predicted 3.0, Expected 104.0


35it [00:59,  1.57s/it]

Mismatch: Predicted 20.5, Expected 23.0


36it [01:01,  1.53s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 6


39it [01:05,  1.48s/it]

Mismatch: Predicted 6.67, Expected 10.0


41it [01:08,  1.54s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 7


42it [01:10,  1.56s/it]

Match: Predicted 200.0, Expected 200.0
Current correct count: 8


43it [01:12,  1.58s/it]

Match: Predicted 26.0, Expected 26.0
Current correct count: 9


44it [01:13,  1.42s/it]

Mismatch: Predicted 200.0, Expected 48.0


49it [01:22,  1.68s/it]

Mismatch: Predicted 4.0, Expected 8.0


51it [01:25,  1.69s/it]

Match: Predicted 294.0, Expected 294.0
Current correct count: 10


52it [01:27,  1.69s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 11


57it [01:35,  1.59s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 12


60it [01:40,  1.59s/it]

Mismatch: Predicted 157.0, Expected 187.0


61it [01:42,  1.57s/it]

Match: Predicted 17.0, Expected 17.0
Current correct count: 13


63it [01:45,  1.53s/it]

Mismatch: Predicted 52500.0, Expected 25000.0


65it [01:48,  1.63s/it]

Mismatch: Predicted 36.0, Expected 300.0


67it [01:52,  1.80s/it]

Mismatch: Predicted 88.0, Expected 48.0


69it [01:55,  1.43s/it]

Mismatch: Predicted 8.0, Expected 36.0


70it [01:56,  1.46s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 14


72it [01:59,  1.37s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 15


80it [02:12,  1.51s/it]

Match: Predicted 70.0, Expected 70.0
Current correct count: 16


81it [02:13,  1.35s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 17


82it [02:15,  1.46s/it]

Match: Predicted 17.0, Expected 17.0
Current correct count: 18


83it [02:16,  1.53s/it]

Match: Predicted 623.0, Expected 623.0
Current correct count: 19


84it [02:17,  1.36s/it]

Match: Predicted 600.0, Expected 600.0
Current correct count: 20


85it [02:19,  1.36s/it]

Mismatch: Predicted 8.0, Expected 15.0


89it [02:25,  1.48s/it]

Match: Predicted 8000.0, Expected 8000.0
Current correct count: 21


90it [02:27,  1.51s/it]

Mismatch: Predicted 18.0, Expected 24.0


92it [02:30,  1.50s/it]

Match: Predicted 28.0, Expected 28.0
Current correct count: 22


93it [02:31,  1.50s/it]

Mismatch: Predicted 11.0, Expected 4.0


94it [02:32,  1.42s/it]

Mismatch: Predicted 44.0, Expected 36.0


97it [02:38,  1.61s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 23


99it [02:41,  1.50s/it]

Mismatch: Predicted 30.0, Expected 5.0


102it [02:46,  1.66s/it]

Mismatch: Predicted 9.75, Expected 6.0


103it [02:48,  1.66s/it]

Mismatch: Predicted 16.0, Expected 26.0


104it [02:49,  1.68s/it]

Mismatch: Predicted 139.0, Expected 140.0


106it [02:52,  1.59s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 24


113it [03:05,  1.77s/it]

Match: Predicted 24.0, Expected 24.0
Current correct count: 25


114it [03:06,  1.66s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 26


117it [03:11,  1.70s/it]

Mismatch: Predicted 13.0, Expected 42.0


118it [03:13,  1.53s/it]

Mismatch: Predicted 720.0, Expected 360.0


121it [03:17,  1.60s/it]

Match: Predicted 240.0, Expected 240.0
Current correct count: 27


127it [03:27,  1.54s/it]

Mismatch: Predicted 44.0, Expected 82.0


131it [03:35,  1.82s/it]

Mismatch: Predicted 10.0, Expected 30.0


132it [03:37,  1.79s/it]

Match: Predicted 940.0, Expected 940.0
Current correct count: 28


134it [03:40,  1.69s/it]

Match: Predicted 13.0, Expected 13.0
Current correct count: 29


135it [03:41,  1.59s/it]

Match: Predicted 720.0, Expected 720.0
Current correct count: 30


136it [03:42,  1.37s/it]

Mismatch: Predicted 55.0, Expected 40.0


137it [03:43,  1.26s/it]

Mismatch: Predicted 18.0, Expected 6.0


140it [03:48,  1.55s/it]

Match: Predicted 70.0, Expected 70.0
Current correct count: 31


141it [03:50,  1.61s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 32


143it [03:53,  1.59s/it]

Match: Predicted 140.0, Expected 140.0
Current correct count: 33


149it [04:04,  1.73s/it]

Mismatch: Predicted 14.0, Expected 30.0


150it [04:05,  1.58s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 34


153it [04:11,  1.76s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 35


160it [04:22,  1.60s/it]

Mismatch: Predicted 7.0, Expected 15.0


161it [04:24,  1.46s/it]

Mismatch: Predicted 8.0, Expected 16.0


165it [04:30,  1.55s/it]

Mismatch: Predicted 55.0, Expected 15.0


167it [04:33,  1.53s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 36


169it [04:36,  1.46s/it]

Mismatch: Predicted 10.0, Expected 18.0


172it [04:41,  1.57s/it]

Mismatch: Predicted 720.0, Expected 1210.0


179it [04:53,  1.69s/it]

Mismatch: Predicted 100.0, Expected 122.0


181it [04:57,  1.64s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 37


183it [05:00,  1.72s/it]

Mismatch: Predicted 30.0, Expected 23.0


186it [05:03,  1.20s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 38


191it [05:11,  1.39s/it]

Match: Predicted 360.0, Expected 360.0
Current correct count: 39


192it [05:12,  1.40s/it]

Mismatch: Predicted 1000.0, Expected 5.0


195it [05:17,  1.39s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 40


210it [05:43,  1.75s/it]

Mismatch: Predicted 60.0, Expected 145.0


211it [05:45,  1.74s/it]

Mismatch: Predicted 0.0, Expected 10.0


213it [05:48,  1.61s/it]

Mismatch: Predicted 2.5, Expected 5.0


218it [05:56,  1.67s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 41


224it [06:08,  1.83s/it]

Mismatch: Predicted 25000.0, Expected 20000.0


230it [06:16,  1.49s/it]

Mismatch: Predicted 54.0, Expected 21.0


236it [06:27,  1.60s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 42


237it [06:28,  1.63s/it]

Mismatch: Predicted 150.0, Expected 31.0


238it [06:30,  1.57s/it]

Match: Predicted 90.0, Expected 90.0
Current correct count: 43


243it [06:39,  1.64s/it]

Mismatch: Predicted 30.0, Expected 3.0


246it [06:44,  1.67s/it]

Mismatch: Predicted 3.0, Expected 7.0


248it [06:47,  1.67s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 44


249it [06:49,  1.60s/it]

Match: Predicted 300.0, Expected 300.0
Current correct count: 45


252it [06:53,  1.57s/it]

Mismatch: Predicted 34.0, Expected 70.0


254it [06:56,  1.41s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 46


255it [06:58,  1.50s/it]

Mismatch: Predicted 36.0, Expected 84.0


261it [07:07,  1.40s/it]

Mismatch: Predicted 33.0, Expected 11.0


262it [07:08,  1.53s/it]

Match: Predicted 62.0, Expected 62.0
Current correct count: 47


263it [07:10,  1.56s/it]

Match: Predicted 270.0, Expected 270.0
Current correct count: 48


270it [07:21,  1.42s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 49


273it [07:26,  1.54s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 50


280it [07:39,  1.76s/it]

Mismatch: Predicted 9.0, Expected 6.0


281it [07:40,  1.60s/it]

Mismatch: Predicted 4.5, Expected 7.0


283it [07:43,  1.57s/it]

Match: Predicted 195.0, Expected 195.0
Current correct count: 51


286it [07:48,  1.59s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 52


288it [07:51,  1.47s/it]

Mismatch: Predicted 44.0, Expected 8.0


293it [08:00,  1.80s/it]

Mismatch: Predicted 8.0, Expected 75.0


301it [08:14,  1.69s/it]

Match: Predicted 78.0, Expected 78.0
Current correct count: 53


306it [08:22,  1.64s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 54


307it [08:23,  1.46s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 55


318it [08:42,  1.70s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 56


324it [08:52,  1.67s/it]

Mismatch: Predicted 639.0, Expected 75.0


325it [08:54,  1.69s/it]

Mismatch: Predicted 1.4, Expected 14.0


327it [08:57,  1.58s/it]

Mismatch: Predicted 18.0, Expected 14.0


331it [09:04,  1.66s/it]

Mismatch: Predicted 28.85, Expected 31.0


333it [09:07,  1.61s/it]

Match: Predicted 44.0, Expected 44.0
Current correct count: 57


334it [09:08,  1.42s/it]

Mismatch: Predicted 50.0, Expected 100.0


335it [09:09,  1.24s/it]

Mismatch: Predicted 4.0, Expected 6.0


336it [09:10,  1.16s/it]

Match: Predicted 310.0, Expected 310.0
Current correct count: 58


337it [09:11,  1.11s/it]

Mismatch: Predicted 48.0, Expected 72.0


338it [09:12,  1.16s/it]

Mismatch: Predicted 8.0, Expected 1.0


339it [09:13,  1.18s/it]

Mismatch: Predicted 56.0, Expected 60.0


345it [09:24,  1.70s/it]

Mismatch: Predicted 7.0, Expected 2.0


347it [09:27,  1.47s/it]

Mismatch: Predicted 27.0, Expected 9.0


353it [09:37,  1.60s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 59


358it [09:45,  1.52s/it]

Mismatch: Predicted 3.0, Expected 2.0


361it [09:50,  1.57s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 60


365it [09:57,  1.73s/it]

Mismatch: Predicted 48.0, Expected 49.0


366it [09:58,  1.51s/it]

Mismatch: Predicted 9.0, Expected 18.0


367it [10:00,  1.46s/it]

Mismatch: Predicted 27.0, Expected 9.0


369it [10:03,  1.57s/it]

Mismatch: Predicted 32.0, Expected 18.0


371it [10:06,  1.48s/it]

Match: Predicted 320.0, Expected 320.0
Current correct count: 61


373it [10:09,  1.67s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 62


376it [10:14,  1.55s/it]

Match: Predicted 48.0, Expected 48.0
Current correct count: 63


379it [10:19,  1.56s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 64


380it [10:20,  1.51s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 65


385it [10:29,  1.62s/it]

Mismatch: Predicted 7.2, Expected 32.0


387it [10:32,  1.56s/it]

Match: Predicted 80.0, Expected 80.0
Current correct count: 66


388it [10:33,  1.39s/it]

Mismatch: Predicted 40.0, Expected 36.0


389it [10:35,  1.45s/it]

Match: Predicted 88.0, Expected 88.0
Current correct count: 67


391it [10:38,  1.50s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 68


393it [10:41,  1.61s/it]

Mismatch: Predicted 40.0, Expected 34.0


398it [10:49,  1.55s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 69


400it [10:52,  1.58s/it]

Mismatch: Predicted 2.0, Expected 12.0


401it [10:54,  1.54s/it]

Mismatch: Predicted 72.0, Expected 48.0


408it [11:05,  1.45s/it]

Mismatch: Predicted 1000.0, Expected 2000.0


409it [11:06,  1.15s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 70


414it [11:14,  1.53s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 71


427it [11:37,  1.80s/it]

Mismatch: Predicted 7.0, Expected 11.0


432it [11:46,  1.73s/it]

Match: Predicted 1.0, Expected 1.0
Current correct count: 72


433it [11:48,  1.69s/it]

Mismatch: Predicted 60.0, Expected 30.0


436it [11:53,  1.68s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 73


437it [11:54,  1.41s/it]

Mismatch: Predicted 12.0, Expected 15.0


438it [11:55,  1.49s/it]

Mismatch: Predicted 5.0, Expected 7.0


440it [11:59,  1.58s/it]

Mismatch: Predicted 24.0, Expected 17.0


441it [12:00,  1.57s/it]

Mismatch: Predicted 66.0, Expected 98.0


445it [12:07,  1.68s/it]

Mismatch: Predicted 90.0, Expected 20.0


448it [12:12,  1.67s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 74


456it [12:27,  1.79s/it]

Mismatch: Predicted 1.0, Expected 1920.0


457it [12:28,  1.77s/it]

Mismatch: Predicted 40.0, Expected 78.0


459it [12:31,  1.63s/it]

Mismatch: Predicted 67.0, Expected 35.0


462it [12:36,  1.65s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 75


463it [12:37,  1.44s/it]

Match: Predicted 71.0, Expected 71.0
Current correct count: 76


464it [12:38,  1.31s/it]

Mismatch: Predicted 16.0, Expected 18.0


465it [12:40,  1.40s/it]

Mismatch: Predicted 5.0, Expected 6.0


467it [12:43,  1.45s/it]

Mismatch: Predicted 21.0, Expected 1.0


468it [12:44,  1.45s/it]

Mismatch: Predicted 720.0, Expected 1200.0


472it [12:51,  1.62s/it]

Mismatch: Predicted 60.0, Expected 80.0


473it [12:52,  1.35s/it]

Mismatch: Predicted 4.0, Expected 6.0


474it [12:53,  1.29s/it]

Mismatch: Predicted 12.0, Expected 10.0


477it [12:58,  1.53s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 77


478it [13:00,  1.49s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 78


481it [13:05,  1.65s/it]

Match: Predicted 11.0, Expected 11.0
Current correct count: 79


483it [13:08,  1.66s/it]

Match: Predicted 26.0, Expected 26.0
Current correct count: 80


485it [13:11,  1.57s/it]

Match: Predicted 240.0, Expected 240.0
Current correct count: 81


486it [13:13,  1.59s/it]

Match: Predicted 220.0, Expected 220.0
Current correct count: 82


487it [13:14,  1.52s/it]

Mismatch: Predicted 12.0, Expected 6.0


491it [13:18,  1.12s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 83


492it [13:20,  1.24s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 84


493it [13:21,  1.26s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 85


496it [13:26,  1.44s/it]

Mismatch: Predicted 50.0, Expected 200.0


498it [13:30,  1.60s/it]

Mismatch: Predicted 224.0, Expected 112.0


504it [13:38,  1.40s/it]

Mismatch: Predicted 16.0, Expected 18.0


512it [13:52,  1.58s/it]

Match: Predicted 110.0, Expected 110.0
Current correct count: 86


513it [13:54,  1.62s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 87


517it [14:01,  1.69s/it]

Mismatch: Predicted 1125.0, Expected 375.0


518it [14:02,  1.58s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 88


520it [14:05,  1.44s/it]

Mismatch: Predicted 80.0, Expected 280.0


524it [14:11,  1.55s/it]

Mismatch: Predicted 64.0, Expected 74.0


526it [14:15,  1.57s/it]

Mismatch: Predicted 36.0, Expected 12.0


528it [14:18,  1.51s/it]

Mismatch: Predicted 300.0, Expected 48.0


530it [14:21,  1.69s/it]

Match: Predicted 11.0, Expected 11.0
Current correct count: 89


533it [14:26,  1.59s/it]

Mismatch: Predicted 22.0, Expected 25.0


539it [14:36,  1.49s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 90


540it [14:37,  1.45s/it]

Mismatch: Predicted 60.0, Expected 35.0


542it [14:40,  1.51s/it]

Mismatch: Predicted 150.0, Expected 50.0


544it [14:43,  1.34s/it]

Mismatch: Predicted 16.0, Expected 6.0


546it [14:46,  1.33s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 91


547it [14:47,  1.39s/it]

Mismatch: Predicted 52.0, Expected 130.0


556it [15:03,  1.63s/it]

Match: Predicted 2.0, Expected 2.0
Current correct count: 92


557it [15:04,  1.60s/it]

Match: Predicted 160.0, Expected 160.0
Current correct count: 93


559it [15:07,  1.50s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 94


560it [15:08,  1.37s/it]

Mismatch: Predicted 3.0, Expected 16.0


562it [15:11,  1.37s/it]

Mismatch: Predicted 160.0, Expected 180.0


566it [15:18,  1.64s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 95


569it [15:23,  1.71s/it]

Mismatch: Predicted 155.0, Expected 175.0


570it [15:24,  1.50s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 96


572it [15:28,  1.66s/it]

Match: Predicted 308.0, Expected 308.0
Current correct count: 97


574it [15:31,  1.56s/it]

Mismatch: Predicted 3300.0, Expected 600.0


576it [15:34,  1.60s/it]

Mismatch: Predicted 288.0, Expected 36.0


577it [15:36,  1.64s/it]

Mismatch: Predicted 2592.0, Expected 11232.0


578it [15:37,  1.44s/it]

Mismatch: Predicted 430.0, Expected 40.0


580it [15:40,  1.40s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 98


581it [15:41,  1.51s/it]

Match: Predicted 500.0, Expected 500.0
Current correct count: 99


584it [15:47,  1.65s/it]

Mismatch: Predicted 1.0, Expected 120.0


585it [15:48,  1.67s/it]

Mismatch: Predicted 17.64, Expected 2.0


593it [16:02,  1.79s/it]

Match: Predicted 1.0, Expected 1.0
Current correct count: 100


595it [16:06,  1.65s/it]

Match: Predicted 42.0, Expected 42.0
Current correct count: 101


599it [16:12,  1.66s/it]

Mismatch: Predicted 66.0, Expected 69.0


601it [16:15,  1.57s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 102


602it [16:17,  1.54s/it]

Mismatch: Predicted 101.0, Expected 104.0


603it [16:18,  1.55s/it]

Mismatch: Predicted 225.222, Expected 5.0


605it [16:22,  1.61s/it]

Mismatch: Predicted 44.0, Expected 12.0


610it [16:31,  1.67s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 103


617it [16:42,  1.52s/it]

Mismatch: Predicted 16.0, Expected 10.0


622it [16:52,  1.80s/it]

Mismatch: Predicted 42000.0, Expected 3000.0


623it [16:53,  1.78s/it]

Mismatch: Predicted 10.0, Expected 7.0


624it [16:54,  1.51s/it]

Mismatch: Predicted 20.0, Expected 8.0


626it [16:58,  1.61s/it]

Match: Predicted 160.0, Expected 160.0
Current correct count: 104


627it [16:59,  1.56s/it]

Match: Predicted 72.0, Expected 72.0
Current correct count: 105


629it [17:02,  1.55s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 106


631it [17:06,  1.71s/it]

Mismatch: Predicted 1.0, Expected 10.0


641it [17:23,  1.74s/it]

Mismatch: Predicted 44000.0, Expected 43.0


645it [17:30,  1.65s/it]

Match: Predicted 29.0, Expected 29.0
Current correct count: 107


646it [17:31,  1.55s/it]

Match: Predicted 48.0, Expected 48.0
Current correct count: 108


648it [17:34,  1.42s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 109


649it [17:35,  1.39s/it]

Mismatch: Predicted 56.0, Expected 54.0


653it [17:42,  1.52s/it]

Match: Predicted 75.0, Expected 75.0
Current correct count: 110


655it [17:45,  1.50s/it]

Mismatch: Predicted 4.0, Expected 2.0


657it [17:48,  1.63s/it]

Mismatch: Predicted 19.0, Expected 10.0


663it [17:58,  1.60s/it]

Mismatch: Predicted 4.0, Expected 7.0


664it [18:00,  1.56s/it]

Mismatch: Predicted 293.0, Expected 193.0


668it [18:06,  1.56s/it]

Match: Predicted 53.0, Expected 53.0
Current correct count: 111


669it [18:08,  1.48s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 112


671it [18:11,  1.60s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 113


674it [18:16,  1.59s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 114


676it [18:19,  1.67s/it]

Mismatch: Predicted 31.0, Expected 33.0


679it [18:24,  1.67s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 115


680it [18:26,  1.51s/it]

Mismatch: Predicted 648.0, Expected 576.0


687it [18:38,  1.72s/it]

Match: Predicted 80.0, Expected 80.0
Current correct count: 116


696it [18:53,  1.65s/it]

Mismatch: Predicted 80.0, Expected 5.0


698it [18:56,  1.60s/it]

Mismatch: Predicted 48.0, Expected 27.0


700it [18:59,  1.47s/it]

Mismatch: Predicted 4.0, Expected 8.0


705it [19:08,  1.70s/it]

Mismatch: Predicted 26.0, Expected 50.0


706it [19:10,  1.73s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 117


707it [19:11,  1.58s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 118


708it [19:13,  1.68s/it]

Mismatch: Predicted 32.0, Expected 8.0


709it [19:15,  1.61s/it]

Match: Predicted 168.0, Expected 168.0
Current correct count: 119


714it [19:24,  1.73s/it]

Match: Predicted 576.0, Expected 576.0
Current correct count: 120


718it [19:30,  1.56s/it]

Mismatch: Predicted 8.75, Expected 5.0


726it [19:44,  1.60s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 121


731it [19:52,  1.63s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 122


734it [19:58,  1.77s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 123


735it [19:59,  1.66s/it]

Match: Predicted 17.0, Expected 17.0
Current correct count: 124


737it [20:02,  1.55s/it]

Mismatch: Predicted 13.0, Expected 3.0


738it [20:03,  1.35s/it]

Mismatch: Predicted 14.0, Expected 2.0


742it [20:10,  1.61s/it]

Match: Predicted 28.0, Expected 28.0
Current correct count: 125


745it [20:15,  1.75s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 126


750it [20:24,  1.68s/it]

Mismatch: Predicted 6.0, Expected 3.0


752it [20:27,  1.66s/it]

Mismatch: Predicted 18.0, Expected 43.0


756it [20:34,  1.68s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 127


759it [20:40,  1.68s/it]

Match: Predicted 133.0, Expected 133.0
Current correct count: 128


765it [20:50,  1.78s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 129


766it [20:52,  1.65s/it]

Match: Predicted 490.0, Expected 490.0
Current correct count: 130


767it [20:53,  1.51s/it]

Mismatch: Predicted 0.0, Expected 12.0


768it [20:55,  1.55s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 131


769it [20:56,  1.62s/it]

Mismatch: Predicted 600.0, Expected 675.0


770it [20:57,  1.47s/it]

Mismatch: Predicted 27.0, Expected 110.0


777it [21:09,  1.56s/it]

Match: Predicted 800.0, Expected 800.0
Current correct count: 132


782it [21:18,  1.62s/it]

Mismatch: Predicted 8.5, Expected 17.0


787it [21:27,  1.76s/it]

Mismatch: Predicted 14.0, Expected 142.0


789it [21:30,  1.66s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 133


792it [21:34,  1.52s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 134


793it [21:36,  1.55s/it]

Match: Predicted 75.0, Expected 75.0
Current correct count: 135


795it [21:40,  1.71s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 136


796it [21:41,  1.65s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 137


798it [21:45,  1.65s/it]

Mismatch: Predicted 9.0, Expected 540.0


800it [21:48,  1.58s/it]

Mismatch: Predicted 2.0, Expected 4.0


801it [21:49,  1.62s/it]

Mismatch: Predicted 421.5, Expected 428.0


803it [21:53,  1.62s/it]

Mismatch: Predicted 21.0, Expected 6.0


805it [21:55,  1.43s/it]

Mismatch: Predicted 200.0, Expected 20.0


807it [21:58,  1.44s/it]

Mismatch: Predicted 1.07, Expected 70.0


809it [22:02,  1.65s/it]

Mismatch: Predicted 5.0, Expected 12.0


812it [22:07,  1.55s/it]

Mismatch: Predicted 84.0, Expected 60.0


815it [22:12,  1.62s/it]

Mismatch: Predicted 226.0, Expected 11.0


818it [22:16,  1.48s/it]

Mismatch: Predicted 44.0, Expected 15.0


820it [22:20,  1.73s/it]

Mismatch: Predicted 6250.0, Expected 6.0


821it [22:21,  1.43s/it]

Mismatch: Predicted 960.0, Expected 720.0


827it [22:31,  1.61s/it]

Mismatch: Predicted 37.0, Expected 27.0


829it [22:34,  1.59s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 138


839it [22:52,  1.88s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 139


840it [22:54,  1.83s/it]

Match: Predicted 2.0, Expected 2.0
Current correct count: 140


841it [22:55,  1.71s/it]

Mismatch: Predicted 21.0, Expected 16.0


843it [22:59,  1.70s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 141


844it [23:00,  1.65s/it]

Match: Predicted 19.0, Expected 19.0
Current correct count: 142


846it [23:04,  1.60s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 143


850it [23:10,  1.62s/it]

Match: Predicted 75.0, Expected 75.0
Current correct count: 144


852it [23:13,  1.61s/it]

Match: Predicted 110.0, Expected 110.0
Current correct count: 145


854it [23:16,  1.54s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 146


858it [23:24,  1.66s/it]

Match: Predicted 14000.0, Expected 14000.0
Current correct count: 147


868it [23:41,  1.63s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 148


870it [23:44,  1.64s/it]

Match: Predicted 500.0, Expected 500.0
Current correct count: 149


871it [23:46,  1.62s/it]

Mismatch: Predicted 4.0, Expected 40.0


874it [23:51,  1.56s/it]

Mismatch: Predicted 12.08, Expected 12.0


875it [23:52,  1.50s/it]

Match: Predicted 132.0, Expected 132.0
Current correct count: 150


876it [23:54,  1.58s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 151


879it [23:59,  1.63s/it]

Mismatch: Predicted 7.0, Expected 5.0


882it [24:04,  1.62s/it]

Mismatch: Predicted 14.0, Expected 16.0


884it [24:07,  1.55s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 152


885it [24:08,  1.41s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 153


888it [24:13,  1.69s/it]

Mismatch: Predicted 48.0, Expected 34.0


889it [24:15,  1.66s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 154


892it [24:20,  1.66s/it]

Match: Predicted 55.0, Expected 55.0
Current correct count: 155


896it [24:27,  1.58s/it]

Mismatch: Predicted 77.0, Expected 74.0


897it [24:28,  1.67s/it]

Match: Predicted 250.0, Expected 250.0
Current correct count: 156


898it [24:30,  1.61s/it]

Mismatch: Predicted 3.0, Expected 1.0


900it [24:33,  1.66s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 157


903it [24:39,  1.78s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 158


905it [24:42,  1.73s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 159


906it [24:44,  1.65s/it]

Mismatch: Predicted 1.0, Expected 5.0


908it [24:47,  1.60s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 160


910it [24:50,  1.63s/it]

Match: Predicted 38.0, Expected 38.0
Current correct count: 161


911it [24:52,  1.56s/it]

Match: Predicted 700.0, Expected 700.0
Current correct count: 162


912it [24:54,  1.61s/it]

Mismatch: Predicted 12.8, Expected 64.0


915it [24:58,  1.56s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 163


916it [25:00,  1.66s/it]

Mismatch: Predicted 2.0, Expected 23.0


918it [25:03,  1.56s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 164


919it [25:06,  1.81s/it]

Mismatch: Predicted 5.0, Expected 56.0


921it [25:09,  1.71s/it]

Mismatch: Predicted 88.0, Expected 47.0


923it [25:12,  1.74s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 165


924it [25:14,  1.77s/it]

Mismatch: Predicted 4.0, Expected 2.0


930it [25:25,  1.66s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 166


941it [25:46,  1.81s/it]

Match: Predicted 280.0, Expected 280.0
Current correct count: 167


943it [25:49,  1.76s/it]

Mismatch: Predicted 166.25, Expected 205.0


947it [25:56,  1.63s/it]

Match: Predicted 450.0, Expected 450.0
Current correct count: 168


948it [25:58,  1.72s/it]

Mismatch: Predicted 2.0, Expected 13.0


950it [26:01,  1.69s/it]

Mismatch: Predicted 78.0, Expected 42.0


952it [26:04,  1.61s/it]

Match: Predicted 300.0, Expected 300.0
Current correct count: 169


955it [26:09,  1.59s/it]

Match: Predicted 34.0, Expected 34.0
Current correct count: 170


957it [26:12,  1.54s/it]

Mismatch: Predicted 12.0, Expected 1.0


958it [26:14,  1.60s/it]

Match: Predicted 45.0, Expected 45.0
Current correct count: 171


959it [26:16,  1.63s/it]

Mismatch: Predicted 38.4, Expected 40.0


960it [26:18,  1.74s/it]

Mismatch: Predicted 8.0, Expected 7.0


970it [26:35,  1.65s/it]

Match: Predicted 1800.0, Expected 1800.0
Current correct count: 172


971it [26:36,  1.55s/it]

Match: Predicted 33.0, Expected 33.0
Current correct count: 173


972it [26:38,  1.57s/it]

Mismatch: Predicted 50.0, Expected 100.0


975it [26:42,  1.43s/it]

Mismatch: Predicted 6.0, Expected 5.0


979it [26:49,  1.63s/it]

Mismatch: Predicted 140.0, Expected 160.0


984it [26:58,  1.61s/it]

Mismatch: Predicted 2.5, Expected 15.0


985it [27:00,  1.60s/it]

Mismatch: Predicted 504.0, Expected 342.0


986it [27:01,  1.49s/it]

Match: Predicted 63.0, Expected 63.0
Current correct count: 174


988it [27:04,  1.44s/it]

Mismatch: Predicted 25.0, Expected 3.0


991it [27:08,  1.38s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 175


996it [27:16,  1.49s/it]

Mismatch: Predicted 18.0, Expected 12.0


1009it [27:39,  1.64s/it]

Match: Predicted 230.0, Expected 230.0
Current correct count: 176


1011it [27:43,  1.80s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 177


1016it [27:51,  1.74s/it]

Mismatch: Predicted 26.0, Expected 78.0


1019it [27:57,  1.72s/it]

Mismatch: Predicted 10.0, Expected 60.0


1021it [28:00,  1.57s/it]

Match: Predicted 22.0, Expected 22.0
Current correct count: 178


1024it [28:05,  1.59s/it]

Match: Predicted 77.0, Expected 77.0
Current correct count: 179


1026it [28:08,  1.66s/it]

Mismatch: Predicted 100.0, Expected 30.0


1028it [28:12,  1.79s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 180


1029it [28:14,  1.72s/it]

Match: Predicted 200.0, Expected 200.0
Current correct count: 181


1032it [28:19,  1.71s/it]

Mismatch: Predicted 6.0, Expected 5.0


1033it [28:20,  1.59s/it]

Mismatch: Predicted 225.0, Expected 450.0


1042it [28:35,  1.52s/it]

Mismatch: Predicted 104.0, Expected 101.0


1046it [28:42,  1.67s/it]

Mismatch: Predicted 720.0, Expected 420.0


1048it [28:45,  1.58s/it]

Mismatch: Predicted 3.0, Expected 10.0


1050it [28:48,  1.52s/it]

Mismatch: Predicted 35.0, Expected 20.0


1056it [29:01,  1.99s/it]

Match: Predicted 2304.0, Expected 2304.0
Current correct count: 182


1057it [29:02,  1.70s/it]

Mismatch: Predicted 104.0, Expected 156.0


1059it [29:05,  1.54s/it]

Mismatch: Predicted 300.0, Expected 250.0


1060it [29:06,  1.43s/it]

Mismatch: Predicted 24.0, Expected 2.0


1062it [29:09,  1.48s/it]

Mismatch: Predicted 62.0, Expected 58.0


1063it [29:10,  1.38s/it]

Mismatch: Predicted 364.0, Expected 482.0


1064it [29:12,  1.46s/it]

Match: Predicted 320.0, Expected 320.0
Current correct count: 183


1066it [29:15,  1.44s/it]

Mismatch: Predicted 100.0, Expected 95.0


1075it [29:31,  1.74s/it]

Match: Predicted 750.0, Expected 750.0
Current correct count: 184


1076it [29:32,  1.52s/it]

Mismatch: Predicted 10.0, Expected 16.0


1079it [29:38,  1.66s/it]

Mismatch: Predicted 19.0, Expected 8.0


1080it [29:39,  1.68s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 185


1082it [29:42,  1.42s/it]

Match: Predicted 195.0, Expected 195.0
Current correct count: 186


1083it [29:43,  1.35s/it]

Mismatch: Predicted 1.5, Expected 2.0


1085it [29:47,  1.52s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 187


1088it [29:51,  1.50s/it]

Mismatch: Predicted 2.0, Expected 12.0


1092it [29:59,  1.70s/it]

Match: Predicted 1000.0, Expected 1000.0
Current correct count: 188


1095it [30:04,  1.68s/it]

Mismatch: Predicted 27.0, Expected 25.0


1096it [30:05,  1.63s/it]

Mismatch: Predicted 90.0, Expected 270.0


1099it [30:10,  1.63s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 189


1101it [30:13,  1.56s/it]

Mismatch: Predicted 10.0, Expected 5.0


1105it [30:20,  1.50s/it]

Match: Predicted 24.0, Expected 24.0
Current correct count: 190


1107it [30:23,  1.49s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 191


1109it [30:26,  1.58s/it]

Match: Predicted 38.0, Expected 38.0
Current correct count: 192


1113it [30:33,  1.73s/it]

Mismatch: Predicted 12.0, Expected 5.0


1115it [30:35,  1.31s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 193


1117it [30:38,  1.44s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 194


1122it [30:47,  1.78s/it]

Match: Predicted 35.0, Expected 35.0
Current correct count: 195


1128it [30:58,  1.69s/it]

Mismatch: Predicted 300.0, Expected 750.0


1137it [31:13,  1.63s/it]

Match: Predicted 26.0, Expected 26.0
Current correct count: 196


1143it [31:24,  1.74s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 197


1145it [31:27,  1.61s/it]

Match: Predicted 34.0, Expected 34.0
Current correct count: 198


1149it [31:34,  1.56s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 199


1150it [31:36,  1.59s/it]

Mismatch: Predicted 309.15, Expected 93.0


1151it [31:37,  1.53s/it]

Mismatch: Predicted 39.0, Expected 21.0


1153it [31:40,  1.44s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 200


1154it [31:42,  1.79s/it]

Mismatch: Predicted 10.0, Expected 20.0


1155it [31:44,  1.70s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 201


1156it [31:46,  1.68s/it]

Match: Predicted 13.0, Expected 13.0
Current correct count: 202


1158it [31:50,  1.87s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 203


1161it [31:55,  1.82s/it]

Match: Predicted 1125.0, Expected 1125.0
Current correct count: 204


1163it [31:59,  1.78s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 205


1164it [32:00,  1.51s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 206


1165it [32:01,  1.49s/it]

Mismatch: Predicted 8.0, Expected 9.0


1168it [32:06,  1.55s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 207


1173it [32:14,  1.55s/it]

Match: Predicted 93.0, Expected 93.0
Current correct count: 208


1179it [32:25,  1.74s/it]

Mismatch: Predicted 42.0, Expected 17.0


1182it [32:30,  1.74s/it]

Mismatch: Predicted 11.75, Expected 20.0


1184it [32:33,  1.58s/it]

Mismatch: Predicted 15.0, Expected 11.0


1185it [32:35,  1.57s/it]

Mismatch: Predicted 1224.0, Expected 306.0


1187it [32:38,  1.59s/it]

Match: Predicted 19.0, Expected 19.0
Current correct count: 209


1189it [32:41,  1.38s/it]

Mismatch: Predicted 9.0, Expected 24.0


1190it [32:42,  1.39s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 210


1192it [32:45,  1.54s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 211


1193it [32:47,  1.56s/it]

Mismatch: Predicted 21.25, Expected 280.0


1194it [32:48,  1.41s/it]

Mismatch: Predicted 16.5, Expected 9.0


1201it [32:59,  1.38s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 212


1206it [33:07,  1.37s/it]

Mismatch: Predicted 1.0, Expected 4.0


1208it [33:10,  1.54s/it]

Match: Predicted 64.0, Expected 64.0
Current correct count: 213


1211it [33:14,  1.42s/it]

Mismatch: Predicted 12.0, Expected 288.0


1212it [33:16,  1.51s/it]

Mismatch: Predicted 496.0, Expected 448.0


1213it [33:18,  1.56s/it]

Match: Predicted 150.0, Expected 150.0
Current correct count: 214


1214it [33:19,  1.43s/it]

Mismatch: Predicted 310.0, Expected 31.0


1215it [33:21,  1.53s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 215


1219it [33:28,  1.74s/it]

Mismatch: Predicted 7.5, Expected 225.0


1221it [33:31,  1.54s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 216


1225it [33:37,  1.56s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 217


1227it [33:41,  1.63s/it]

Mismatch: Predicted 1.0, Expected 90.0


1228it [33:42,  1.59s/it]

Mismatch: Predicted 54.0, Expected 66.0


1229it [33:44,  1.59s/it]

Match: Predicted 31.0, Expected 31.0
Current correct count: 218


1233it [33:51,  1.59s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 219


1235it [33:54,  1.71s/it]

Mismatch: Predicted 27.0, Expected 12.0


1238it [34:00,  1.75s/it]

Match: Predicted 78.0, Expected 78.0
Current correct count: 220


1245it [34:14,  1.90s/it]

Mismatch: Predicted 4.0, Expected 35.0


1246it [34:15,  1.69s/it]

Mismatch: Predicted 18.5, Expected 96.0


1247it [34:17,  1.52s/it]

Match: Predicted 3360.0, Expected 3360.0
Current correct count: 221


1252it [34:25,  1.66s/it]

Mismatch: Predicted 20.0, Expected 30.0


1254it [34:28,  1.58s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 222


1256it [34:31,  1.55s/it]

Mismatch: Predicted 13.0, Expected 12.0


1261it [34:41,  1.86s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 223


1268it [34:52,  1.64s/it]

Mismatch: Predicted 60.0, Expected 40.0


1274it [35:03,  1.64s/it]

Match: Predicted 27.0, Expected 27.0
Current correct count: 224


1277it [35:08,  1.68s/it]

Mismatch: Predicted 160.0, Expected 70.0


1278it [35:09,  1.58s/it]

Mismatch: Predicted 43.0, Expected 48.0


1279it [35:11,  1.62s/it]

Match: Predicted 665.0, Expected 665.0
Current correct count: 225


1280it [35:12,  1.59s/it]

Match: Predicted 180.0, Expected 180.0
Current correct count: 226


1281it [35:14,  1.54s/it]

Mismatch: Predicted 12.0, Expected 7.0


1286it [35:23,  1.72s/it]

Match: Predicted 1218.0, Expected 1218.0
Current correct count: 227


1287it [35:24,  1.71s/it]

Mismatch: Predicted 95.0, Expected 105.0


1292it [35:33,  1.58s/it]

Mismatch: Predicted 9.0, Expected 27.0


1294it [35:35,  1.47s/it]

Match: Predicted 140000.0, Expected 140000.0
Current correct count: 228


1295it [35:37,  1.55s/it]

Mismatch: Predicted 28.0, Expected 36.0


1300it [35:46,  1.71s/it]

Mismatch: Predicted 20.5, Expected 13.0


1303it [35:51,  1.66s/it]

Match: Predicted 72.0, Expected 72.0
Current correct count: 229


1306it [35:56,  1.62s/it]

Match: Predicted 2325.0, Expected 2325.0
Current correct count: 230


1309it [36:00,  1.47s/it]

Mismatch: Predicted 20.0, Expected 30.0


1311it [36:04,  1.70s/it]

Mismatch: Predicted 72.0, Expected 64.0


1314it [36:09,  1.80s/it]

Mismatch: Predicted 2.5, Expected 2.0


1319it [36:18,  1.65s/it]

Mismatch: Predicted 1.75, Expected 14.0
Final correct count: 230
Accuracy on validation dataset: 48.421052631578945%





In [4]:
print(f"accuracy: {accuracy}")

accuracy: 48.421052631578945
