In [7]:
import requests
import json
import re
from tqdm import tqdm

# API URL for Fireworks model completion endpoint
api_url = "https://api.fireworks.ai/inference/v1/completions"

# Authorization token (replace with your actual Fireworks API Key)
api_key = "fw_3ZZYwEmpqUDubGv4AqyMTHzx"

# The deployed model identifier (from your output)
model_id = "accounts/parthiv21078-d741a7/deployedModels/f3fc9dbff9b34e93aac6c1cd152a3916-45c5bbc1"

# Function to query the model
def query_model(prompt):
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    data = {
        "model": model_id,
        "prompt": prompt,
        "max_tokens": 100
    }

    response = requests.post(api_url, headers=headers, json=data)
    
    if response.status_code == 200:
        return response.json()["choices"][0]["text"]
    else:
        print(f"Error querying model: {response.status_code}, {response.text}")
        return None


def extract_number_from_output(output):
    match = re.search(r"####\s*([\d,]+(\.\d+)?)", output)
    if match:
        return float(match.group(1).replace(',', ''))
    return None

# Load the validation dataset (GSM8K) from a JSONL file
def load_validation_dataset(file_path):
    dataset = []
    with open(file_path, 'r') as f:
        for line in f:
            dataset.append(json.loads(line))
    return dataset



correct = 0
total = 0
reasoning_outputs = []

# Load the validation dataset and process it
validation_file_path = "gsm8k_val.jsonl"
reasoning_output_file = "reasoning_outputs.jsonl"

# File to save the progress
progress_file = "correct_progress.txt"

dataset = load_validation_dataset(validation_file_path)

for i, example in tqdm(enumerate(dataset)):
    example = dataset[i]
    prompt = "Give the final answer as #### <answer> where answer is the final numerical value ," + example['question']  # Use 'prompt' field to query the model
    try:
        ground_truth = float(re.search(r"####\s*(\d+(\.\d+)?)", example['answer']).group(1))
    except:
        continue

    if ground_truth is None:
        continue
    
    # Query the model with the prompt
    model_output = query_model(prompt)
    
    if model_output is None:
        continue

    # Extract the predicted final answer from the output
    predicted_number = extract_number_from_output(model_output)
    
    if predicted_number is None:
        continue
    else:
        reasoning_outputs.append({
            "question": example["question"],
            "model_output": model_output,
            "ground_truth": ground_truth
        })
        
        # Compare the predicted number with the ground truth
        if predicted_number == ground_truth:
            correct += 1
            print(f"Match: Predicted {predicted_number}, Expected {ground_truth}")
            print(f"Current correct count: {correct}")  # Print the current value of correct after each match
        else:
            print(f"Mismatch: Predicted {predicted_number}, Expected {ground_truth}")

        total += 1

    # Save progress to a file
    with open(progress_file, "w") as f:
        f.write(f"Correct: {correct}, Total: {total}\n")

# Save the reasoning outputs to a separate JSONL file
with open(progress_file, 'w') as f:
    for reasoning in reasoning_outputs:
        json.dump(reasoning, f)
        f.write("\n")

# Print final correct count before calculating accuracy
print(f"Final correct count: {correct}")  # Final correct count
accuracy = (correct / total) * 100
print(f"Accuracy on validation dataset: {accuracy}%")

1it [00:01,  1.65s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 1


2it [00:02,  1.17s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 2


3it [00:03,  1.09it/s]

Mismatch: Predicted 25000.0, Expected 70000.0


4it [00:04,  1.08it/s]

Match: Predicted 540.0, Expected 540.0
Current correct count: 3


7it [00:07,  1.06s/it]

Match: Predicted 260.0, Expected 260.0
Current correct count: 4


11it [00:12,  1.19s/it]

Match: Predicted 366.0, Expected 366.0
Current correct count: 5


14it [00:15,  1.00s/it]

Mismatch: Predicted 11.0, Expected 18.0


17it [00:18,  1.05it/s]

Match: Predicted 230.0, Expected 230.0
Current correct count: 6


19it [00:20,  1.07s/it]

Mismatch: Predicted 30.0, Expected 7.0


22it [00:24,  1.23s/it]

Mismatch: Predicted 2.0, Expected 14.0


23it [00:25,  1.31s/it]

Mismatch: Predicted 3.0, Expected 7.0


24it [00:26,  1.16s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 7


25it [00:27,  1.07s/it]

Mismatch: Predicted 24.75, Expected 26.0


26it [00:28,  1.07s/it]

Mismatch: Predicted 2.75, Expected 2.0


27it [00:29,  1.02s/it]

Mismatch: Predicted 127.5, Expected 243.0


29it [00:31,  1.04it/s]

Mismatch: Predicted 45.0, Expected 25.0


30it [00:32,  1.02it/s]

Match: Predicted 104.0, Expected 104.0
Current correct count: 8


33it [00:36,  1.13s/it]

Match: Predicted 35.0, Expected 35.0
Current correct count: 9


35it [00:38,  1.26s/it]

Match: Predicted 23.0, Expected 23.0
Current correct count: 10


41it [00:45,  1.11s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 11


42it [00:46,  1.10s/it]

Match: Predicted 200.0, Expected 200.0
Current correct count: 12


43it [00:47,  1.05it/s]

Mismatch: Predicted 9.0, Expected 26.0


48it [00:53,  1.14s/it]

Mismatch: Predicted 500.0, Expected 800.0


49it [00:54,  1.08s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 13


51it [00:56,  1.14s/it]

Mismatch: Predicted 3528.0, Expected 294.0


52it [00:57,  1.04s/it]

Mismatch: Predicted 1.0, Expected 5.0


55it [01:01,  1.09s/it]

Match: Predicted 40.0, Expected 40.0
Current correct count: 14


56it [01:02,  1.16s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 15


57it [01:03,  1.08s/it]

Mismatch: Predicted 4.0, Expected 3.0


58it [01:04,  1.07s/it]

Match: Predicted 83.0, Expected 83.0
Current correct count: 16


60it [01:06,  1.05s/it]

Match: Predicted 187.0, Expected 187.0
Current correct count: 17


62it [01:08,  1.07s/it]

Mismatch: Predicted 2600.0, Expected 1430.0


65it [01:12,  1.07s/it]

Mismatch: Predicted 2.0, Expected 300.0


68it [01:15,  1.16s/it]

Mismatch: Predicted 455.0, Expected 595.0


70it [01:18,  1.25s/it]

Mismatch: Predicted 140.0, Expected 60.0


72it [01:20,  1.18s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 18


73it [01:21,  1.19s/it]

Match: Predicted 221.0, Expected 221.0
Current correct count: 19


80it [01:30,  1.18s/it]

Match: Predicted 70.0, Expected 70.0
Current correct count: 20


81it [01:31,  1.05s/it]

Mismatch: Predicted 16.0, Expected 10.0


83it [01:33,  1.11s/it]

Match: Predicted 623.0, Expected 623.0
Current correct count: 21


84it [01:34,  1.00s/it]

Match: Predicted 600.0, Expected 600.0
Current correct count: 22


85it [01:35,  1.14it/s]

Mismatch: Predicted 20.0, Expected 15.0


87it [01:37,  1.11it/s]

Mismatch: Predicted 16.0, Expected 22.0


89it [01:39,  1.02s/it]

Match: Predicted 8000.0, Expected 8000.0
Current correct count: 23


90it [01:40,  1.00s/it]

Mismatch: Predicted 32.0, Expected 24.0


92it [01:43,  1.21s/it]

Match: Predicted 28.0, Expected 28.0
Current correct count: 24


93it [01:44,  1.18s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 25


95it [01:46,  1.17s/it]

Mismatch: Predicted 192.0, Expected 348.0


97it [01:48,  1.11s/it]

Mismatch: Predicted 4.0, Expected 3.0


98it [01:49,  1.06it/s]

Mismatch: Predicted 32.0, Expected 12.0


100it [01:51,  1.05s/it]

Mismatch: Predicted 18.0, Expected 58.0


103it [01:56,  1.37s/it]

Mismatch: Predicted 30.0, Expected 26.0


104it [01:57,  1.29s/it]

Match: Predicted 140.0, Expected 140.0
Current correct count: 26


106it [01:59,  1.18s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 27


109it [02:02,  1.07s/it]

Mismatch: Predicted 52.5, Expected 50.0


111it [02:04,  1.02s/it]

Mismatch: Predicted 19.0, Expected 45.0


112it [02:05,  1.01it/s]

Mismatch: Predicted 128.0, Expected 16.0


113it [02:07,  1.19s/it]

Mismatch: Predicted 12.0, Expected 24.0


114it [02:07,  1.01s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 28


118it [02:11,  1.03it/s]

Mismatch: Predicted 6.0, Expected 360.0


121it [02:15,  1.12s/it]

Mismatch: Predicted 56.0, Expected 240.0


122it [02:16,  1.09s/it]

Match: Predicted 27.0, Expected 27.0
Current correct count: 29


124it [02:18,  1.11s/it]

Mismatch: Predicted 65.0, Expected 50.0


125it [02:19,  1.12s/it]

Mismatch: Predicted 50.0, Expected 10.0


127it [02:21,  1.01it/s]

Mismatch: Predicted 44.0, Expected 82.0


128it [02:22,  1.04it/s]

Match: Predicted 120.0, Expected 120.0
Current correct count: 30


132it [02:28,  1.25s/it]

Match: Predicted 940.0, Expected 940.0
Current correct count: 31


134it [02:30,  1.11s/it]

Mismatch: Predicted 31.0, Expected 13.0


135it [02:31,  1.02s/it]

Match: Predicted 720.0, Expected 720.0
Current correct count: 32


136it [02:32,  1.02s/it]

Mismatch: Predicted 55.0, Expected 40.0


137it [02:33,  1.06s/it]

Mismatch: Predicted 18.0, Expected 6.0


140it [02:37,  1.19s/it]

Match: Predicted 70.0, Expected 70.0
Current correct count: 33


141it [02:37,  1.03s/it]

Mismatch: Predicted 4.0, Expected 20.0


142it [02:38,  1.04it/s]

Mismatch: Predicted 350.0, Expected 400.0


144it [02:41,  1.12s/it]

Mismatch: Predicted 14.0, Expected 16.0


145it [02:42,  1.06s/it]

Mismatch: Predicted 81.0, Expected 20.0


146it [02:43,  1.06s/it]

Mismatch: Predicted 40.0, Expected 4000.0


150it [02:47,  1.02s/it]

Mismatch: Predicted 48.0, Expected 16.0


153it [02:51,  1.16s/it]

Mismatch: Predicted 9.0, Expected 4.0


154it [02:52,  1.09s/it]

Mismatch: Predicted 35.0, Expected 48.0


157it [02:56,  1.26s/it]

Mismatch: Predicted 1800.0, Expected 1400.0


159it [02:58,  1.29s/it]

Match: Predicted 34.0, Expected 34.0
Current correct count: 34


160it [02:59,  1.18s/it]

Mismatch: Predicted 19.0, Expected 15.0


161it [03:00,  1.09s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 35


164it [03:03,  1.05it/s]

Mismatch: Predicted 34.0, Expected 50.0


167it [03:06,  1.10it/s]

Mismatch: Predicted 45.0, Expected 5.0


168it [03:07,  1.03it/s]

Mismatch: Predicted 6.0, Expected 16.0


169it [03:08,  1.10it/s]

Match: Predicted 18.0, Expected 18.0
Current correct count: 36


176it [03:16,  1.14s/it]

Mismatch: Predicted 4.0, Expected 15.0


177it [03:17,  1.14s/it]

Mismatch: Predicted 80.0, Expected 100.0


179it [03:20,  1.13s/it]

Match: Predicted 122.0, Expected 122.0
Current correct count: 37


180it [03:20,  1.08s/it]

Match: Predicted 130.0, Expected 130.0
Current correct count: 38


181it [03:21,  1.03s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 39


182it [03:22,  1.04s/it]

Mismatch: Predicted 20.0, Expected 160.0


183it [03:23,  1.00s/it]

Match: Predicted 23.0, Expected 23.0
Current correct count: 40


186it [03:28,  1.22s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 41


188it [03:30,  1.11s/it]

Match: Predicted 106.0, Expected 106.0
Current correct count: 42


191it [03:33,  1.02it/s]

Mismatch: Predicted 120.0, Expected 360.0


192it [03:34,  1.03it/s]

Match: Predicted 5.0, Expected 5.0
Current correct count: 43


195it [03:37,  1.04it/s]

Mismatch: Predicted 18.0, Expected 10.0


196it [03:38,  1.13it/s]

Mismatch: Predicted 150.0, Expected 12.0


197it [03:39,  1.07it/s]

Match: Predicted 120.0, Expected 120.0
Current correct count: 44


198it [03:40,  1.07it/s]

Mismatch: Predicted 15377.0, Expected 6277.0


203it [03:46,  1.15s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 45


204it [03:46,  1.04s/it]

Mismatch: Predicted 18.0, Expected 31.0


209it [03:54,  1.24s/it]

Match: Predicted 76.0, Expected 76.0
Current correct count: 46


213it [03:59,  1.18s/it]

Mismatch: Predicted 1.0, Expected 5.0


214it [04:00,  1.04s/it]

Mismatch: Predicted 350.0, Expected 250.0


216it [04:01,  1.06it/s]

Match: Predicted 44.0, Expected 44.0
Current correct count: 47


218it [04:03,  1.10it/s]

Mismatch: Predicted 11.0, Expected 15.0


221it [04:07,  1.08s/it]

Match: Predicted 70.0, Expected 70.0
Current correct count: 48


223it [04:09,  1.06it/s]

Mismatch: Predicted 130.0, Expected 140.0


224it [04:10,  1.01s/it]

Match: Predicted 20000.0, Expected 20000.0
Current correct count: 49


225it [04:10,  1.12it/s]

Mismatch: Predicted 48.0, Expected 180.0


232it [04:19,  1.00it/s]

Mismatch: Predicted 10.0, Expected 50.0


233it [04:20,  1.10s/it]

Match: Predicted 75.0, Expected 75.0
Current correct count: 50


234it [04:21,  1.11s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 51


236it [04:23,  1.13s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 52


237it [04:24,  1.03it/s]

Mismatch: Predicted 125.0, Expected 31.0


238it [04:26,  1.15s/it]

Mismatch: Predicted 70.0, Expected 90.0


241it [04:29,  1.19s/it]

Mismatch: Predicted 27.0, Expected 21.0


242it [04:30,  1.02s/it]

Mismatch: Predicted 7.0, Expected 6.0


245it [04:33,  1.05it/s]

Mismatch: Predicted 17.0, Expected 20.0


248it [04:36,  1.01s/it]

Mismatch: Predicted 30.0, Expected 32.0


249it [04:37,  1.07it/s]

Mismatch: Predicted 120.0, Expected 300.0


252it [04:40,  1.03it/s]

Match: Predicted 70.0, Expected 70.0
Current correct count: 53


254it [04:42,  1.06it/s]

Match: Predicted 18.0, Expected 18.0
Current correct count: 54


255it [04:43,  1.02it/s]

Mismatch: Predicted 36.0, Expected 84.0


256it [04:44,  1.00it/s]

Mismatch: Predicted 16.0, Expected 192.0


258it [04:47,  1.18s/it]

Mismatch: Predicted 560.0, Expected 5600.0


259it [04:48,  1.14s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 55


260it [04:49,  1.11s/it]

Mismatch: Predicted 90.0, Expected 168.0


262it [04:51,  1.06s/it]

Match: Predicted 62.0, Expected 62.0
Current correct count: 56


263it [04:52,  1.06s/it]

Match: Predicted 270.0, Expected 270.0
Current correct count: 57


264it [04:53,  1.02s/it]

Mismatch: Predicted 14.0, Expected 8.0


265it [04:54,  1.08it/s]

Mismatch: Predicted 175.0, Expected 400.0


269it [04:59,  1.12s/it]

Mismatch: Predicted 2875.0, Expected 1375.0


270it [05:00,  1.02s/it]

Mismatch: Predicted 16.0, Expected 4.0


271it [05:01,  1.19s/it]

Match: Predicted 762.0, Expected 762.0
Current correct count: 58


273it [05:04,  1.16s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 59


276it [05:07,  1.13s/it]

Mismatch: Predicted 222.0, Expected 138.0


279it [05:10,  1.02it/s]

Mismatch: Predicted 30.0, Expected 40.0


280it [05:11,  1.01s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 60


283it [05:15,  1.12s/it]

Mismatch: Predicted 180.0, Expected 195.0


284it [05:16,  1.15s/it]

Mismatch: Predicted 12.5, Expected 68.0


287it [05:19,  1.12s/it]

Mismatch: Predicted 20.0, Expected 90.0


288it [05:20,  1.13s/it]

Mismatch: Predicted 14.0, Expected 8.0


289it [05:22,  1.11s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 61


291it [05:23,  1.03it/s]

Mismatch: Predicted 220.0, Expected 390.0


292it [05:26,  1.38s/it]

Match: Predicted 2.0, Expected 2.0
Current correct count: 62


297it [05:31,  1.14s/it]

Mismatch: Predicted 4.0, Expected 3.0


299it [05:33,  1.07s/it]

Mismatch: Predicted 400.0, Expected 500.0


301it [05:36,  1.05s/it]

Match: Predicted 78.0, Expected 78.0
Current correct count: 63


302it [05:36,  1.12it/s]

Mismatch: Predicted 64.0, Expected 8.0


303it [05:37,  1.25it/s]

Mismatch: Predicted 20.0, Expected 15.0


306it [05:40,  1.12it/s]

Match: Predicted 4.0, Expected 4.0
Current correct count: 64


313it [05:47,  1.02s/it]

Mismatch: Predicted 8.0, Expected 32.0


318it [05:53,  1.07s/it]

Mismatch: Predicted 24.0, Expected 12.0


320it [05:56,  1.29s/it]

Mismatch: Predicted 11.0, Expected 15.0


322it [05:58,  1.14s/it]

Match: Predicted 1.0, Expected 1.0
Current correct count: 65


323it [05:59,  1.10s/it]

Mismatch: Predicted 3.0, Expected 9.0


325it [06:01,  1.10s/it]

Mismatch: Predicted 20.0, Expected 14.0


330it [06:08,  1.23s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 66


331it [06:09,  1.19s/it]

Mismatch: Predicted 41.0, Expected 31.0


333it [06:11,  1.03s/it]

Match: Predicted 44.0, Expected 44.0
Current correct count: 67


335it [06:13,  1.07s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 68


336it [06:15,  1.35s/it]

Match: Predicted 310.0, Expected 310.0
Current correct count: 69


338it [06:18,  1.40s/it]

Match: Predicted 1.0, Expected 1.0
Current correct count: 70


339it [06:19,  1.35s/it]

Mismatch: Predicted 90.0, Expected 60.0


341it [06:21,  1.27s/it]

Mismatch: Predicted 0.0, Expected 4.0


343it [06:24,  1.17s/it]

Mismatch: Predicted 63.0, Expected 87.0


345it [06:27,  1.46s/it]

Mismatch: Predicted 16.0, Expected 2.0


351it [06:36,  1.42s/it]

Mismatch: Predicted 7.0, Expected 8.0


352it [06:38,  1.38s/it]

Mismatch: Predicted 40.0, Expected 10.0


353it [06:38,  1.21s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 71


356it [06:44,  1.49s/it]

Match: Predicted 34.0, Expected 34.0
Current correct count: 72


361it [06:51,  1.44s/it]

Mismatch: Predicted 85.0, Expected 25.0


366it [07:00,  1.73s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 73


367it [07:01,  1.46s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 74


368it [07:02,  1.31s/it]

Mismatch: Predicted 20.0, Expected 19.0


369it [07:03,  1.28s/it]

Mismatch: Predicted 8.0, Expected 18.0


371it [07:07,  1.48s/it]

Mismatch: Predicted 580.0, Expected 320.0


373it [07:10,  1.41s/it]

Mismatch: Predicted 7.0, Expected 5.0


374it [07:11,  1.45s/it]

Match: Predicted 240000.0, Expected 240000.0
Current correct count: 75


376it [07:14,  1.50s/it]

Match: Predicted 48.0, Expected 48.0
Current correct count: 76


377it [07:16,  1.53s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 77


378it [07:17,  1.58s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 78


380it [07:20,  1.51s/it]

Match: Predicted 21.0, Expected 21.0
Current correct count: 79


381it [07:22,  1.62s/it]

Mismatch: Predicted 73.0, Expected 803.0


382it [07:24,  1.67s/it]

Match: Predicted 67.0, Expected 67.0
Current correct count: 80


384it [07:29,  1.90s/it]

Mismatch: Predicted 432.0, Expected 2.0


385it [07:30,  1.77s/it]

Mismatch: Predicted 32.8, Expected 32.0


387it [07:34,  1.73s/it]

Mismatch: Predicted 72.0, Expected 80.0


388it [07:35,  1.64s/it]

Mismatch: Predicted 70.0, Expected 36.0


389it [07:37,  1.61s/it]

Match: Predicted 88.0, Expected 88.0
Current correct count: 81


391it [07:40,  1.66s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 82


392it [07:42,  1.64s/it]

Mismatch: Predicted 1.0, Expected 15.0


396it [07:48,  1.62s/it]

Mismatch: Predicted 72.0, Expected 38.0


398it [07:52,  1.65s/it]

Match: Predicted 25.0, Expected 25.0
Current correct count: 83


400it [07:56,  1.86s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 84


401it [07:56,  1.51s/it]

Mismatch: Predicted 6.0, Expected 48.0


413it [08:16,  1.56s/it]

Mismatch: Predicted 15.0, Expected 5.0


414it [08:18,  1.54s/it]

Mismatch: Predicted 2500.0, Expected 25.0


415it [08:19,  1.54s/it]

Match: Predicted 56.0, Expected 56.0
Current correct count: 85


417it [08:22,  1.49s/it]

Mismatch: Predicted 214.0, Expected 56.0


419it [08:26,  1.57s/it]

Match: Predicted 40.0, Expected 40.0
Current correct count: 86


420it [08:27,  1.40s/it]

Mismatch: Predicted 6000.0, Expected 3000.0


428it [08:39,  1.46s/it]

Mismatch: Predicted 33.0, Expected 150.0


429it [08:40,  1.48s/it]

Mismatch: Predicted 22.0, Expected 26.0


433it [08:47,  1.46s/it]

Mismatch: Predicted 60.0, Expected 30.0


434it [08:48,  1.48s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 87


440it [08:59,  1.80s/it]

Mismatch: Predicted 21.0, Expected 17.0


441it [09:01,  1.76s/it]

Mismatch: Predicted 240.0, Expected 98.0


445it [09:08,  1.70s/it]

Match: Predicted 20.0, Expected 20.0
Current correct count: 88


448it [09:12,  1.45s/it]

Mismatch: Predicted 16.0, Expected 5.0


450it [09:15,  1.35s/it]

Mismatch: Predicted 128.0, Expected 112.0


452it [09:18,  1.59s/it]

Mismatch: Predicted 380.0, Expected 11050.0


453it [09:19,  1.38s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 89


455it [09:22,  1.49s/it]

Mismatch: Predicted 240.0, Expected 150.0


459it [09:31,  1.91s/it]

Match: Predicted 35.0, Expected 35.0
Current correct count: 90


462it [09:36,  1.62s/it]

Mismatch: Predicted 16.0, Expected 9.0


463it [09:37,  1.46s/it]

Match: Predicted 71.0, Expected 71.0
Current correct count: 91


464it [09:38,  1.28s/it]

Mismatch: Predicted 3.0, Expected 18.0


468it [09:46,  1.86s/it]

Mismatch: Predicted 560.0, Expected 1200.0


469it [09:47,  1.72s/it]

Mismatch: Predicted 170.0, Expected 120.0


470it [09:48,  1.57s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 92


473it [09:53,  1.45s/it]

Mismatch: Predicted 10.0, Expected 6.0


476it [09:58,  1.56s/it]

Mismatch: Predicted 60.0, Expected 20.0


477it [10:00,  1.60s/it]

Mismatch: Predicted 75.0, Expected 5.0


479it [10:03,  1.60s/it]

Mismatch: Predicted 607.0, Expected 621.0


481it [10:07,  1.65s/it]

Match: Predicted 11.0, Expected 11.0
Current correct count: 93


482it [10:08,  1.58s/it]

Match: Predicted 84.0, Expected 84.0
Current correct count: 94


483it [10:09,  1.41s/it]

Match: Predicted 26.0, Expected 26.0
Current correct count: 95


485it [10:12,  1.48s/it]

Match: Predicted 240.0, Expected 240.0
Current correct count: 96


486it [10:13,  1.38s/it]

Match: Predicted 220.0, Expected 220.0
Current correct count: 97


487it [10:15,  1.31s/it]

Mismatch: Predicted 60.0, Expected 6.0


489it [10:18,  1.40s/it]

Mismatch: Predicted 3.0, Expected 6.0


491it [10:20,  1.20s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 98


492it [10:21,  1.33s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 99


493it [10:22,  1.28s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 100


496it [10:28,  1.55s/it]

Mismatch: Predicted 25.0, Expected 200.0


497it [10:29,  1.48s/it]

Match: Predicted 38.0, Expected 38.0
Current correct count: 101


504it [10:42,  1.74s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 102


505it [10:43,  1.54s/it]

Mismatch: Predicted 1.75, Expected 2.0


506it [10:44,  1.44s/it]

Mismatch: Predicted 200.0, Expected 1.0


508it [10:49,  1.79s/it]

Mismatch: Predicted 228.0, Expected 2.0


509it [10:50,  1.77s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 103


510it [10:52,  1.70s/it]

Mismatch: Predicted 7.0, Expected 4.0


512it [10:56,  1.84s/it]

Mismatch: Predicted 30.0, Expected 110.0


513it [10:58,  1.84s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 104


515it [11:00,  1.49s/it]

Mismatch: Predicted 360000.0, Expected 90000.0


516it [11:02,  1.53s/it]

Mismatch: Predicted 32.0, Expected 160.0


517it [11:03,  1.39s/it]

Match: Predicted 375.0, Expected 375.0
Current correct count: 105


518it [11:04,  1.20s/it]

Mismatch: Predicted 72.0, Expected 18.0


520it [11:07,  1.35s/it]

Mismatch: Predicted 160.0, Expected 280.0


521it [11:09,  1.63s/it]

Match: Predicted 63.0, Expected 63.0
Current correct count: 106


523it [11:12,  1.69s/it]

Match: Predicted 29.0, Expected 29.0
Current correct count: 107


524it [11:14,  1.78s/it]

Match: Predicted 74.0, Expected 74.0
Current correct count: 108


526it [11:17,  1.49s/it]

Mismatch: Predicted 9.0, Expected 12.0


527it [11:18,  1.37s/it]

Mismatch: Predicted 10.5, Expected 21.0


528it [11:20,  1.50s/it]

Mismatch: Predicted 60.0, Expected 48.0


530it [11:23,  1.38s/it]

Mismatch: Predicted 12.0, Expected 11.0


533it [11:29,  1.79s/it]

Mismatch: Predicted 3.0, Expected 25.0


534it [11:31,  1.77s/it]

Mismatch: Predicted 900.0, Expected 300.0


535it [11:32,  1.49s/it]

Mismatch: Predicted 276.0, Expected 300.0


536it [11:33,  1.42s/it]

Mismatch: Predicted 7.0, Expected 16.0


537it [11:34,  1.26s/it]

Mismatch: Predicted 6.0, Expected 8.0


539it [11:37,  1.42s/it]

Match: Predicted 18.0, Expected 18.0
Current correct count: 109


540it [11:39,  1.52s/it]

Mismatch: Predicted 30.0, Expected 35.0


544it [11:46,  1.74s/it]

Match: Predicted 6.0, Expected 6.0
Current correct count: 110


546it [11:49,  1.62s/it]

Mismatch: Predicted 75.0, Expected 30.0


549it [11:54,  1.53s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 111


550it [11:56,  1.60s/it]

Mismatch: Predicted 82.0, Expected 398.0


553it [12:01,  1.69s/it]

Mismatch: Predicted 1000.0, Expected 450.0


556it [12:06,  1.73s/it]

Match: Predicted 2.0, Expected 2.0
Current correct count: 112


559it [12:11,  1.60s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 113


561it [12:14,  1.68s/it]

Match: Predicted 45.0, Expected 45.0
Current correct count: 114


565it [12:21,  1.56s/it]

Mismatch: Predicted 62.0, Expected 240.0


566it [12:22,  1.56s/it]

Mismatch: Predicted 2.0, Expected 5.0


567it [12:24,  1.61s/it]

Mismatch: Predicted 44.0, Expected 10.0


569it [12:29,  1.87s/it]

Match: Predicted 175.0, Expected 175.0
Current correct count: 115


573it [12:36,  1.78s/it]

Mismatch: Predicted 260.0, Expected 100.0


574it [12:37,  1.62s/it]

Mismatch: Predicted 2500.0, Expected 600.0


578it [12:45,  1.79s/it]

Mismatch: Predicted 50.0, Expected 40.0


580it [12:48,  1.62s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 116


581it [12:50,  1.63s/it]

Mismatch: Predicted 524.0, Expected 500.0


582it [12:52,  1.69s/it]

Match: Predicted 215.0, Expected 215.0
Current correct count: 117


589it [13:05,  1.82s/it]

Mismatch: Predicted 56.0, Expected 52.0


593it [13:12,  1.78s/it]

Mismatch: Predicted 15.0, Expected 1.0


596it [13:18,  1.93s/it]

Mismatch: Predicted 18.0, Expected 13.0


597it [13:20,  1.83s/it]

Mismatch: Predicted 167.0, Expected 260.0


598it [13:21,  1.55s/it]

Mismatch: Predicted 30.0, Expected 90.0


601it [13:28,  1.99s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 118


603it [13:31,  1.74s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 119


608it [13:41,  1.96s/it]

Mismatch: Predicted 20.0, Expected 10.0


610it [13:44,  1.90s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 120


611it [13:46,  1.88s/it]

Mismatch: Predicted 21840.0, Expected 65.0


614it [13:52,  1.90s/it]

Match: Predicted 93000.0, Expected 93000.0
Current correct count: 121


616it [13:56,  1.84s/it]

Mismatch: Predicted 18.0, Expected 33.0


617it [13:57,  1.69s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 122


618it [13:58,  1.65s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 123


621it [14:03,  1.56s/it]

Mismatch: Predicted 509.0, Expected 1509.0


622it [14:05,  1.61s/it]

Mismatch: Predicted 9000.0, Expected 3000.0


624it [14:08,  1.62s/it]

Mismatch: Predicted 12.0, Expected 8.0


625it [14:10,  1.69s/it]

Match: Predicted 85.0, Expected 85.0
Current correct count: 124


626it [14:13,  1.89s/it]

Match: Predicted 160.0, Expected 160.0
Current correct count: 125


627it [14:15,  1.93s/it]

Mismatch: Predicted 1.0, Expected 72.0


631it [14:23,  2.01s/it]

Mismatch: Predicted 40.0, Expected 10.0


633it [14:27,  1.89s/it]

Mismatch: Predicted 28.0, Expected 45.0


635it [14:30,  1.75s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 126


638it [14:36,  1.84s/it]

Match: Predicted 17.0, Expected 17.0
Current correct count: 127


641it [14:42,  1.79s/it]

Mismatch: Predicted 36900.0, Expected 43.0


642it [14:43,  1.78s/it]

Mismatch: Predicted 42000.0, Expected 262500.0


644it [14:47,  1.81s/it]

Match: Predicted 840.0, Expected 840.0
Current correct count: 128


645it [14:49,  1.80s/it]

Match: Predicted 29.0, Expected 29.0
Current correct count: 129


646it [14:50,  1.54s/it]

Mismatch: Predicted 16.0, Expected 48.0


649it [14:56,  1.91s/it]

Mismatch: Predicted 12.0, Expected 54.0


652it [15:02,  1.94s/it]

Mismatch: Predicted 840.0, Expected 2100.0


653it [15:04,  1.84s/it]

Match: Predicted 75.0, Expected 75.0
Current correct count: 130


654it [15:05,  1.64s/it]

Mismatch: Predicted 120.0, Expected 80.0


656it [15:08,  1.57s/it]

Mismatch: Predicted 5.0, Expected 10.0


658it [15:12,  1.94s/it]

Mismatch: Predicted 3060000.0, Expected 330000.0


663it [15:22,  1.83s/it]

Mismatch: Predicted 20.0, Expected 7.0


664it [15:23,  1.60s/it]

Match: Predicted 193.0, Expected 193.0
Current correct count: 131


665it [15:24,  1.39s/it]

Mismatch: Predicted 40.0, Expected 32.0


669it [15:33,  1.76s/it]

Mismatch: Predicted 15.0, Expected 3.0


671it [15:36,  1.76s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 132


672it [15:39,  1.88s/it]

Mismatch: Predicted 8.5, Expected 4.0


674it [15:42,  1.81s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 133


678it [15:50,  1.79s/it]

Match: Predicted 36.0, Expected 36.0
Current correct count: 134


679it [15:51,  1.67s/it]

Mismatch: Predicted 60.0, Expected 120.0


680it [15:52,  1.50s/it]

Match: Predicted 576.0, Expected 576.0
Current correct count: 135


682it [15:58,  2.02s/it]

Mismatch: Predicted 169.0, Expected 298.0


683it [15:59,  1.83s/it]

Match: Predicted 80.0, Expected 80.0
Current correct count: 136


684it [16:01,  1.71s/it]

Mismatch: Predicted 100.0, Expected 50.0


685it [16:02,  1.53s/it]

Match: Predicted 11.0, Expected 11.0
Current correct count: 137


686it [16:04,  1.72s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 138


688it [16:07,  1.72s/it]

Mismatch: Predicted 19.0, Expected 13.0


695it [16:21,  1.86s/it]

Mismatch: Predicted 17.0, Expected 9.0


696it [16:22,  1.55s/it]

Mismatch: Predicted 10.0, Expected 5.0


700it [16:28,  1.55s/it]

Mismatch: Predicted 16.0, Expected 8.0


706it [16:39,  1.72s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 139


707it [16:41,  1.59s/it]

Mismatch: Predicted 0.0, Expected 9.0


708it [16:42,  1.41s/it]

Mismatch: Predicted 20.0, Expected 8.0


709it [16:43,  1.46s/it]

Match: Predicted 168.0, Expected 168.0
Current correct count: 140


714it [16:51,  1.61s/it]

Match: Predicted 576.0, Expected 576.0
Current correct count: 141


715it [16:53,  1.60s/it]

Mismatch: Predicted 29.0, Expected 10.0


716it [16:56,  1.89s/it]

Match: Predicted 385000.0, Expected 385000.0
Current correct count: 142


719it [17:01,  1.74s/it]

Mismatch: Predicted 7.0, Expected 2.0


721it [17:04,  1.57s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 143


723it [17:06,  1.41s/it]

Mismatch: Predicted 825.0, Expected 255.0


724it [17:08,  1.44s/it]

Match: Predicted 160.0, Expected 160.0
Current correct count: 144


729it [17:16,  1.60s/it]

Mismatch: Predicted 50.0, Expected 40.0


731it [17:20,  1.56s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 145


734it [17:24,  1.37s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 146


735it [17:26,  1.61s/it]

Mismatch: Predicted 19.0, Expected 17.0


737it [17:29,  1.63s/it]

Mismatch: Predicted 67.0, Expected 3.0


742it [17:37,  1.51s/it]

Mismatch: Predicted 32.0, Expected 28.0


743it [17:38,  1.39s/it]

Mismatch: Predicted 32.0, Expected 24.0


744it [17:40,  1.44s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 147


745it [17:41,  1.41s/it]

Match: Predicted 4.0, Expected 4.0
Current correct count: 148


748it [17:46,  1.62s/it]

Mismatch: Predicted 7000.0, Expected 350.0


750it [17:49,  1.58s/it]

Mismatch: Predicted 6.0, Expected 3.0


753it [17:54,  1.58s/it]

Match: Predicted 240.0, Expected 240.0
Current correct count: 149


756it [18:00,  1.60s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 150


759it [18:05,  1.58s/it]

Match: Predicted 133.0, Expected 133.0
Current correct count: 151


760it [18:06,  1.48s/it]

Match: Predicted 60000.0, Expected 60000.0
Current correct count: 152


762it [18:09,  1.51s/it]

Mismatch: Predicted 18.0, Expected 27.0


763it [18:10,  1.43s/it]

Match: Predicted 85.0, Expected 85.0
Current correct count: 153


765it [18:13,  1.35s/it]

Mismatch: Predicted 6.0, Expected 14.0


768it [18:17,  1.44s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 154


769it [18:18,  1.35s/it]

Mismatch: Predicted 450.0, Expected 675.0


775it [18:28,  1.41s/it]

Mismatch: Predicted 18.0, Expected 10.0


777it [18:31,  1.39s/it]

Match: Predicted 800.0, Expected 800.0
Current correct count: 155


782it [18:38,  1.46s/it]

Mismatch: Predicted 4.0, Expected 17.0


784it [18:42,  1.46s/it]

Mismatch: Predicted 8.0, Expected 4.0


787it [18:46,  1.56s/it]

Match: Predicted 142.0, Expected 142.0
Current correct count: 156


788it [18:47,  1.37s/it]

Mismatch: Predicted 32.0, Expected 25.0


789it [18:49,  1.42s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 157


791it [18:53,  1.73s/it]

Match: Predicted 108.0, Expected 108.0
Current correct count: 158


793it [18:57,  1.76s/it]

Mismatch: Predicted 7.5, Expected 75.0


795it [19:00,  1.60s/it]

Match: Predicted 32.0, Expected 32.0
Current correct count: 159


797it [19:03,  1.59s/it]

Mismatch: Predicted 880000.0, Expected 2880000.0


798it [19:04,  1.47s/it]

Match: Predicted 540.0, Expected 540.0
Current correct count: 160


806it [19:17,  1.41s/it]

Mismatch: Predicted 1080.0, Expected 1170.0


807it [19:18,  1.42s/it]

Mismatch: Predicted 54.0, Expected 70.0


809it [19:21,  1.50s/it]

Mismatch: Predicted 8.25, Expected 12.0


810it [19:23,  1.54s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 161


812it [19:27,  1.64s/it]

Mismatch: Predicted 4.0, Expected 60.0


816it [19:33,  1.44s/it]

Mismatch: Predicted 20.0, Expected 4.0


817it [19:34,  1.45s/it]

Match: Predicted 4500.0, Expected 4500.0
Current correct count: 162


818it [19:36,  1.42s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 163


820it [19:39,  1.48s/it]

Mismatch: Predicted 6250.0, Expected 6.0


826it [19:48,  1.45s/it]

Mismatch: Predicted 18.0, Expected 153.0


827it [19:49,  1.27s/it]

Mismatch: Predicted 13.0, Expected 27.0


829it [19:52,  1.36s/it]

Mismatch: Predicted 160.0, Expected 60.0


834it [20:00,  1.58s/it]

Mismatch: Predicted 480.0, Expected 40.0


835it [20:02,  1.55s/it]

Mismatch: Predicted 64.0, Expected 80.0


838it [20:06,  1.35s/it]

Mismatch: Predicted 600.0, Expected 120.0


841it [20:10,  1.38s/it]

Mismatch: Predicted 9.0, Expected 16.0


843it [20:13,  1.37s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 164


846it [20:18,  1.41s/it]

Mismatch: Predicted 5.0, Expected 12.0


848it [20:21,  1.45s/it]

Mismatch: Predicted 525.0, Expected 975.0


852it [20:27,  1.47s/it]

Mismatch: Predicted 220.0, Expected 110.0


859it [20:38,  1.55s/it]

Mismatch: Predicted 60.0, Expected 3430.0


863it [20:45,  1.61s/it]

Mismatch: Predicted 48.0, Expected 40.0


864it [20:46,  1.47s/it]

Match: Predicted 110.0, Expected 110.0
Current correct count: 165


868it [20:52,  1.41s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 166


870it [20:56,  1.67s/it]

Match: Predicted 500.0, Expected 500.0
Current correct count: 167


871it [20:57,  1.60s/it]

Match: Predicted 40.0, Expected 40.0
Current correct count: 168


874it [21:02,  1.55s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 169


875it [21:03,  1.52s/it]

Match: Predicted 132.0, Expected 132.0
Current correct count: 170


876it [21:05,  1.51s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 171


879it [21:09,  1.47s/it]

Mismatch: Predicted 7.0, Expected 5.0


880it [21:10,  1.33s/it]

Mismatch: Predicted 800.0, Expected 575.0


882it [21:13,  1.44s/it]

Mismatch: Predicted 44.0, Expected 16.0


884it [21:16,  1.29s/it]

Mismatch: Predicted 50.0, Expected 25.0


885it [21:17,  1.14s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 172


889it [21:23,  1.39s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 173


890it [21:24,  1.28s/it]

Mismatch: Predicted 5.0, Expected 15.0


892it [21:27,  1.35s/it]

Mismatch: Predicted 24.0, Expected 55.0


893it [21:28,  1.34s/it]

Mismatch: Predicted 1.67, Expected 1.0


894it [21:30,  1.36s/it]

Match: Predicted 480.0, Expected 480.0
Current correct count: 174


896it [21:33,  1.48s/it]

Match: Predicted 74.0, Expected 74.0
Current correct count: 175


897it [21:34,  1.48s/it]

Match: Predicted 250.0, Expected 250.0
Current correct count: 176


898it [21:35,  1.25s/it]

Mismatch: Predicted 4.0, Expected 1.0


901it [21:40,  1.39s/it]

Mismatch: Predicted 2.0, Expected 15.0


903it [21:42,  1.40s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 177


905it [21:45,  1.42s/it]

Mismatch: Predicted 72.0, Expected 8.0


906it [21:47,  1.41s/it]

Mismatch: Predicted 15.0, Expected 5.0


908it [21:50,  1.41s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 178


909it [21:51,  1.45s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 179


911it [21:55,  1.55s/it]

Match: Predicted 700.0, Expected 700.0
Current correct count: 180


914it [22:00,  1.67s/it]

Mismatch: Predicted 38.0, Expected 6.0


915it [22:01,  1.53s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 181


916it [22:02,  1.36s/it]

Mismatch: Predicted 18.0, Expected 23.0


918it [22:05,  1.38s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 182


919it [22:06,  1.33s/it]

Match: Predicted 56.0, Expected 56.0
Current correct count: 183


920it [22:08,  1.33s/it]

Mismatch: Predicted 39.0, Expected 90.0


923it [22:12,  1.45s/it]

Mismatch: Predicted 10.0, Expected 60.0


924it [22:14,  1.46s/it]

Mismatch: Predicted 12.0, Expected 2.0


925it [22:15,  1.46s/it]

Mismatch: Predicted 24.0, Expected 12.0


926it [22:17,  1.51s/it]

Match: Predicted 2000.0, Expected 2000.0
Current correct count: 184


930it [22:23,  1.57s/it]

Match: Predicted 60.0, Expected 60.0
Current correct count: 185


932it [22:27,  1.79s/it]

Mismatch: Predicted 30.0, Expected 50.0


939it [22:39,  1.60s/it]

Mismatch: Predicted 75.0, Expected 4.0


941it [22:41,  1.37s/it]

Match: Predicted 280.0, Expected 280.0
Current correct count: 186


943it [22:45,  1.57s/it]

Mismatch: Predicted 177.5, Expected 205.0


947it [22:51,  1.60s/it]

Match: Predicted 450.0, Expected 450.0
Current correct count: 187


948it [22:52,  1.36s/it]

Match: Predicted 13.0, Expected 13.0
Current correct count: 188


950it [22:56,  1.65s/it]

Mismatch: Predicted 15.5, Expected 42.0


951it [22:57,  1.44s/it]

Mismatch: Predicted 25.0, Expected 5.0


952it [22:58,  1.37s/it]

Match: Predicted 300.0, Expected 300.0
Current correct count: 189


953it [22:59,  1.27s/it]

Mismatch: Predicted 180.0, Expected 360.0


955it [23:02,  1.35s/it]

Mismatch: Predicted 14.0, Expected 34.0


957it [23:05,  1.53s/it]

Mismatch: Predicted 3.0, Expected 1.0


958it [23:06,  1.39s/it]

Match: Predicted 45.0, Expected 45.0
Current correct count: 190


959it [23:08,  1.40s/it]

Mismatch: Predicted 60.0, Expected 40.0


962it [23:13,  1.51s/it]

Match: Predicted 225.0, Expected 225.0
Current correct count: 191


963it [23:14,  1.39s/it]

Match: Predicted 1000.0, Expected 1000.0
Current correct count: 192


965it [23:17,  1.44s/it]

Match: Predicted 200.0, Expected 200.0
Current correct count: 193


971it [23:27,  1.63s/it]

Match: Predicted 33.0, Expected 33.0
Current correct count: 194


974it [23:31,  1.48s/it]

Mismatch: Predicted 52.0, Expected 79.0


979it [23:37,  1.25s/it]

Match: Predicted 160.0, Expected 160.0
Current correct count: 195


980it [23:38,  1.24s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 196


981it [23:40,  1.22s/it]

Match: Predicted 90.0, Expected 90.0
Current correct count: 197


983it [23:42,  1.21s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 198


984it [23:43,  1.13s/it]

Mismatch: Predicted 5.0, Expected 15.0


986it [23:45,  1.11s/it]

Match: Predicted 63.0, Expected 63.0
Current correct count: 199


988it [23:47,  1.10s/it]

Mismatch: Predicted 11.0, Expected 3.0


992it [23:52,  1.00s/it]

Mismatch: Predicted 90.0, Expected 15.0


993it [23:53,  1.24s/it]

Mismatch: Predicted 18.0, Expected 52.0


994it [23:56,  1.57s/it]

Match: Predicted 11.0, Expected 11.0
Current correct count: 200


996it [23:58,  1.29s/it]

Match: Predicted 12.0, Expected 12.0
Current correct count: 201


998it [24:01,  1.30s/it]

Mismatch: Predicted 600.0, Expected 6.0


1001it [24:04,  1.20s/it]

Mismatch: Predicted 4.0, Expected 1.0


1003it [24:06,  1.01s/it]

Mismatch: Predicted 16.0, Expected 8.0


1005it [24:09,  1.17s/it]

Match: Predicted 8.0, Expected 8.0
Current correct count: 202


1008it [24:12,  1.12s/it]

Mismatch: Predicted 12.0, Expected 34.0


1009it [24:13,  1.13s/it]

Match: Predicted 230.0, Expected 230.0
Current correct count: 203


1018it [24:26,  1.43s/it]

Mismatch: Predicted 5.0, Expected 45.0


1019it [24:27,  1.22s/it]

Mismatch: Predicted 180.0, Expected 60.0


1021it [24:29,  1.17s/it]

Match: Predicted 22.0, Expected 22.0
Current correct count: 204


1026it [24:35,  1.27s/it]

Mismatch: Predicted 40.0, Expected 30.0


1029it [24:40,  1.45s/it]

Match: Predicted 200.0, Expected 200.0
Current correct count: 205


1033it [24:45,  1.36s/it]

Mismatch: Predicted 150.0, Expected 450.0


1035it [24:48,  1.28s/it]

Match: Predicted 66.0, Expected 66.0
Current correct count: 206


1036it [24:49,  1.24s/it]

Mismatch: Predicted 40.0, Expected 35.0


1037it [24:50,  1.14s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 207


1042it [24:57,  1.32s/it]

Match: Predicted 101.0, Expected 101.0
Current correct count: 208


1044it [24:59,  1.10s/it]

Mismatch: Predicted 26000.0, Expected 130000.0


1045it [25:00,  1.10s/it]

Match: Predicted 1.0, Expected 1.0
Current correct count: 209


1048it [25:04,  1.20s/it]

Mismatch: Predicted 9.0, Expected 10.0


1052it [25:09,  1.24s/it]

Match: Predicted 15.0, Expected 15.0
Current correct count: 210


1053it [25:10,  1.20s/it]

Mismatch: Predicted 140.0, Expected 110.0


1056it [25:14,  1.19s/it]

Match: Predicted 2304.0, Expected 2304.0
Current correct count: 211


1058it [25:16,  1.12s/it]

Mismatch: Predicted 20.0, Expected 24.0


1059it [25:17,  1.07s/it]

Match: Predicted 250.0, Expected 250.0
Current correct count: 212


1060it [25:18,  1.05s/it]

Mismatch: Predicted 6.0, Expected 2.0


1061it [25:19,  1.09s/it]

Mismatch: Predicted 35.0, Expected 31.0


1064it [25:23,  1.32s/it]

Mismatch: Predicted 80.0, Expected 320.0


1072it [25:37,  1.61s/it]

Mismatch: Predicted 275.0, Expected 251.0


1074it [25:39,  1.41s/it]

Mismatch: Predicted 291.0, Expected 21.0


1075it [25:40,  1.32s/it]

Mismatch: Predicted 500.0, Expected 750.0


1076it [25:42,  1.27s/it]

Mismatch: Predicted 460.0, Expected 16.0


1081it [25:48,  1.20s/it]

Mismatch: Predicted 1200.0, Expected 72000.0


1082it [25:49,  1.08s/it]

Match: Predicted 195.0, Expected 195.0
Current correct count: 213


1083it [25:50,  1.10s/it]

Match: Predicted 2.0, Expected 2.0
Current correct count: 214


1086it [25:53,  1.10s/it]

Match: Predicted 26.0, Expected 26.0
Current correct count: 215


1090it [26:00,  1.43s/it]

Mismatch: Predicted 10.0, Expected 32.0


1092it [26:02,  1.18s/it]

Mismatch: Predicted 10000.0, Expected 1000.0


1093it [26:03,  1.20s/it]

Mismatch: Predicted 108.0, Expected 1080.0


1096it [26:07,  1.34s/it]

Mismatch: Predicted 90.0, Expected 270.0


1098it [26:10,  1.41s/it]

Match: Predicted 480.0, Expected 480.0
Current correct count: 216


1099it [26:11,  1.34s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 217


1102it [26:14,  1.17s/it]

Match: Predicted 16.0, Expected 16.0
Current correct count: 218


1104it [26:16,  1.07s/it]

Mismatch: Predicted 180.0, Expected 90.0


1105it [26:17,  1.04s/it]

Match: Predicted 24.0, Expected 24.0
Current correct count: 219


1107it [26:20,  1.07s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 220


1109it [26:22,  1.06s/it]

Match: Predicted 38.0, Expected 38.0
Current correct count: 221


1112it [26:27,  1.49s/it]

Mismatch: Predicted 567.0, Expected 157.0


1115it [26:29,  1.06it/s]

Mismatch: Predicted 6.0, Expected 8.0


1116it [26:30,  1.06it/s]

Mismatch: Predicted 6.0, Expected 5.0


1117it [26:31,  1.07it/s]

Match: Predicted 60.0, Expected 60.0
Current correct count: 222


1118it [26:32,  1.10it/s]

Match: Predicted 9.0, Expected 9.0
Current correct count: 223


1121it [26:36,  1.12s/it]

Match: Predicted 560.0, Expected 560.0
Current correct count: 224


1122it [26:36,  1.07s/it]

Match: Predicted 35.0, Expected 35.0
Current correct count: 225


1123it [26:37,  1.05it/s]

Mismatch: Predicted 20.0, Expected 18.0


1125it [26:39,  1.11it/s]

Mismatch: Predicted 16.0, Expected 64.0


1126it [26:40,  1.02s/it]

Mismatch: Predicted 9.0, Expected 90.0


1127it [26:41,  1.04it/s]

Match: Predicted 50.0, Expected 50.0
Current correct count: 226


1128it [26:42,  1.02it/s]

Match: Predicted 750.0, Expected 750.0
Current correct count: 227


1129it [26:44,  1.14s/it]

Match: Predicted 9.0, Expected 9.0
Current correct count: 228


1136it [26:53,  1.10s/it]

Mismatch: Predicted 21.0, Expected 32.0


1137it [26:54,  1.02it/s]

Mismatch: Predicted 50.0, Expected 26.0


1138it [26:56,  1.26s/it]

Mismatch: Predicted 27.0, Expected 68.0


1143it [27:02,  1.23s/it]

Mismatch: Predicted 25.0, Expected 9.0


1144it [27:02,  1.09s/it]

Mismatch: Predicted 60.0, Expected 300.0


1150it [27:11,  1.28s/it]

Mismatch: Predicted 1157.0, Expected 93.0


1151it [27:12,  1.17s/it]

Mismatch: Predicted 91.0, Expected 21.0


1152it [27:13,  1.16s/it]

Match: Predicted 50.0, Expected 50.0
Current correct count: 229


1153it [27:15,  1.30s/it]

Mismatch: Predicted 100.0, Expected 12.0


1154it [27:15,  1.16s/it]

Mismatch: Predicted 9.0, Expected 20.0


1155it [27:17,  1.19s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 230


1156it [27:18,  1.28s/it]

Match: Predicted 13.0, Expected 13.0
Current correct count: 231


1157it [27:20,  1.36s/it]

Mismatch: Predicted 20.0, Expected 120.0


1161it [27:27,  1.60s/it]

Mismatch: Predicted 1105.0, Expected 1125.0


1163it [27:29,  1.50s/it]

Match: Predicted 3.0, Expected 3.0
Current correct count: 232


1164it [27:30,  1.26s/it]

Mismatch: Predicted 6.0, Expected 12.0


1168it [27:36,  1.31s/it]

Match: Predicted 120.0, Expected 120.0
Current correct count: 233


1170it [27:38,  1.27s/it]

Mismatch: Predicted 1.0, Expected 2.0


1174it [27:44,  1.28s/it]

Mismatch: Predicted 100.0, Expected 10.0


1179it [27:50,  1.25s/it]

Match: Predicted 17.0, Expected 17.0
Current correct count: 234


1180it [27:51,  1.18s/it]

Mismatch: Predicted 37.0, Expected 50.0


1183it [27:56,  1.44s/it]

Mismatch: Predicted 18.0, Expected 1800.0


1184it [27:57,  1.29s/it]

Mismatch: Predicted 13.0, Expected 11.0


1185it [27:58,  1.13s/it]

Match: Predicted 306.0, Expected 306.0
Current correct count: 235


1189it [28:02,  1.15s/it]

Mismatch: Predicted 34.0, Expected 24.0


1190it [28:03,  1.11s/it]

Mismatch: Predicted 11.0, Expected 6.0


1192it [28:06,  1.10s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 236


1193it [28:07,  1.15s/it]

Mismatch: Predicted 220.0, Expected 280.0


1195it [28:10,  1.31s/it]

Match: Predicted 1200.0, Expected 1200.0
Current correct count: 237


1205it [28:28,  2.05s/it]

Mismatch: Predicted 700.0, Expected 168.0


1206it [28:29,  1.73s/it]

Mismatch: Predicted 3.0, Expected 4.0


1207it [28:31,  1.61s/it]

Mismatch: Predicted 40000.0, Expected 40.0


1208it [28:32,  1.59s/it]

Match: Predicted 64.0, Expected 64.0
Current correct count: 238


1209it [28:33,  1.39s/it]

Mismatch: Predicted 9.0, Expected 27.0


1211it [28:36,  1.45s/it]

Match: Predicted 288.0, Expected 288.0
Current correct count: 239


1212it [28:37,  1.36s/it]

Match: Predicted 448.0, Expected 448.0
Current correct count: 240


1213it [28:38,  1.14s/it]

Mismatch: Predicted 960.0, Expected 150.0


1214it [28:39,  1.10s/it]

Mismatch: Predicted 281.0, Expected 31.0


1220it [28:46,  1.21s/it]

Match: Predicted 100.0, Expected 100.0
Current correct count: 241


1221it [28:47,  1.01s/it]

Mismatch: Predicted 160.0, Expected 32.0


1222it [28:48,  1.03s/it]

Match: Predicted 10.0, Expected 10.0
Current correct count: 242


1223it [28:49,  1.07it/s]

Mismatch: Predicted 70.0, Expected 350.0


1225it [28:51,  1.02s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 243


1226it [28:52,  1.10it/s]

Mismatch: Predicted 14.0, Expected 3.0


1229it [28:56,  1.30s/it]

Match: Predicted 31.0, Expected 31.0
Current correct count: 244


1235it [29:03,  1.17s/it]

Mismatch: Predicted 36.0, Expected 12.0


1236it [29:04,  1.18s/it]

Mismatch: Predicted 2.5, Expected 60.0


1237it [29:06,  1.19s/it]

Mismatch: Predicted 78.0, Expected 84.0


1238it [29:06,  1.00it/s]

Mismatch: Predicted 22.0, Expected 78.0


1241it [29:10,  1.06s/it]

Mismatch: Predicted 11.0, Expected 2.0


1242it [29:11,  1.11s/it]

Mismatch: Predicted 2.5, Expected 8.0


1247it [29:17,  1.15s/it]

Match: Predicted 3360.0, Expected 3360.0
Current correct count: 245


1248it [29:18,  1.08s/it]

Mismatch: Predicted 4.0, Expected 7.0


1251it [29:21,  1.12s/it]

Match: Predicted 22.0, Expected 22.0
Current correct count: 246


1253it [29:23,  1.11s/it]

Mismatch: Predicted 100.0, Expected 70.0


1254it [29:25,  1.31s/it]

Mismatch: Predicted 450.0, Expected 120.0


1258it [29:31,  1.47s/it]

Match: Predicted 14.0, Expected 14.0
Current correct count: 247


1259it [29:32,  1.35s/it]

Mismatch: Predicted 50.0, Expected 60.0


1260it [29:33,  1.25s/it]

Mismatch: Predicted 240.0, Expected 7200.0


1261it [29:35,  1.24s/it]

Match: Predicted 5.0, Expected 5.0
Current correct count: 248


1264it [29:38,  1.22s/it]

Mismatch: Predicted 20.0, Expected 500.0


1270it [29:47,  1.34s/it]

Mismatch: Predicted 234.0, Expected 54.0


1274it [29:52,  1.21s/it]

Mismatch: Predicted 35.0, Expected 27.0


1275it [29:53,  1.18s/it]

Mismatch: Predicted 26.0, Expected 38.0


1276it [29:53,  1.02s/it]

Mismatch: Predicted 1825.0, Expected 16.0


1277it [29:56,  1.40s/it]

Mismatch: Predicted 600.0, Expected 70.0


1278it [29:58,  1.55s/it]

Mismatch: Predicted 28.0, Expected 48.0


1279it [29:59,  1.39s/it]

Mismatch: Predicted 765.0, Expected 665.0


1280it [29:59,  1.22s/it]

Mismatch: Predicted 15.0, Expected 180.0


1281it [30:00,  1.13s/it]

Match: Predicted 7.0, Expected 7.0
Current correct count: 249


1282it [30:01,  1.13s/it]

Mismatch: Predicted 60.0, Expected 20.0


1285it [30:05,  1.13s/it]

Mismatch: Predicted 7.0, Expected 25.0


1286it [30:06,  1.13s/it]

Match: Predicted 1218.0, Expected 1218.0
Current correct count: 250


1291it [30:12,  1.14s/it]

Mismatch: Predicted 144.0, Expected 90.0


1292it [30:13,  1.04s/it]

Match: Predicted 27.0, Expected 27.0
Current correct count: 251


1293it [30:14,  1.01s/it]

Mismatch: Predicted 69.0, Expected 67.0


1294it [30:15,  1.06s/it]

Mismatch: Predicted 10000.0, Expected 140000.0


1297it [30:19,  1.17s/it]

Match: Predicted 335.0, Expected 335.0
Current correct count: 252


1300it [30:22,  1.11s/it]

Mismatch: Predicted 21.0, Expected 13.0


1305it [30:29,  1.28s/it]

Match: Predicted 1000.0, Expected 1000.0
Current correct count: 253


1306it [30:30,  1.12s/it]

Mismatch: Predicted 1235.0, Expected 2325.0


1308it [30:32,  1.07s/it]

Mismatch: Predicted 24.0, Expected 8.0


1309it [30:33,  1.08s/it]

Match: Predicted 30.0, Expected 30.0
Current correct count: 254


1312it [30:37,  1.15s/it]

Match: Predicted 594.0, Expected 594.0
Current correct count: 255


1314it [30:39,  1.08s/it]

Mismatch: Predicted 100.0, Expected 2.0


1315it [30:40,  1.06it/s]

Mismatch: Predicted 35.0, Expected 8.0


1317it [30:42,  1.10s/it]

Mismatch: Predicted 470.0, Expected 230.0


1318it [30:43,  1.08s/it]

Mismatch: Predicted 15.0, Expected 5.0


1319it [30:44,  1.40s/it]

Mismatch: Predicted 21.0, Expected 14.0
Final correct count: 255
Accuracy on validation dataset: 40.9967845659164%





In [9]:
print(f"accuracy: {accuracy}")

accuracy: 40.9967845659164
