In [1]:
import pandas as pd
import re
import tokenizers
import puz
import os
import numpy as np
import streamlit as st
import scipy
import sys
import subprocess
import copy
import json
import pickle
import shutil
import time
import regex
import requests
import datetime

from itertools import zip_longest
from copy import deepcopy
from tqdm import tqdm
from pprint import pprint
from utils import puz_to_json

from transformers import AutoTokenizer, T5ForConditionalGeneration
from solver.Crossword import Crossword
from solver.BPSolver import BPSolver
from solver.Utils import print_grid
from models import setup_closedbook, setup_t5_reranker, DPRForCrossword

In [2]:
def solve(crossword, max_candidate):
    """Run the belief-propagation solver on ``crossword`` and evaluate the result.

    Args:
        crossword: Puzzle object handed straight to ``BPSolver``.
        max_candidate: Forwarded to ``BPSolver`` as ``max_candidates``.

    Returns:
        None. The solution is passed to ``BPSolver.evaluate`` and its result
        is not returned.
    """
    bp_solver = BPSolver(crossword, max_candidates=max_candidate)
    # Fixed solver budget: 60 iterations, one iterative-improvement step.
    filled_grid = bp_solver.solve(num_iters=60, iterative_improvement_steps=1)
    bp_solver.evaluate(filled_grid)
    
def _pair_clues_with_answers(clue_lines, answer_list):
    """Build ``{clue number: [clue text, answer]}`` for one direction.

    Each clue line is split at its first space: the leading token minus its
    last character (e.g. ``"12."`` -> ``"12"``) is the clue number, the rest
    is the clue text with square brackets removed.
    """
    paired = {}
    for clue_line, answer in zip(clue_lines, answer_list):
        number_token, _, clue_text = clue_line.partition(" ")
        clue_text = clue_text.replace("[", "").replace("]", "")
        paired[number_token[:-1]] = [clue_text, answer]
    return paired


def json_CA_json_converter(json_file_path, is_path):
    """Convert a puzzle JSON document into the solver's clue/answer layout.

    Args:
        json_file_path: Path to a JSON file when ``is_path`` is true,
            otherwise the already-parsed puzzle dict itself.
        is_path: Whether ``json_file_path`` must be opened and parsed.

    Returns:
        A dict with three keys:
          * ``"metadata"``: ``{"date", "rows", "cols"}``
          * ``"clues"``: ``{"across": {...}, "down": {...}}`` mapping clue
            number (str) to ``[clue text, answer]``
          * ``"grid"``: ``rows`` lists of ``cols`` cells, each either the
            string ``"BLACK"`` or ``[number-or-"", letter]``
        ``None`` if the input cannot be read or lacks an expected key (the
        error is printed).
    """
    try:
        if is_path:
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(json_file_path, "r", encoding="utf-8") as file:
                data = json.load(file)
        else:
            data = json_file_path

        rows = data["size"]["rows"]
        cols = data["size"]["cols"]
        date = data["date"]

        clues = data["clues"]
        answers = data["answers"]

        grid_info = data["grid"]
        grid_num = data["gridnums"]

        grid_info_list = []
        for i in range(rows):
            row_list = []
            for j in range(cols):
                # The grid is stored row-major, so the stride between rows is
                # the number of COLUMNS (using `rows` breaks non-square grids).
                flat_index = i * cols + j
                letter = grid_info[flat_index]
                if letter == ".":
                    row_list.append("BLACK")
                else:
                    number = grid_num[flat_index]
                    # Cells whose grid number is 0 get an empty label.
                    label = "" if number == 0 else str(number)
                    row_list.append([label, letter])
            grid_info_list.append(row_list)

        return {
            "metadata": {"date": date, "rows": rows, "cols": cols},
            "clues": {
                "across": _pair_clues_with_answers(clues["across"], answers["across"]),
                "down": _pair_clues_with_answers(clues["down"], answers["down"]),
            },
            "grid": grid_info_list,
        }

    except Exception as exc:
        # Best-effort contract kept (report and return None), but the cause is
        # now reported and KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"ERROR has occured: {exc!r}")
        return None

def getGrid(dateStr, timeout=30):
    """Download one puzzle from xwordinfo.com and convert it for the solver.

    Args:
        dateStr: Puzzle date formatted as ``mm/dd/yyyy``; appended verbatim
            to the request URL.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the notebook indefinitely.

    Returns:
        The dict produced by ``json_CA_json_converter`` with the HTML
        entities ``&quot;`` and ``&#39;`` in every clue text replaced by an
        apostrophe, or ``None`` when the request does not return HTTP 200 or
        the conversion fails.
    """
    headers = {
        'Referer': 'https://www.xwordinfo.com/JSON/'
    }
    url = 'https://www.xwordinfo.com/JSON/Data.ashx?date=' + dateStr

    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}.")
        return None

    # NOTE(review): single quotes are rewritten to double quotes before
    # parsing; presumably the endpoint may emit single-quoted JSON — confirm.
    jsonText = response.content.decode('utf-8').replace("'", '"')
    grid_data = json.loads(jsonText)

    puzzle_data = json_CA_json_converter(grid_data, False)
    if puzzle_data is None:
        # The converter already printed the reason; report failure the same
        # way as the HTTP-error path instead of raising an unrelated TypeError.
        return None

    for dim in ['across', 'down']:
        direction_clues = puzzle_data['clues'][dim]
        # Only existing keys are reassigned, so iterating items() is safe.
        for grid_num, (clue_section, ans_section) in direction_clues.items():
            clue_section = clue_section.replace("&quot;", "'").replace("&#39;", "'")
            direction_clues[grid_num] = [clue_section, ans_section]
    return puzzle_data

In [None]:
# Dates of the puzzles to download and solve.
date_list = [datetime.date(2023, 12, 9)]

for date in date_list:
    # getGrid expects mm/dd/yyyy.
    formatted_date = date.strftime('%m/%d/%Y')
    print(formatted_date)

    try:
        puzzle = getGrid(formatted_date)
        if puzzle is None:
            # getGrid reports failure by returning None; stop here with a
            # clear message rather than failing somewhere inside Crossword.
            raise RuntimeError("no puzzle data returned")
        # Timing covers building the Crossword and solving, not the download.
        start_time = time.time()
        crossword = Crossword(puzzle)
        solve(crossword, 50000)
        end_time = time.time()
        print("Total time taken: ", end_time - start_time)
    except Exception as exc:
        # `except Exception` (not a bare except) keeps the loop going past a
        # bad date while still letting a kernel interrupt stop a long solve.
        print("Error Occured for date: ", formatted_date, "-", repr(exc))

In [None]:
# Print the 'gold' entry of every crossword variable, then how many there were.
counter = 0  # stays 0 when there are no variables
for counter, (k, value) in enumerate(crossword.variables.items(), start=1):
    print(value['gold'])
print(counter)

In [None]:
# Hard-coded list of crossword clue strings (some texts occur more than once).
all_clues = [
    "'Try not to attract attention'",
    'Disney channel',
    "'The Nutcracker' role",
    'One pulling some strings',
    'Long part of a bouzouki',
    'Big deals',
    "Amy who wrote 'The Kitchen God's Wife'",
    'Played out',
    'Dig locale',
    'Not here',
    'The Eagle, e.g., for short',
    'Geographical inspiration for Strauss',
    "'100% correct!'",
    'Safaris, e.g.',
    'Not std.',
    'Fish also known as wahoo',
    'Border region along the Rhine',
    'Bit of assistance',
    'Run like a mouse',
    'Defiant declaration popularized by the drag queen Bianca Del Rio',
    'Shell-inspired shade of greenish blue',
    'This and that',
    'First Nigerian-born singer to win a Grammy',
    'Epithet for Tarzan',
    'Causes to grow, humorously',
    'Car-washing equipment',
    "'Alas!''",
    'Tab, essentially',
    'Play again',
    '___ Championship',
    'Founder of the Shondaland production company',
    'Itchy layers',
    'Locale for Ping-Pong, foosball, dancing, etc.',
    'Safari destinations',
    'U.N. secretary general Guterres',
    'Quick',
    'Most of the English force at Agincourt',
    "'Goddess of the loud hunt,' in Homer's 'Iliad'",
    'Pulitzer category',
    'What might be heard before a bust',
    'Stab',
    'Northern lapwing',
    'Toys with strings',
    'Did some shallow breathing?',
    "Bootlicker's specialty",
    'Word with data or deal',
    'M.M.A. decision',
    'Swedish actress Lena',
    'Make advances',
    'Tense',
    'Words to end a play',
    'Break down, in a way',
    'Chance to start fresh',
    '19th-century garb completed by top hats and gloves',
    "'Let's have our cake and eat it, too!'",
    'Pillar of the superhero community',
    'Heavenly being',
    "Maguire of 2009's 'Brothers'",
    'Part of U.C.S.F.: Abbr.',
    'Idaho senator Mike',
    "It's a good thing",
    'Spot check?',
    'Break down, in a way',
    'Fictional Dr. Jones, familiarly',
    'Big deal',
    'Many a North African',
    'Use a hitch on',
    'People with a language of the same name',
]

# Cell output: number of clues in the list.
len(all_clues)

In [None]:
# Load a locally stored demo puzzle instead of downloading one.
json_puzzle_path = r"./demo crosswords/crossword_1.json"

puzzle = json_CA_json_converter(json_puzzle_path, True)

# Decode the HTML quote entities in every clue the same way getGrid does
# (both become an apostrophe). Deleting them outright, as this cell used to,
# turns e.g. "It&#39;s" into "Its" and gives the solver different clue text
# than the downloaded-puzzle path.
for dim in ['across', 'down']:
    for grid_num in puzzle['clues'][dim].keys():
        clue_answer_list = puzzle['clues'][dim][grid_num]
        clue_section = clue_answer_list[0]
        ans_section = clue_answer_list[1]
        clue_section = clue_section.replace("&quot;", "'").replace("&#39;", "'")
        puzzle['clues'][dim][grid_num] = [clue_section, ans_section]

In [None]:
# Display the `letter_grid` attribute of `crossword`, the Crossword object
# assigned inside the date loop; requires that cell to have run successfully.
# NOTE(review): presumably the per-cell letters of the puzzle — confirm in solver.Crossword.
crossword.letter_grid

In [None]:
import torch
from transformers import T5ForConditionalGeneration, ByT5Tokenizer

# Load pre-trained ByT5 model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
tokenizer = AutoTokenizer.from_pretrained("google/byt5-small")

# Example positive and negative pairs
positive_pair = ("What is the capital of France?", "Paris")
negative_pair = ("What is the capital of France?", "Berlin")


def score_pair(question, answer):
    """Score how well ``answer`` follows from ``question`` under ``model``.

    The question is fed to the encoder and the answer is supplied as
    ``labels``. A seq2seq T5 forward pass needs decoder targets (it raises
    without ``labels`` or ``decoder_input_ids``), and with ``labels`` it
    returns the mean token cross-entropy as ``.loss``.

    Returns:
        A tensor of shape ``(1,)`` holding the negated loss, so a higher
        value means a more likely answer.
    """
    encoder_inputs = tokenizer(question, return_tensors="pt", truncation=True)
    label_ids = tokenizer(answer, return_tensors="pt", truncation=True).input_ids
    outputs = model(**encoder_inputs, labels=label_ids)
    return -outputs.loss.reshape(1)


# Forward pass: one scalar score per pair. MarginRankingLoss compares score
# tensors of identical shape, not full (batch, seq, vocab) logits.
positive_score = score_pair(*positive_pair)
negative_score = score_pair(*negative_pair)

# Define pairwise ranking loss
loss_function = torch.nn.MarginRankingLoss(margin=1.0)
target = torch.tensor([1.0])  # Positive pair should have higher score

# Compute loss
loss = loss_function(positive_score, negative_score, target)
print("Loss:", loss.item())

# Continue with training loop, optimization, and evaluation as needed
