In [1]:
import pandas as pd
import re
from transformers import AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer, T5ForConditionalGeneration
import tokenizers
import json
import puz
import os
import numpy as np
import streamlit as st
import scipy

import sys
import subprocess
import copy
import json

from itertools import zip_longest
from copy import deepcopy
import regex

from solver.Crossword import Crossword
from solver.BPSolver import BPSolver
from models import setup_closedbook, setup_t5_reranker, DPRForCrossword
from solver.Utils import print_grid

from tqdm import tqdm
from pprint import pprint
import pandas as pd
import pickle
import shutil
import time

from utils import puz_to_json

In [2]:
def solve(crossword, max_candidate, num_iters = 60, iterative_improvement_steps = 0):
    """Run ``BPSolver`` on a crossword, evaluate the result and return it.

    Args:
        crossword: Puzzle object handed unchanged to ``BPSolver``.
        max_candidate: Forwarded to ``BPSolver`` as its ``max_candidates`` argument.
        num_iters: Forwarded to ``BPSolver.solve``. Defaults to 60, the value
            that used to be hard-coded here.
        iterative_improvement_steps: Forwarded to ``BPSolver.solve``. Defaults
            to 0, the value that used to be hard-coded here.

    Returns:
        The object returned by ``BPSolver.solve``, after it has been passed to
        ``solver.evaluate``. Earlier versions discarded it and returned None.
    """
    solver = BPSolver(crossword, max_candidates = max_candidate)
    solution = solver.solve(
        num_iters = num_iters,
        iterative_improvement_steps = iterative_improvement_steps,
    )
    solver.evaluate(solution)
    # Hand the solution back so callers can inspect it instead of re-solving.
    return solution

In [3]:
def _pair_clues_with_answers(clue_lines, answer_list):
    """Map each clue number to ``[clue_text, answer]`` for one direction.

    Each clue line is split on its first space: the leading token minus its
    last character is used as the clue number (e.g. ``"12."`` -> ``"12"``),
    and the remainder, with square brackets removed, is the clue text.
    """
    paired = {}
    for clue_line, answer in zip(clue_lines, answer_list):
        number_token, _, clue_text = clue_line.partition(" ")
        clue_text = clue_text.replace("[", "").replace("]", "")
        paired[number_token[:-1]] = [clue_text, answer]
    return paired


def json_CA_json_converter(json_file_path, is_path):
    """Convert a flat puzzle JSON into the nested clue/answer/grid dict.

    Args:
        json_file_path: Path to a JSON file when ``is_path`` is true;
            otherwise the already-loaded puzzle dict itself.
        is_path: Selects how ``json_file_path`` is interpreted (see above).

    Returns:
        dict with three keys:
          * ``"metadata"``: ``{"date", "rows", "cols"}``
          * ``"clues"``: ``{"across": {...}, "down": {...}}`` where each inner
            dict maps clue number (str) to ``[clue_text, answer]``
          * ``"grid"``: list of rows; each cell is ``"BLACK"`` for a ``"."``
            entry, else ``[number_str_or_empty, letter]``
        Returns None (after printing an error line) if the input cannot be
        read or lacks an expected key; this best-effort contract is unchanged.
    """
    try:
        if is_path:
            # JSON files are UTF-8; do not depend on the platform default encoding.
            with open(json_file_path, "r", encoding="utf-8") as file:
                data = json.load(file)
        else:
            data = json_file_path

        rows = data["size"]["rows"]
        cols = data["size"]["cols"]
        date = data["date"]

        clues = data["clues"]
        answers = data["answers"]

        grid_info = data["grid"]
        grid_num = data["gridnums"]

        grid_info_list = []
        for i in range(rows):
            row_list = []
            for j in range(cols):
                # Row-major flat index: the stride is the number of columns.
                # (The previous ``i * rows + j`` was only right for square grids.)
                flat_index = i * cols + j
                letter = grid_info[flat_index]
                if letter == ".":
                    row_list.append("BLACK")
                else:
                    number = grid_num[flat_index]
                    # A grid number of 0 marks a cell that starts no clue.
                    row_list.append(["" if number == 0 else str(number), letter])
            grid_info_list.append(row_list)

        return {
            "metadata": {"date": date, "rows": rows, "cols": cols},
            "clues": {
                "across": _pair_clues_with_answers(clues["across"], answers["across"]),
                "down": _pair_clues_with_answers(clues["down"], answers["down"]),
            },
            "grid": grid_info_list,
        }

    except Exception as exc:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate, and report the cause for diagnosis.
        print(f"ERROR has occured. ({type(exc).__name__}: {exc})")
        return None

In [4]:
json_puzzle_path = r"C:\Users\parzi\OneDrive - Tribhuvan University\Desktop\Major Project\CODE\BCS Code\BCS-ALL-Code\Crossword Solver\demo crosswords\crossword_1.json"

# Read the demo puzzle from disk and convert it to the nested clue/grid dict.
puzzle = json_CA_json_converter(json_puzzle_path, True)

# Remove the HTML-escaped quote entities (&quot; and &#39;) from every clue
# text; the answers are carried over untouched.
for dim in ('across', 'down'):
    clue_table = puzzle['clues'][dim]
    for grid_num in list(clue_table):
        raw_clue, answer = clue_table[grid_num][0], clue_table[grid_num][1]
        cleaned_clue = raw_clue.replace("&quot;", "").replace("&#39;", "")
        clue_table[grid_num] = [cleaned_clue, answer]

In [5]:
# Build the Crossword data structure (from solver.Crossword) out of the converted puzzle dict.
crossword = Crossword(puzzle)

In [6]:
# Exploratory: print the Crossword class documentation (its attributes and methods).
# NOTE(review): interactive inspection only; nothing below depends on this call.
help(crossword)

Help on Crossword in module solver.Crossword object:

class Crossword(builtins.object)
 |  Crossword(data)
 |  
 |  This class defines a crossword datastructure which stores a puzzle solution and contains maps between words,
 |  crossing words, and the grid cells they contain. During initialization, this class takes a JSON representation
 |  of the crossword that matches our data scraper's specification.
 |  
 |  Args:
 |      data (dict): JSON representation of crossword data
 |  
 |  Attributes:
 |      self.letter_grid (list): 2D array representing gold crossword solution
 |      self.number_grid (list): 2D array with numbers representing beginnings of clues
 |      self.variables (dict): mapping from clues to gold answers, cells, and intersecting clues
 |  
 |  Methods defined here:
 |  
 |  __init__(self, data)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  initialize_clues(self, clues)
 |  
 |  initialize_crossing(self)
 |  
 |  initialize_grids(s

In [26]:
# Display the letter grid; the class docs describe it as the 2D array of the gold crossword solution.
crossword.letter_grid

[['B', 'I', 'B', 'B', '', 'P', 'I', 'A', 'F', '', 'O', 'F', 'O', 'L', 'D'],
 ['O', 'N', 'E', 'A', '', 'U', 'G', 'L', 'I', '', 'R', 'E', 'N', 'E', 'E'],
 ['M', 'E', 'L', 'O', 'N', 'B', 'A', 'L', 'L', '', 'Z', 'E', 'S', 'T', 'A'],
 ['B', 'R', 'I', 'B', 'E', 'S', '', 'T', 'I', 'T', 'O', '', 'L', 'I', 'D'],
 ['', 'T', 'E', 'A', 'M', '', 'S', 'O', 'A', 'K', '', 'E', 'A', 'T', 'S'],
 ['', '', '', 'B', 'O', 'U', 'I', 'L', 'L', 'O', 'N', 'C', 'U', 'B', 'E'],
 ['A', 'D', 'D', 'S', '', 'M', 'A', 'D', '', '', 'R', 'O', 'G', 'E', 'T'],
 ['C', 'R', 'O', '', 'S', 'A', 'M', '', 'P', 'T', 'A', '', 'H', 'M', 'O'],
 ['T', 'Y', 'P', 'O', 'S', '', '', 'A', 'A', 'A', '', 'S', 'T', 'E', 'N'],
 ['I', 'C', 'E', 'C', 'R', 'E', 'A', 'M', 'C', 'O', 'N', 'E', '', '', ''],
 ['V', 'E', 'N', 'T', '', 'S', 'C', 'A', 'T', '', 'E', 'E', 'L', 'S', ''],
 ['I', 'R', 'A', '', 'G', 'E', 'T', 'Z', '', 'R', 'E', 'S', 'O', 'L', 'E'],
 ['S', 'E', 'N', 'S', 'E', '', 'S', 'O', 'L', 'I', 'D', 'F', 'O', 'O', 'D'],
 ['T', 'A', 'C', 

In [29]:
import torch
from transformers import T5ForConditionalGeneration, ByT5Tokenizer

# Load pre-trained ByT5 model and tokenizer
model = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
tokenizer = AutoTokenizer.from_pretrained("google/byt5-small")

# Example positive and negative pairs
positive_pair = ("What is the capital of France?", "Paris")
negative_pair = ("What is the capital of France?", "Berlin")

# Tokenize inputs: the question feeds the encoder and the answer becomes the
# decoder target. T5 is an encoder-decoder model, so calling it with encoder
# inputs only (no decoder_input_ids / labels) raises an error.
positive_inputs = tokenizer(positive_pair[0], return_tensors="pt", truncation=True)
positive_labels = tokenizer(positive_pair[1], return_tensors="pt", truncation=True).input_ids
negative_inputs = tokenizer(negative_pair[0], return_tensors="pt", truncation=True)
negative_labels = tokenizer(negative_pair[1], return_tensors="pt", truncation=True).input_ids

# Forward pass: score each pair with the negative LM loss of the answer given
# the question (higher = more likely). Reducing to one score per pair gives
# both sides shape (1,), which the ranking loss needs; the raw logits of the
# two pairs have different sequence lengths and cannot be compared directly.
positive_score = -model(**positive_inputs, labels=positive_labels).loss.reshape(1)
negative_score = -model(**negative_inputs, labels=negative_labels).loss.reshape(1)

# Define pairwise ranking loss
loss_function = torch.nn.MarginRankingLoss(margin=1.0)
target = torch.tensor([1.0])  # Positive pair should have higher score

# Compute loss
loss = loss_function(positive_score, negative_score, target)
print("Loss:", loss.item())

# Continue with training loop, optimization, and evaluation as needed


Downloading config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.20G [00:00<?, ?B/s]


KeyboardInterrupt

