In [1]:
from pydantic import BaseModel


class Query(BaseModel):
    """A single search query together with its optional relevance label."""

    # Raw query text.
    text: str
    # Relevance label; stays None until a label is assigned.
    relevant: bool | None = None

In [4]:
import ipywidgets as widgets
from IPython.display import display

# Single-line text box for typing one query at a time.
# `font_size` is not a Layout trait: ipywidgets dropped it silently (the widget
# repr only ever showed height and width), so it is no longer passed.
query = widgets.Text(description="Query:", layout=widgets.Layout(width="600px", height="40px"))

# Button that adds the current text to the in-memory list of queries
add_button = widgets.Button(description="Add", layout=widgets.Layout(width="300px"))

# Button that writes the collected queries to disk and locks the form
save_button = widgets.Button(description="Save", layout=widgets.Layout(width="300px"))

buttons = widgets.HBox([add_button, save_button])

# Output area to display messages
output_area = widgets.Output()

# A blank (or whitespace-only) answer falls back to the default file name.
filename = input("Enter the filename to save queries to (empty for 'queries.jsonl'): ").strip() or "queries.jsonl"

with output_area:
    print(f"Queries will be saved to '{filename}'")


def save_queries(filename: str, queries: list[Query]):
    """Append queries to a JSON Lines file.

    Args:
        filename: Path of the file to append to (opened in append mode, so
            existing content is kept).
        queries: Items to serialize; each is written as the result of its
            ``model_dump_json()`` followed by a newline.
    """
    with open(filename, "a") as sink:
        sink.writelines(item.model_dump_json() + "\n" for item in queries)


# Session state shared by the widget callbacks.
query_index = 0  # running number shown in the "Added [...]" messages
queries = []  # Query objects collected so far, in the order they were added


def add_query(b):
    """Add the text currently in the query box to the pending queries.

    Registered both as the "Add" button click handler and as an observer of
    the text box value, so ``b`` is either the button or a change
    notification; it is not used.

    Input that is empty or consists only of whitespace is ignored, and
    surrounding whitespace is removed before the query is stored.

    Args:
        b: Callback argument supplied by ipywidgets (unused).
    """
    global query_index
    text = query.value.strip()
    if not text or not filename:
        return
    queries.append(Query(text=text))
    with output_area:
        print(f"Added [{query_index}]: {text}")
    # Clearing the box notifies the value observer again; that nested call
    # sees an empty value and returns immediately.
    query.value = ""
    query_index += 1


def save(b):
    """Lock the entry form and write the collected queries to ``filename``.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    # Detach the value observer first so nothing more gets added.
    query.unobserve(add_query, names="value")
    for control in (query, add_button, save_button):
        control.disabled = True
    save_queries(filename, queries)
    with output_area:
        print("Queries saved to file.")


# Wire up the form: a change of the text box value and a click on "Add" both
# run add_query; "Save" runs save.
query.continuous_update = False
query.observe(add_query, names="value")
add_button.on_click(add_query)
save_button.on_click(save)

# Render the text box, the button row and the message area.
display(query, buttons, output_area)

Text(value='', continuous_update=False, description='Query:', layout=Layout(height='40px', width='600px'))

HBox(children=(Button(description='Add', layout=Layout(width='300px'), style=ButtonStyle()), Button(descriptio…

Output()

In [5]:
import ipywidgets as widgets
from IPython.display import display
from typing import List


# Load queries from .jsonl file
def load_queries(filename: str) -> List[Query]:
    """Read queries from a JSON Lines file.

    Args:
        filename: Path of the file to read. Every non-blank line must be the
            JSON form of a ``Query``.

    Returns:
        The parsed queries, in file order.

    Errors raised by ``open`` or by ``Query.model_validate_json`` propagate
    to the caller.
    """
    queries = []
    with open(filename, "r") as file:
        for line in file:
            # Blank lines (e.g. a trailing empty line left by an editor) hold
            # no record and would fail JSON validation, so skip them.
            if not line.strip():
                continue
            queries.append(Query.model_validate_json(line))
    return queries


# User interaction widgets
# Read-only text box showing the query under review.
# `font_size` is not a Layout trait: ipywidgets dropped it silently (the widget
# repr only ever showed height and width), so it is no longer passed.
query_display = widgets.Text(
    description="Query:", disabled=True, layout=widgets.Layout(width="600px", height="40px")
)

left_button = widgets.Button(description="Not Relevant", layout=widgets.Layout(width="300px"))
right_button = widgets.Button(description="Relevant", layout=widgets.Layout(width="300px"))
buttons = widgets.HBox([left_button, right_button])

output_area = widgets.Output()

# A blank (or whitespace-only) answer falls back to the default file name.
filename = (
    input("Enter the filename to load queries from (empty for 'queries.jsonl'): ").strip() or "queries.jsonl"
)
output_filename = (
    input("Enter the filename to save updated queries to (empty for 'updated_queries.jsonl'): ").strip()
    or "updated_queries.jsonl"
)

with output_area:
    print(f"Queries will be loaded from '{filename}' and saved to '{output_filename}'")

# Global variables to keep track of state
queries = []  # queries being reviewed
current_index = 0  # position in `queries` of the query currently shown


def load_and_display_queries(b):
    """Load the queries from ``filename`` and show the first one.

    Replaces the module-level ``queries`` list and resets ``current_index``
    to 0. When the file yields no queries, the display is left untouched and
    no message is printed.

    Args:
        b: Callback-style argument (unused).
    """
    global queries, current_index
    loaded = load_queries(filename)
    queries, current_index = loaded, 0
    if not loaded:
        return
    query_display.value = loaded[0].text
    with output_area:
        print(f"Loaded {len(loaded)} queries from {filename}")


def mark_relevant(b):
    """Label the query currently shown as relevant, then move on.

    Does nothing when there are no queries or the index is past the end.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    if not queries or current_index >= len(queries):
        return
    queries[current_index].relevant = True
    next_query()


def mark_not_relevant(b):
    """Label the query currently shown as not relevant, then move on.

    Does nothing when there are no queries or the index is past the end.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    if not queries or current_index >= len(queries):
        return
    queries[current_index].relevant = False
    next_query()


def next_query():
    """Show the next query, or finish the review when none remain.

    Increments the module-level ``current_index``. If a query exists at the
    new index its text is displayed. Otherwise the labelled queries are
    written to ``output_filename`` and both verdict buttons are disabled.
    """
    global current_index
    current_index += 1
    if current_index < len(queries):
        query_display.value = queries[current_index].text
        return

    query_display.value = "All queries reviewed."
    save_queries(output_filename, queries)
    # No observer is ever attached to query_display, so there is nothing to
    # unobserve here; the previous unobserve(add_query, ...) call referred to
    # a handler from another cell and raised NameError when that cell had not
    # been run, leaving the buttons enabled.
    left_button.disabled = True
    right_button.disabled = True


# Connect each verdict button to its handler.
right_button.on_click(mark_relevant)
left_button.on_click(mark_not_relevant)

# Render the query box, the button row and the message area.
display(query_display, buttons, output_area)

# Populate the form from the input file right away.
load_and_display_queries(None)

Text(value='', description='Query:', disabled=True, layout=Layout(height='40px', width='600px'))

HBox(children=(Button(description='Not Relevant', layout=Layout(width='300px'), style=ButtonStyle()), Button(d…

Output()

In [7]:
import json
# Manually attach relevant module files to the reviewed queries.
#
# For every query marked relevant that has no "modules" entry yet, up to three
# module identifiers are read from the keyboard. Queries not marked relevant
# get "modules": null. The annotated records are written out as JSON Lines.

# Input is the default output name of the relevance-review cell.
input_filename = "updated_queries.jsonl"
output_filename = "queries_with_modules.jsonl"

# Blank lines are skipped so that a trailing empty line cannot crash json.loads.
# The names used here intentionally differ from `queries` / `query`, which the
# widget cells rely on and which this cell used to overwrite.
with open(input_filename, "r") as file:
    raw_lines = [raw_line for raw_line in file if raw_line.strip()]

queries_with_modules = []
for raw_line in raw_lines:
    query_parsed = json.loads(raw_line)
    print(query_parsed["text"])
    # A missing "relevant" key is treated like an unlabelled query.
    is_relevant = query_parsed.get("relevant")
    # if query hasn't got related modules
    if is_relevant and "modules" not in query_parsed:
        modules = []
        for _ in range(3):
            module_id = input("Module id (leave empty to skip): ").strip()
            # An empty answer is skipped; storing it would create a bogus
            # "module-.pdf" entry.
            if module_id:
                modules.append("module-" + module_id + ".pdf")
        query_parsed["modules"] = modules
        print("saved")
    elif not is_relevant:
        query_parsed["modules"] = None

    queries_with_modules.append(json.dumps(query_parsed))

# write updated back to file
with open(output_filename, "w") as file:
    for updated_line in queries_with_modules:
        file.write(updated_line + "\n")

mathan assignments
saved
mathan assignments with answers
saved
mathan midterm prep
saved
agal syllabus
saved
agla basis vector
saved
agla inverse matrix
saved
philosophy week 5 answers
saved
discrete math week 12
saved
philosophy final exam
saved
english compound sentence
saved
descriptive paragraph
saved
memory bus
saved
latency
saved
limits example
saved
cross product where used
saved
cramer rule
saved
logic gates
saved
Big O
saved
read write files example
saved
struct
saved
pointers
saved
preparation final
saved
agla test 1 demo
saved
English plagiarism
saved
set theory naive
saved
bit  fields
saved
exercises java
saved
riscv
saved
abstract methods itp
saved
what is pipelining
saved
java final methods
saved
agla 11 lab task 2 answer
saved
English response paragraph
saved
polar system of coordinates
saved
generics
saved
integral definition
saved
lambda programming code
saved
quadric math formulaes
saved
yaylor series decomposition
saved
complete fsa definition
saved
ndfsa conversion 

In [16]:
# Delete empty 'module-.pdf' placeholder entries from each query's module list


def process_line(line):
    """Return one JSON line re-serialized with placeholder modules removed.

    When the decoded value has a "modules" entry that is a list, falsy items
    and the literal "module-.pdf" are dropped from it. Anything else is
    re-serialized unchanged.

    Args:
        line: One JSON document as text.

    Returns:
        The JSON text of the (possibly filtered) value, without a newline.
    """
    record = json.loads(line)
    has_module_list = "modules" in record and isinstance(record["modules"], list)
    if has_module_list:
        kept = []
        for name in record["modules"]:
            if not name or name == "module-.pdf":
                continue
            kept.append(name)
        record["modules"] = kept
    return json.dumps(record)


# Rewrite the annotated file in place with placeholder module entries removed.
with open(output_filename, "r") as infile:
    lines = infile.readlines()

# Blank lines hold no record and would make json.loads inside process_line
# raise, so they are skipped.
processed_lines = [process_line(line) for line in lines if line.strip()]

with open(output_filename, "w") as outfile:
    for line in processed_lines:
        outfile.write(line + "\n")