In [2]:
import nltk 
# Module-level stemmer instances, created once and reused by the search
# handler to normalise a query term before it is looked up in a stemmed index.
Porter = nltk.stem.PorterStemmer()
Lancaster = nltk.stem.LancasterStemmer()

In [5]:
import ipywidgets as widgets
from IPython.display import display

# Palette (CSS colour names) used by the framed containers below.
border_color = 'lightgray'
background_color = 'lightblue'
container_background_color = 'lightgray'


def _framed_layout(fill, **extra):
    """Build the Layout shared by every framed container.

    All frames use the same 2px solid border, 10px radius and 10px padding;
    `fill` is passed as the background colour and `extra` carries any
    per-container Layout options (width, justify_content, ...).

    NOTE(review): `border_radius` and `background_color` do not appear among
    the documented ipywidgets Layout traits, so they may have no visible
    effect - confirm against the installed ipywidgets version.
    """
    return widgets.Layout(
        border=f'2px solid {border_color}',
        border_radius='10px',
        padding='10px',
        background_color=fill,
        **extra,
    )


# --- Query row: free-text box plus the Search button -----------------------
query_input = widgets.Text(
    layout=widgets.Layout(width='60%'),
    placeholder='Enter your query here',
    description='Query:',
)
submit_button = widgets.Button(
    layout=widgets.Layout(width='20%'),
    description='Search',
)
# This row is filled with the border colour rather than a palette background.
query_container = widgets.HBox(
    children=[query_input, submit_button],
    layout=_framed_layout(border_color),
)

# --- Index options: tokenisation method and stemmer ------------------------
preprocessing_options = ['Split', 'Reg']
stemming_options = ['Porter', 'Lancaster', 'No Stemming']

preprocessing_dropdown = widgets.Dropdown(
    layout=widgets.Layout(width='45%'),
    description='Preprocessing:',
    options=preprocessing_options,
)
stemming_dropdown = widgets.Dropdown(
    layout=widgets.Layout(width='45%'),
    description='Stemming:',
    options=stemming_options,
)
preprocessing_container = widgets.HBox(
    children=[preprocessing_dropdown, stemming_dropdown],
    layout=_framed_layout(
        container_background_color,
        justify_content='space-between',
        width='50%',
    ),
)

# --- Index type: radio buttons choosing which kind of file is searched ----
file_options = ['DOCS per Term', 'Terms per Doc']
file_selection = widgets.RadioButtons(
    layout=widgets.Layout(width='40%'),
    description='File Type:',
    options=file_options,
)

# Second row of the UI: the option dropdowns next to the file-type selector.
dropdowns_container = widgets.HBox(
    children=[preprocessing_container, file_selection],
    layout=_framed_layout(background_color, justify_content='space-between'),
)

# --- Output: text area that the search handler fills with its results -----
result_area = widgets.Textarea(
    layout=widgets.Layout(width='100%', height='300px'),
    placeholder='Results will be displayed here',
    description='Results:',
    disabled=False,
)



# Search-button click handler and its path helper
def _index_file_path(selected_file, preprocessing, stemming):
    """Return the path of the Lab 2 index file matching the UI selections.

    The filename is composed from three independent choices:
    ``<descripteur|inverse>_<split|reg>[_porter|_lancaster].txt``.

    Args:
        selected_file: "Terms per Doc" selects the descriptor file; any
            other value selects the inverse file.
        preprocessing: "Split" selects the split-tokenised index; any other
            value selects the regex-tokenised one.
        stemming: "Porter" or "Lancaster" select the stemmed variants; any
            other value (e.g. "No Stemming") selects the unstemmed file.

    Returns:
        A path relative to the working directory, using the platform's own
        separator (identical to the former hard-coded string on Windows).
    """
    import os  # local import keeps this block self-contained

    kind = 'descripteur' if selected_file == "Terms per Doc" else 'inverse'
    tokenizer = 'split' if preprocessing == "Split" else 'reg'
    suffix = {'Porter': '_porter', 'Lancaster': '_lancaster'}.get(stemming, '')
    return os.path.join('..', 'Lab 2', f'{kind}_{tokenizer}{suffix}.txt')


def on_submit(b):
    """Run the search described by the widgets and show it in `result_area`.

    For "Terms per Doc" the query is compared with the first column of the
    descriptor file and every matching row is listed, followed by the number
    of matching rows and the sum of their frequency column. Otherwise the
    query is stemmed with the selected stemmer and compared with the second
    column of the inverse file.

    Args:
        b: The clicked button, as passed by `Button.on_click`; unused.

    Side effects:
        Overwrites `result_area.value` with the formatted results, with
        "No matching results found." when nothing matched, or with an error
        message when the index file cannot be read.
    """
    # Tokens produced by str.split() never carry whitespace, so a padded
    # query could never match; normalise it up front.
    query = query_input.value.strip()
    stemming = stemming_dropdown.value
    selected_file = file_selection.value
    per_doc = selected_file == "Terms per Doc"

    file_path = _index_file_path(
        selected_file, preprocessing_dropdown.value, stemming
    )

    if not per_doc:
        # The stemmed inverse files are searched by term, so the query term
        # has to go through the same stemmer before it is compared.
        if stemming == 'Porter':
            query = Porter.stem(query)
        elif stemming == 'Lancaster':
            query = Lancaster.stem(query)

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            rows = [line.split() for line in file]
    except OSError as err:
        # An exception raised inside a widget callback is easy to miss;
        # report the problem where the user is looking instead.
        result_area.value = f"Could not read index file '{file_path}': {err}"
        return

    line_counter = 1
    if per_doc:
        output = ["N  Ndoc   Term           Freq   Weight\n"]
        term_count = 0  # running sum of the frequency column
        for parts in rows:
            # Skip blank/truncated lines and rows of other documents.
            if len(parts) < 4 or parts[0] != query:
                continue
            output.append(
                f"{line_counter:<3} {parts[0]:<5} {parts[1]:<15} {parts[2]:<5} {parts[3]:<10}\n"
            )
            line_counter += 1
            term_count += int(parts[2])

        # Footer: number of matching rows and their summed frequencies.
        output.append("-------------------------------------------------------------------")
        output.append(f"\n# Doc vocabulary: {line_counter-1}               ")
        output.append(f"# Doc size: {term_count}\n")
    else:
        output = ["N   Term            Ndoc   Freq   Weight\n"]
        for parts in rows:
            # Skip blank/truncated lines and rows of other terms.
            if len(parts) < 5 or parts[1] != query:
                continue
            # The first column of the file is echoed as the row number.
            output.append(
                f"{parts[0]:<3} {parts[1]:<15} {parts[2]:<5} {parts[3]:<5} {parts[4]:<10}\n"
            )
            line_counter += 1

    if line_counter == 1:  # no row matched the query
        output.append("No matching results found.")

    result_area.value = "".join(output)

# Hook the Search button up to its click handler.
submit_button.on_click(on_submit)

# Render the interface top to bottom: query row, option row, results box.
ui_rows = (query_container, dropdowns_container, result_area)
display(*ui_rows)


HBox(children=(Text(value='', description='Query:', layout=Layout(width='60%'), placeholder='Enter your query …

HBox(children=(HBox(children=(Dropdown(description='Preprocessing:', layout=Layout(width='45%'), options=('Spl…

Textarea(value='', description='Results:', layout=Layout(height='300px', width='100%'), placeholder='Results w…