# Genetic Code Exploration Helper

## Initialization

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from tabulate import tabulate
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
import numpy as np
from complexity_analysis import compute_complexity_track
import traceback

# Codon usage table offered as the "H. sapiens" reference in the dropdown.
# Layout: one-letter amino acid -> {'codons': [...], 'frequencies': [...]}.
# Codons use the RNA alphabet (U, not T); 'frequencies' is parallel to 'codons'
# (same order) and is drawn as the stacked reference bar for that amino acid.
# This table is also the source of the codon -> amino acid lookup (ref_cod).
# NOTE(review): the origin of the frequency values is not recorded in this file.
h_sapiens = {
    'A': {'codons': ['GCU', 'GCC', 'GCA', 'GCG'], 'frequencies': [0.27, 0.4, 0.23, 0.1]},
    'R': {'codons': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], 'frequencies': [0.08, 0.19, 0.11, 0.21, 0.21, 0.2]},
    'N': {'codons': ['AAU', 'AAC'], 'frequencies': [0.47, 0.53]},
    'D': {'codons': ['GAU', 'GAC'], 'frequencies': [0.46, 0.54]},
    'C': {'codons': ['UGU', 'UGC'], 'frequencies': [0.46, 0.54]},
    'E': {'codons': ['GAA', 'GAG'], 'frequencies': [0.42, 0.58]},
    'Q': {'codons': ['CAA', 'CAG'], 'frequencies': [0.27, 0.73]},
    'G': {'codons': ['GGU', 'GGC', 'GGA', 'GGG'], 'frequencies': [0.16, 0.34, 0.25, 0.25]},
    'H': {'codons': ['CAU', 'CAC'], 'frequencies': [0.42, 0.58]},
    'I': {'codons': ['AUU', 'AUC', 'AUA'], 'frequencies': [0.36, 0.47, 0.17]},
    'L': {'codons': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], 'frequencies': [0.07, 0.13, 0.13, 0.2, 0.07, 0.4]},
    'K': {'codons': ['AAA', 'AAG'], 'frequencies': [0.43, 0.57]},
    'M': {'codons': ['AUG'], 'frequencies': [1.0]},
    'F': {'codons': ['UUU', 'UUC'], 'frequencies': [0.46, 0.54]},
    'P': {'codons': ['CCU', 'CCC', 'CCA', 'CCG'], 'frequencies': [0.29, 0.32, 0.28, 0.11]},
    'S': {'codons': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], 'frequencies': [0.18, 0.22, 0.15, 0.06, 0.15, 0.24]},
    'T': {'codons': ['ACU', 'ACC', 'ACA', 'ACG'], 'frequencies': [0.25, 0.36, 0.28, 0.11]},
    'W': {'codons': ['UGG'], 'frequencies': [1]},
    'Y': {'codons': ['UAU', 'UAC'], 'frequencies': [0.44, 0.56]},
    'V': {'codons': ['GUU', 'GUC', 'GUA', 'GUG'], 'frequencies': [0.18, 0.24, 0.12, 0.46]},
    # Optional stop-codon entry, disabled by default. analyze_sequence skips
    # '*' entries when counting codons and truncates the input at the first stop codon.
    # '*': {'codons': ['UAA', 'UAG', 'UGA'], 'frequencies': [0.3, 0.3, 0.4]}
}

# Codon usage table offered as the "E. coli" reference in the dropdown.
# Same layout as the H. sapiens table: one-letter amino acid ->
# {'codons': [...], 'frequencies': [...]}, RNA alphabet, parallel lists.
# NOTE(review): the origin of the frequency values is not recorded in this file.
e_coli = {
    'A': {'codons': ['GCU', 'GCC', 'GCA', 'GCG'], 'frequencies': [0.18, 0.26, 0.23, 0.33]},
    'R': {'codons': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], 'frequencies': [0.36, 0.36, 0.07, 0.11, 0.07, 0.03]},
    'N': {'codons': ['AAU', 'AAC'], 'frequencies': [0.49, 0.51]},
    'D': {'codons': ['GAU', 'GAC'], 'frequencies': [0.63, 0.37]},
    'C': {'codons': ['UGU', 'UGC'], 'frequencies': [0.46, 0.54]},
    'E': {'codons': ['GAA', 'GAG'], 'frequencies': [0.68, 0.32]},
    'Q': {'codons': ['CAA', 'CAG'], 'frequencies': [0.34, 0.66]},
    'G': {'codons': ['GGU', 'GGC', 'GGA', 'GGG'], 'frequencies': [0.35, 0.37, 0.13, 0.15]},
    'H': {'codons': ['CAU', 'CAC'], 'frequencies': [0.57, 0.43]},
    'I': {'codons': ['AUU', 'AUC', 'AUA'], 'frequencies': [0.5, 0.39, 0.11]},
    'L': {'codons': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], 'frequencies': [0.14, 0.13, 0.12, 0.1, 0.04, 0.47]},
    'K': {'codons': ['AAA', 'AAG'], 'frequencies': [0.74, 0.26]},
    'M': {'codons': ['AUG'], 'frequencies': [1]},
    'F': {'codons': ['UUU', 'UUC'], 'frequencies': [0.58, 0.42]},
    'P': {'codons': ['CCU', 'CCC', 'CCA', 'CCG'], 'frequencies': [0.18, 0.13, 0.2, 0.49]},
    'S': {'codons': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], 'frequencies': [0.17, 0.15, 0.14, 0.14, 0.16, 0.24]},
    'T': {'codons': ['ACU', 'ACC', 'ACA', 'ACG'], 'frequencies': [0.19, 0.4, 0.17, 0.24]},
    'W': {'codons': ['UGG'], 'frequencies': [1]},
    'Y': {'codons': ['UAU', 'UAC'], 'frequencies': [0.59, 0.41]},
    'V': {'codons': ['GUU', 'GUC', 'GUA', 'GUG'], 'frequencies': [0.28, 0.2, 0.17, 0.35]},
    # Optional stop-codon entry, disabled by default (see the stop_codons set below).
    # '*': {'codons': ['UAA', 'UAG', 'UGA'], 'frequencies': [0.6, 0.1, 0.3]}
}

# Stop codons (RNA alphabet); the analysis truncates the input at the first one it meets
stop_codons = {"UAA", "UAG", "UGA"}

# Dropdown label -> codon usage table
reference_tables = {
    "H. sapiens": h_sapiens,
    "E. coli": e_coli
}

# Flat lookup of [codon, amino acid] rows, in the order the H. sapiens table lists them
ref_cod = [
    [codon, amino_acid]
    for amino_acid, entry in h_sapiens.items()
    for codon in entry['codons']
]

## Analysis Block

In [2]:
# Main Analysis Function

plt.close('all') # Close every open matplotlib figure so re-running this cell starts clean

NO_COMP_VALUE = "__NO_COMPARISON__" # Sentinel passed as ref_table when "No Comparison" is selected in the dropdown

def analyze_sequence(
        project, input_seq,
        max_font_size, ref_table,
        ref_table_name, gc_winsize_1,
        gc_winsize_2, gc3_winsize_1,
        gc3_winsize_2, comp_window,
        comp_step, comp_k,
        comp_alpha, comp_smooth
        ): # ref_table is a codon usage dict or NO_COMP_VALUE
    """
    Analyzes the codon usage of the input sequence, optionally comparing against a reference table.
    Also analyzes the GC and GC3 content as well as the sequence complexity.

    The input is upper-cased, stripped of all whitespace, converted to the RNA
    alphabet (T -> U), trimmed to a multiple of three bases and truncated at the
    first in-frame stop codon. Results are printed and plotted; nothing is returned.

    Args:
        project (str):
            Project name; appended to every plot title.

        input_seq (str):
            The raw nucleic acid sequence input by the user.

        max_font_size (int):
            Font size of the label on the most frequent codon of each bar.
            Other labels use 80% of it (rounded). 0 disables codon labels.

        ref_table (dict or str):
            The chosen reference codon usage dictionary.
            NO_COMP_VALUE if no comparison is selected.

        ref_table_name (str):
            The name of the reference table (e.g., "H. sapiens") or "No Comparison".

        gc_winsize_1 (int):
            Window size (bases) of the first sliding-window GC track.

        gc_winsize_2 (int):
            Window size (bases) of an optional second GC track; 0 turns it off.

        gc3_winsize_1 (int):
            Window size (codons) of the first sliding-window GC3 track.

        gc3_winsize_2 (int):
            Window size (codons) of an optional second GC3 track; 0 turns it off.

        comp_window, comp_step, comp_k, comp_alpha, comp_smooth:
            Forwarded unchanged to compute_complexity_track as window_size,
            step, k, gcbal_alpha and smooth, respectively.

    Raises:
        ValueError: with message 'empty' if the input is blank, 'illegal' if it
            contains characters other than A, C, G, T/U, and 'no_codons' if no
            codon is left to analyze (e.g. the sequence starts with a stop codon
            or is shorter than three bases).
    """
    # --- Input Processing and Validation ---
    # Modify this part if visual inclusion of stop codons is desired
    print("--- Processing input sequence ---")
    if not input_seq.strip():
        raise ValueError('empty')

    # split()/join drops every kind of whitespace (spaces, newlines, tabs, ...)
    input_seq = "".join(input_seq.upper().split())
    input_seq = input_seq.replace("T", "U")

    valid_chars = set("ACGU")
    invalid_found = set(input_seq) - valid_chars
    if invalid_found:
        print(f"   Invalid characters found: {', '.join(sorted(invalid_found))}")
        raise ValueError('illegal')

    lengthmod = len(input_seq) % 3
    if lengthmod != 0:
        input_seq = input_seq[:-lengthmod]
        print(f"WARNING: Trimmed incomplete 3' codon ({lengthmod} bases removed).")

    clean_seq = input_seq   # Cleaned sequence south of here

    seq_cods = [] # Break cleaned input sequence into list of codons, check for stop
    stop_flag = False
    for start in range(0, len(clean_seq), 3):
        codon = clean_seq[start:start + 3]
        if codon in stop_codons:
            print(f"WARNING: Stop codon '{codon}' found at base {start+1}/{len(clean_seq)}. Sequence will be truncated.")
            clean_seq = clean_seq[:start]
            stop_flag = True
            break
        seq_cods.append(codon)

    num_codons_analyzed = len(seq_cods)
    print(f"Sequence length (after trimming): {len(clean_seq)} bases")
    print(f"Stop codons found: {stop_flag}")
    print(f"Codons entering analysis: {num_codons_analyzed}")

    # Nothing left after trimming/truncation: bail out before the percentage
    # calculations below would divide by zero.
    if not seq_cods:
        raise ValueError('no_codons')

    # --- GC ---
    gc_count = clean_seq.count('G') + clean_seq.count('C')
    print(f'GC content: {gc_count / len(clean_seq) * 100:.2f}%')

    # --- GC3 ---
    gc3_counter = 0
    gc3_ratio = []  # Cumulative GC3 percentage after each codon
    for idx, cod in enumerate(seq_cods):
        if cod[2] in 'GC':
            gc3_counter += 1
        gc3_ratio.append(gc3_counter / (idx + 1) * 100)

    print(f"GC3 content: {gc3_counter / len(seq_cods) * 100:.2f}%")

    # --- Codon/AA Counting ---
    codon_counts = {}
    for codon in seq_cods:
        codon_counts[codon] = codon_counts.get(codon, 0) + 1

    # Group observed codons by amino acid, keeping the reference codon order
    codons_by_aa = {}
    for codon, aa in ref_cod:
        if aa == '*':
            continue
        seen = codon_counts.get(codon, 0)
        if seen:
            codons_by_aa.setdefault(aa, []).append((codon, seen))

    if not codons_by_aa:
        raise ValueError('no_codons')

    # --- Frequency Calculation and Formatting ---
    usage_raw = []      # Final analysis result for plotting: [aa, [codons], [frequencies]]
    usage_pretty = []   # Final analysis result for tabular output
    for aa in sorted(codons_by_aa):
        codons_list = [codon for codon, _ in codons_by_aa[aa]]
        counts_list = [seen for _, seen in codons_by_aa[aa]]
        cumulative = sum(counts_list)
        frequencies = [count / cumulative for count in counts_list] # Calculate relative codon freqs
        usage_pretty.append([
            aa,
            ", ".join(codons_list),
            ", ".join([f"{r:.2f}" for r in frequencies])
        ])
        usage_raw.append([aa, codons_list, frequencies])

    # --- Codon Usage Table Output---
    print("\n--- Codon Usage Frequency in Input Sequence ---")
    col_names = ["AA", "Codons Found", "Observed Frequencies"]
    print(tabulate(usage_pretty, headers = col_names, tablefmt = "grid"))

    # --- Sliding Window GC / GC3 Calculation ---
    def isgc(base):
        return base in 'GC'

    def sliding_gc_percent(bases, winsize, oversize_msg):
        """Return the G/C percentage of every window of `winsize` items in `bases`.

        Prints `oversize_msg` and returns [] if the window does not fit;
        otherwise also prints max/min/mean of the track.
        """
        if winsize > len(bases):
            print(oversize_msg)
            return []

        # Initialize first window, then update the running count per shift
        window = sum(isgc(base) for base in bases[:winsize])
        results = [window / winsize * 100]
        for i in range(1, len(bases) - winsize + 1):
            if isgc(bases[i - 1]):              # Trailing base leaves
                window -= 1
            if isgc(bases[i + winsize - 1]):    # Leading base enters
                window += 1
            results.append(window / winsize * 100)

        print(f'Max.: {max(results):.2f}%')
        print(f'Min.: {min(results):.2f}%')
        print(f'Mean: {np.mean(results):.2f}%')
        return results

    def relabel_ticks_one_based(n_points):
        """Label the x axis 1-based, reusing matplotlib's automatic tick spacing."""
        default_ticks = plt.gca().get_xticks()
        # Make sure the step is never smaller than 1
        xstep = int(max(1, default_ticks[1] - default_ticks[0]))
        xticks = range(0, n_points, xstep)
        plt.xticks(xticks, [str(i + 1) for i in xticks])

    gc_oversize_msg = 'ERROR: GC window size must be smaller than query length'
    gc3_oversize_msg = 'ERROR: GC3 window size must be smaller than codons in query.'
    third_bases = "".join(cod[2] for cod in seq_cods)

    # --- Codon Usage Plot ---
    print("\n--- Generating codon usage plot ---")

    amino_acids = [item[0] for item in usage_raw] # Extract amino acids for x-axis ticks

    # Change plot colors here, if desired
    colors_list = ['#8986e5', '#f6786c', '#36b600', '#00bfc3', '#9690fe', '#e66bf3']
    colors_list_ref = ['#b8b6ef', '#faada7', '#88d366', '#66c5e8', '#bfbcff', '#f1a6f8']

    plt.figure(figsize = (17, 8))
    index = np.arange(len(amino_acids))
    sub_size = round(0.8 * max_font_size)
    bar_width = 0.35

    is_comparing = ref_table != NO_COMP_VALUE # Set "check ref table" flag

    for i, (aa, codons, frequencies) in enumerate(usage_raw):
        # Bar positions
        if is_comparing:
            input_pos = index[i] - bar_width / 2
            ref_pos = index[i] + bar_width / 2
            current_bar_width = bar_width
        else: # Not comparing
            input_pos = index[i]
            ref_pos = None
            current_bar_width = bar_width * 1.5

        # Plot: Analyzed Sequence
        bottom = 0
        top_freq = max(frequencies)
        for j, (codon, freq) in enumerate(zip(codons, frequencies)):
            color = colors_list[j % len(colors_list)]

            plt.bar(input_pos, freq, current_bar_width, bottom = bottom, color = color,
                    edgecolor = 'grey', linewidth = 0.5)

            y_position = bottom + freq / 2
            is_max_freq = (freq == top_freq) # Most frequent codon gets the emphasized label
            fontsize = max_font_size if is_max_freq else sub_size
            fontweight = 'bold' if is_max_freq else 'normal'
            rotation = 90 if is_max_freq else 0
            if max_font_size > 0:
                plt.text(input_pos, y_position, codon, ha = 'center', va = 'center',
                            fontsize = fontsize, color = 'white', fontweight = fontweight, rotation = rotation)
            bottom += freq

        # Plot: Reference Table Graph
        if is_comparing:
            ref_codons = ref_table[aa]['codons']
            ref_frequencies = ref_table[aa]['frequencies']
            ref_bottom = 0
            ref_top_freq = max(ref_frequencies)
            for j, (codon, freq) in enumerate(zip(ref_codons, ref_frequencies)):
                color = colors_list_ref[j % len(colors_list_ref)]

                plt.bar(ref_pos, freq, current_bar_width, bottom = ref_bottom, color = color,
                        hatch='///', edgecolor = 'grey', linewidth = 0.5)

                y_position = ref_bottom + freq / 2
                is_max_freq = (freq == ref_top_freq) # Most frequent codon in reference
                fontsize = max_font_size if is_max_freq else sub_size
                fontweight = 'bold' if is_max_freq else 'normal'
                rotation = 90 if is_max_freq else 0
                if max_font_size > 0:
                    plt.text(ref_pos, y_position, codon, ha='center', va='center',
                            fontsize = fontsize, color = 'black', fontweight = fontweight, rotation = rotation)
                ref_bottom += freq

    # --- Legend Handles ---
    legend_handles = [
        Patch(facecolor = 'white', edgecolor = 'black', label = 'Analyzed Sequence')
    ]
    if is_comparing: # Handle for reference table
        legend_handles.append(
            Patch(facecolor = 'white', edgecolor = 'black', hatch = '///',
                  label = f'Reference: {ref_table_name}')
        )

    # --- Frequency plot ---
    plt.xlabel('Amino Acids')
    plt.ylabel('Relative Codon Frequency')
    plt.title(f'Codon Map - {project}')
    plt.xticks(index, amino_acids, rotation = 45, ha = 'center')
    plt.ylim(0, 1.05)
    plt.legend(handles = legend_handles)
    plt.tight_layout()
    plt.show()

    # --- Complexity Track (sliding window) ---
    comp = compute_complexity_track(
        clean_seq,
        window_size = comp_window,
        step = comp_step,
        k = comp_k,
        gcbal_alpha = comp_alpha,
        smooth = comp_smooth)

    x = comp['mid']
    y = comp['score']

    if len(y) > 0:
        plt.figure(figsize = (17, 8))
        plt.plot(x, y, lw = 1)
        plt.ylim(0, 1)
        plt.xlabel('Position (bp)')
        plt.ylabel('Complexity')
        plt.title(f'Sliding-window Complexity (W = {comp_window}, step = {comp_step}, k = {comp_k}, alpha = {comp_alpha}, smooth = {comp_smooth}) - {project}')
        plt.tight_layout()
        plt.axhline(y = 0.5, color = 'green', linestyle = '--', linewidth = 0.8)
        plt.axhspan(0, 0.2, color = "red", alpha = 0.12)
        plt.axhspan(0.5, 1, color = "green", alpha = 0.12)
        # Grey guides mark the observed range (y is expected to offer .max()/.min())
        plt.axhline(y = y.max(), color = 'grey', linestyle = '--', linewidth = 0.8)
        plt.axhline(y = y.min(), color = 'grey', linestyle = '--', linewidth = 0.8)

        plt.show()
    else:
        print(f'Sequence too short for complexity track (window_size={comp_window}).')

    # --- Sliding Window GC plot ---
    print('\n--- GC Calculations ---')
    print('GC window 1:')
    gc_1 = sliding_gc_percent(clean_seq, gc_winsize_1, gc_oversize_msg)
    plt.figure(figsize = (17, 8))   # This makes sure all the figure contents scale correctly
    gc_win_1, = plt.plot(gc_1)
    gc_handles = [gc_win_1]
    gc_labels = [f'Window Size: {gc_winsize_1}']
    gc_points = len(gc_1)

    # --- Second GC Overlay ---
    gc_second_window = gc_winsize_2 > 0
    if gc_second_window:
        print('\nGC window 2:')
        gc_2 = sliding_gc_percent(clean_seq, gc_winsize_2, gc_oversize_msg)
        gc_win_2, = plt.plot(gc_2)
        gc_handles.append(gc_win_2)
        gc_labels.append(f'Window Size: {gc_winsize_2}')
        gc_points = max(gc_points, len(gc_2))   # Ticks must span the longer track

    plt.title(f'GC % Moving Average - {project}')
    plt.xlabel('Base Position at Window Start')
    plt.ylabel('GC Percentage')
    relabel_ticks_one_based(gc_points)
    plt.ylim(0, 105)
    plt.legend(handles = gc_handles, labels = gc_labels)
    plt.axhline(y = 50, color = 'green', linestyle = '--', linewidth = 0.8)

    # Annotate variability channel for first window
    # (skipped when the track is empty because the window did not fit)
    if not gc_second_window and gc_1:
        y_max = max(gc_1)
        plt.axhline(y = y_max, color = 'grey', linestyle = '--', linewidth = 0.8)
        y_min = min(gc_1)
        plt.axhline(y = y_min, color = 'grey', linestyle = '--', linewidth = 0.8)
        plt.annotate(
            f"{y_max:.2f}",
            xy = (0, y_max),
            xytext = (0, y_max + 1.5),
            )
        plt.annotate(
            f"{y_min:.2f}",
            xy = (0, y_min),
            xytext = (0, y_min - 3),
            )

    plt.show()

    # --- Sliding Window GC3 plot ---
    print('GC3 window 1:')
    gc3_slide = sliding_gc_percent(third_bases, gc3_winsize_1, gc3_oversize_msg)
    plt.figure(figsize = (17, 8))
    gc3_win_1, = plt.plot(gc3_slide)
    gc3_handles = [gc3_win_1]
    gc3_labels = [f'Window Size: {gc3_winsize_1}']
    gc3_points = len(gc3_slide)

    # --- Second GC3 Overlay ---
    if gc3_winsize_2 > 0:
        print('\nGC3 window 2:')
        gc3_2 = sliding_gc_percent(third_bases, gc3_winsize_2, gc3_oversize_msg)
        gc3_win_2, = plt.plot(gc3_2)
        gc3_handles.append(gc3_win_2)
        gc3_labels.append(f'Window Size: {gc3_winsize_2}')
        gc3_points = max(gc3_points, len(gc3_2))    # Ticks must span the longer track

    plt.title(f'GC3 % Moving Average - {project}')
    plt.xlabel('Codon Position at Window Start')
    plt.ylabel('GC3 Percentage')
    relabel_ticks_one_based(gc3_points)
    plt.ylim(0, 105)
    plt.legend(handles = gc3_handles, labels = gc3_labels)

    plt.show()

    # --- Global GC3 plot ---
    plt.figure(figsize = (17, 8))
    plt.plot(gc3_ratio)
    plt.xlabel('Codon Position')
    plt.ylabel('GC3 Percentage')
    plt.title(f'GC3 Ratio Along Sequence - {project}')
    relabel_ticks_one_based(len(gc3_ratio))
    plt.ylim(0, 105)
    plt.show()


# (End of analyze_sequence function)

## GUI Handling

In [None]:
# Build the input form

_MAX_WINDOW = 1000000   # Upper bound shared by all window/step inputs


def _bounded_int(label, default, **limits):
    """Create a 200px-wide BoundedIntText; `limits` carries any of min / max / step."""
    return widgets.BoundedIntText(
        value = default,
        description = label,
        layout = widgets.Layout(width = '200px'),
        **limits
    )


# --- Project Name / Plot Title ---
project_name = widgets.Text(
    value = 'Codon Usage Analysis',
    placeholder = 'Enter project name',
    description = 'Title:',
    layout = widgets.Layout(width = '95%')
)

# --- Sequence Input ---
seq_input = widgets.Textarea(
    placeholder = 'Enter nucleic acid sequence here (A, C, G, T or U)',
    description = 'Sequence:',
    layout = widgets.Layout(height = '150px', width = '95%')
)

# --- Dropdown for Reference Choice ---
# "No Comparison" is represented by a sentinel string rather than None
NO_COMP_VALUE = "__NO_COMPARISON__"
dropdown_options = [("No Comparison", NO_COMP_VALUE), *reference_tables.items()]

ref_select = widgets.Dropdown(
    options = dropdown_options,
    value = NO_COMP_VALUE,  # Start without a reference table
    description = 'Reference:',
)

# --- Label Font Size (size of the max-frequency codon label; 0 = off) ---
fontsize_input = _bounded_int('Label Size:', 12, min = 0, step = 1)

# --- Sliding Windows ---
slide_label = widgets.HTML(value="<b>Sliding Window Parameters:</b>")

# --- GC Controls (second window: 0 means off) ---
gc_window_1 = _bounded_int('GC (1):', 50, min = 1, max = _MAX_WINDOW)
gc_window_2 = _bounded_int('GC (2):', 0, max = _MAX_WINDOW)

# --- GC3 Controls (second window: 0 means off) ---
gc3_window_1 = _bounded_int('GC3 (1):', 15, min = 1, max = _MAX_WINDOW)
gc3_window_2 = _bounded_int('GC3 (2):', 0, max = _MAX_WINDOW)

# --- Complexity Controls ---
comp_label = widgets.HTML('<b>Complexity Track Parameters:</b>')

complexity_window = _bounded_int('Window:', 50, min = 1, max = _MAX_WINDOW)
complexity_step = _bounded_int('Step:', 5, min = 1, max = _MAX_WINDOW)
complexity_k = _bounded_int('k-mer k:', 3, min = 1, max = 10)
# 0 or 1 disables smoothing
complexity_smooth = _bounded_int('Smooth:', 4, min = 0, max = 49)

# GC-balance alpha for complexity (0 = no GC influence)
complexity_alpha = widgets.FloatSlider(
    value = 0.5,
    min = 0.0,
    max = 2.0,
    step = 0.05,
    description = 'GC alpha:',
    readout_format = '.2f',
    orientation = 'vertical',
    layout = widgets.Layout(height = '180px')
)

# --- Action Buttons ---
run_button = widgets.Button(
    description = 'Run Analysis',
    button_style = 'success',
    tooltip = 'Click to analyze the sequence',
    icon = 'cogs'
)

clear_button = widgets.Button(
    description = 'Clear',
    button_style = 'warning',
    tooltip = 'Clear everything',
    icon = 'recycle'
)

# --- Output widget (receives all printed text and plots) ---
output_area = widgets.Output(
    layout = widgets.Layout(border = '1px solid black', padding = '5px', overflow = 'auto')
)

# Button Click Handler

def on_run_button_clicked(_):
    """Run the analysis with the current widget values, rendering into output_area.

    Exceptions whose message is one of the known codes ('empty', 'illegal',
    'no_codons') are shown as a short notice; anything else is reported
    together with its traceback.
    """
    known_errors = {
        'empty': '\nERROR: Input sequence is empty.',
        'illegal': '\nIllegal characters detected. Input must be a nucleotide sequence (A, C, G, T/U)',
        'no_codons': '\nERROR: No codons matching the standard genetic code were counted in the input.',
    }

    with output_area:
        clear_output(wait = True)
        print("Starting Analysis...\n")
        try:
            analyze_sequence(
                # Input
                project = project_name.value,
                input_seq = seq_input.value,
                # Reference selection (dict or sentinel, plus its display label)
                ref_table = ref_select.value,
                ref_table_name = ref_select.label,
                # Font sizing
                max_font_size = fontsize_input.value,
                # GC window sizes
                gc_winsize_1 = gc_window_1.value,
                gc_winsize_2 = gc_window_2.value,
                # GC3 window sizes
                gc3_winsize_1 = gc3_window_1.value,
                gc3_winsize_2 = gc3_window_2.value,
                # Complexity parameters
                comp_window = complexity_window.value,
                comp_step = complexity_step.value,
                comp_k = complexity_k.value,
                comp_alpha = complexity_alpha.value,
                comp_smooth = complexity_smooth.value,
            )

            print("\nAnalysis Complete.")

        except Exception as e:
            notice = known_errors.get(str(e))
            if notice is not None:
                print(notice)
            else:
                print(f"\nAn unexpected error occurred during analysis: {e}")
                traceback.print_exc()
                    
            
def on_clear_button_clicked(_):
    """Click handler for the Clear button: wipe the output area and empty the sequence box.

    The single argument (the clicked button, supplied by ipywidgets) is ignored.
    """
    with output_area:
        clear_output()
        seq_input.value = ''

# Register the click handlers on their buttons
run_button.on_click(on_run_button_clicked)
clear_button.on_click(on_clear_button_clicked)

## Output

In [4]:
# Assemble and show the form

# Left column: sliding-window sizes (right margin separates it from the next column)
window_column = widgets.VBox(
    [slide_label, gc_window_1, gc_window_2, gc3_window_1, gc3_window_2],
    layout = widgets.Layout(margin = '0 40px 0 0')
)

# Middle column: complexity track settings
complexity_column = widgets.VBox(
    [comp_label, complexity_window, complexity_step, complexity_k, complexity_smooth]
)

# Both columns plus the vertical alpha slider side by side
parameter_row = widgets.HBox(
    [window_column, complexity_column, complexity_alpha],
    layout = widgets.Layout(margin = '20px 0 0 0')
)

button_row = widgets.HBox(
    [run_button, clear_button],
    layout = widgets.Layout(margin = '20px 0 0 0')
)

input_widgets = widgets.VBox(
    [project_name, seq_input, ref_select, fontsize_input, parameter_row, button_row]
)

display(input_widgets, output_area)

VBox(children=(Text(value='Codon Usage Analysis', description='Title:', layout=Layout(width='95%'), placeholde…

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…