In [None]:
!pip install streamlit
!pip install watchdog

In [18]:
!pip freeze > requirements.txt

In [19]:
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8503[0m
[34m  Network URL: [0m[1mhttp://10.16.7.104:8503[0m
[0m
VBox(children=(HTML(value='<b>Enter text (Latex Allowed) or URL:</b>'), HBox(children=(Textarea(value='Cox proportional hazard (PH) regression models \\cite{CoxD.R.1972RMaL} are widely used for analyzing time-to-event data in epidemiological and clinical research (ECR).', layout=Layout(height='100px', width='100%'), placeholder='Enter text or URL'), Button(description='Clear Input', icon='times', style=ButtonStyle(), tooltip='Click to clear input'))))) Button(button_style='primary', description='Generate Abbreviations', icon='magic', style=ButtonStyle(), tooltip='Click to generate abbreviations') VBox(children=(HBox(children=(HTML(value='<b>List of Abbreviations</b>'), Dropdown(description='Format:', options=('plain', 'nomenclature', 'tabular'), value='plain'))), HBox(children=(Textarea(value='', l

# Create the app

Click the ▶️ (play) button next to the code cell below to run the code and display the interactive widgets.

In [25]:

#%%writefile app.py

import re
import requests
from bs4 import BeautifulSoup
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import HTML

def get_text_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return soup.get_text()
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}"

def extract_abbreviations(text):
    pattern = re.compile(r'((?:[\w-]+\s+){1,10})\((([a-z]*[A-Z]{2,})[a-z]*)\)')
    matches = pattern.findall(text)

    abbreviation_dict = {}

    for match in matches:
        words_ahead = [word for word in re.split(r'\s+|(?<=-)(?=[A-Za-z])', match[0].strip()) if word]
        abbr = match[1]
        abbr_letters = list(re.sub(r'[^A-Z]', '', abbr.upper()))

        full_name_words = []
        abbr_index = 0

        for word in reversed(words_ahead):
            if word and len(word) > 0 and abbr_index < len(abbr_letters) and len(word.replace('-', '')) > 0 and word.replace('-', '')[0].upper() == abbr_letters[len(abbr_letters) - 1 - abbr_index]:
                full_name_words.insert(0, word)
                abbr_index += 1
            if abbr_index == len(abbr_letters):
                break

        if len(full_name_words) == len(abbr_letters):
            full_name = ''.join(word if i == 0 else (' ' + word if not full_name_words[i - 1].endswith('-') else word) for i, word in enumerate(full_name_words))
            abbreviation_dict[abbr] = full_name

    return abbreviation_dict


def format_abbreviations(abbreviations, format_type):
    if format_type == "nomenclature":
        latex_output = "\\usepackage{nomencl}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"\\nomenclature{{{abbr}}}{{{full_name}}}\n"
        return latex_output
    elif format_type == "tabular":
        latex_output = "\\begin{tabular}{ll}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"{abbr} & {full_name} \\\\\n"
        latex_output += "\\end{tabular}\n"
        return latex_output
    else: # Default plain text list
        output = ""
        for abbr, full_name in abbreviations.items():
            output += f"{abbr}: {full_name}; "
        return output

def process_input(input_text, format_type):
    if input_text.startswith('http'):
        text = get_text_from_url(input_text)
    else:
        text = input_text

    abbreviations = extract_abbreviations(text)
    formatted_output = format_abbreviations(abbreviations, format_type)

    output_text_box.value = formatted_output
    num_lines = formatted_output.count('\n') + 2
    output_text_box.layout.height = f'{min(num_lines * 20, 400)}px'

def clear_output_area(b):
    output_text_box.value = ''
    output_text_box.layout.height = '100px'

def clear_text_input(b):
    text_box.value = ''

def rerun_format(change):
    global text_box, output_format_dropdown
    process_input(text_box.value, change.new)

text_label = HTML(value='<b>Enter text (Latex Allowed) or URL:</b>')
text_box = widgets.Textarea(
    value=r'Cox proportional hazard (PH) regression models \cite{CoxD.R.1972RMaL} are widely used for analyzing time-to-event data in epidemiological and clinical research (ECR).',
    placeholder='Enter text or URL',
    disabled=False,
    layout=widgets.Layout(width='100%', height='100px')
)

output_label = HTML(value='<b>List of Abbreviations</b>')
output_format_dropdown = widgets.Dropdown(
    options=['plain','nomenclature', 'tabular'],
    value='plain',
    description='Format:',
)

output_box = widgets.HBox([output_label, output_format_dropdown])

output_text_box = widgets.Textarea(
    value='',
    placeholder='List of Generated Abbreviations',
    disabled=False,
    layout=widgets.Layout(width='100%', height='100px')
)

submit_button = widgets.Button(
    description='Generate Abbreviations',
    disabled=False,
    button_style='primary',
    tooltip='Click to generate abbreviations',
    icon='magic'
)

clear_output_button = widgets.Button(
    description='Clear Output',
    disabled=False,
    button_style='',
    tooltip='Click to clear output',
    icon='times'
)

clear_text_button = widgets.Button(
    description='Clear Input',
    disabled=False,
    button_style='',
    tooltip='Click to clear input',
    icon='times'
)

submit_button.on_click(lambda b: process_input(text_box.value, output_format_dropdown.value))
clear_output_button.on_click(clear_output_area)
clear_text_button.on_click(clear_text_input)

output_format_dropdown.observe(rerun_format, names='value')

input_box_with_clear = widgets.VBox([text_label, widgets.HBox([text_box, clear_text_button])])
output_box_with_clear = widgets.VBox([output_box, widgets.HBox([output_text_box, clear_output_button])])

display(input_box_with_clear, submit_button, output_box_with_clear)


VBox(children=(HTML(value='<b>Enter text (Latex Allowed) or URL:</b>'), HBox(children=(Textarea(value='Cox pro…

Button(button_style='primary', description='Generate Abbreviations', icon='magic', style=ButtonStyle(), toolti…

VBox(children=(HBox(children=(HTML(value='<b>List of Abbreviations</b>'), Dropdown(description='Format:', opti…

In [None]:
def format_abbreviations(abbreviations, format_type):
    if format_type == "nomenclature":
        latex_output = "\\usepackage{nomencl}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"\\nomenclature{{{abbr}}}{{{full_name}}}\n"
        return latex_output
    elif format_type == "tabular":
        latex_output = "\\begin{tabular}{ll}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"{abbr} & {full_name} \\\\\n"
        latex_output += "\\end{tabular}\n"
        return latex_output
    else: # Default plain text list
        output = ""
        for abbr, full_name in abbreviations.items():
            output += f"{abbr}: {full_name}; "
        return output


In [None]:
import re
def extract_abbreviations(text):
    pattern = re.compile(r'((?:[\w-]+\s+){1,10})\((([a-z]*[A-Z]{2,})[a-z]*)\)')
    matches = pattern.findall(text)

    abbreviation_dict = {}

    for match in matches:
        words_ahead = [word for word in re.split(r'\s+|(?<=-)(?=[A-Za-z])', match[0].strip()) if word]
        abbr = match[1]
        abbr_letters = list(re.sub(r'[^A-Z]', '', abbr.upper()))

        full_name_words = []
        abbr_index = 0

        for word in reversed(words_ahead):
            if word and len(word) > 0 and abbr_index < len(abbr_letters) and len(word.replace('-', '')) > 0 and word.replace('-', '')[0].upper() == abbr_letters[len(abbr_letters) - 1 - abbr_index]:
                full_name_words.insert(0, word)
                abbr_index += 1
            if abbr_index == len(abbr_letters):
                break

        if len(full_name_words) == len(abbr_letters):
            full_name = ''.join(word if i == 0 else (' ' + word if not full_name_words[i - 1].endswith('-') else word) for i, word in enumerate(full_name_words))
            abbreviation_dict[abbr] = full_name # Corrected indentation

    return abbreviation_dict

def format_abbreviations(abbreviations, format_type="plain"):
    if format_type == "nomenclature":
        latex_output = "\\usepackage{nomencl}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"\\nomenclature{{{abbr}}}{{{full_name}}}\n"
        return latex_output
    elif format_type == "tabular":
        latex_output = "\\begin{tabular}{ll}\n"
        for abbr, full_name in abbreviations.items():
            latex_output += f"{abbr} & {full_name} \\\\\n"
        latex_output += "\\end{tabular}\n"
        return latex_output
    else: # Default plain text list
        output = ""
        for abbr, full_name in abbreviations.items():
            output += f"{abbr}: {full_name}; "
        return output


In [None]:
# @title An Example of Input Text
text_input = r"""
In this paper, we propose utilizing $Z$-residuals to diagnose Cox PH models. The recent studies by Li et al. 2021 \cite{LiLonghai2021Mdfc} and Wu et al. 2024 \cite{WuTingxuan2024Zdtf} introduced the concept of randomized survival probabilities (RSPs) to define $Z$-residuals for diagnosing model assumptions in accelerated failure time (AFT) and shared frailty models. The RSP approach involves replacing the survival probability of a censored failure time with a uniform random number between 0 and the survival probability of the censored time \cite{WuTingxuan2024Zdtf}. The RSPs for $t_{i}$ in the Cox PH model are defined as:



"""

In [None]:
# @title Testing the Extraction Function
abbreviations = extract_abbreviations(text_input)
print(format_abbreviations(abbreviations))


AFT: accelerated failure time; 
