In [23]:
import pandas as pd
import numpy as np
import gradio as gr
import tempfile
from scipy.sparse import csr_matrix
import gensim.downloader as api
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import CountVectorizer
from gensim.utils import simple_preprocess
from concurrent.futures import ThreadPoolExecutor
import chardet
import torch
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced in the cells visible here — presumably
# consumed by the imported project modules or left over; confirm before removing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): the star import hides which names this module contributes;
# `MatchingFunction` below is presumably one of them. Prefer explicit imports.
from UI_semanticMatching import *
# Instantiated at import time; not used by the cells visible here — TODO confirm
# whether anything downstream relies on this module-level instance.
matcher = MatchingFunction()
from UI_input_output import read_clean_model
# Notebook display settings: show every column and allow up to 500 characters per cell.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth',500)  

In [35]:
# Backend logic: accepts two files, processes them, returns the result table and a download path
def _read_csv_with_detected_encoding(file_path):
    """Load a CSV into a DataFrame using the encoding chardet detects for it.

    The file is read once as raw bytes for detection and then parsed by pandas
    with the detected encoding.
    """
    with open(file_path, 'rb') as f:
        detected = chardet.detect(f.read())
    # chardet reports None when it cannot decide; pandas then uses its default encoding.
    return pd.read_csv(file_path, encoding=detected['encoding'])


def process_two_csvs(file_path_input, file_path_mdr):
    """Run the matching pipeline on two uploaded CSV files.

    Parameters
    ----------
    file_path_input : str or None
        Path of the uploaded input CSV (None when nothing was uploaded).
    file_path_mdr : str or None
        Path of the uploaded MDR CSV (None when nothing was uploaded).

    Returns
    -------
    tuple
        ``(result_df, csv_path)`` where ``result_df`` is the DataFrame returned
        by ``read_clean_model`` and ``csv_path`` is a temporary ``.csv`` file
        holding the same data for download. When either upload is missing the
        result is ``(empty DataFrame, None)``.
    """
    if file_path_input is None or file_path_mdr is None:
        # The click handler is wired to two output components, so always return
        # a pair; a lone DataFrame would leave the download output without a value.
        return pd.DataFrame(), None

    # Read both files into DataFrames, each with its own detected encoding
    input_df = _read_csv_with_detected_encoding(file_path_input)
    print(len(input_df))
    mdr_df = _read_csv_with_detected_encoding(file_path_mdr)
    print(len(mdr_df))

    # clean and run through models
    result_df = read_clean_model(input_df, mdr_df)

    # Save to temp CSV for download. Reserve the name, then close the handle
    # before writing: leaving it open leaks the descriptor and prevents
    # reopening the path on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
        result_path = tmp_file.name
    result_df.to_csv(result_path, index=False)

    return result_df, result_path

# UI definition: components are laid out in the order they are created inside
# the Blocks context, so the statement order below is the page layout.
with gr.Blocks() as demo:
    gr.Markdown("## Semantic Matching between Input and MDR")

    # One row holding, left to right: input-file instructions, input upload,
    # MDR-file instructions, MDR upload.
    with gr.Row():
        l1 = gr.Markdown(
        """
        Upload Input file
        (file must contains at least two columns: **variable** and **description**)
        """)
        # Upload widget restricted to .csv files; its value is passed to the backend.
        csv_input = gr.File(label="Upload Input File",
                    file_types=[".csv"])
        l2 = gr.Markdown(
        """
        Upload MDR file
        (file must contains at least two columns: **name** and **definition**)
        """)
        csv_mdr = gr.File(label="Upload MDR File", 
                    file_types=[".csv"])

    process_btn = gr.Button("Process Files")

    # Table that displays the DataFrame returned by the backend.
    # NOTE(review): Gradio documents `column_widths` as a list of widths
    # (e.g. "100px", "10%"); the bare string "auto" is outside that contract —
    # confirm it has the intended effect on the installed Gradio version.
    output_table = gr.Dataframe(
        label="🔍 Processed Data",
        interactive=True,   # Enables sorting, filtering, and editing
        row_count=10,
        column_widths="auto"
    )

    # File component that receives the path of the generated result CSV.
    download_link = gr.File(label="⬇️ Download Result CSV")

    # Wire the button: both uploads go in, and the function's return values are
    # mapped positionally onto the two outputs, so process_two_csvs must return
    # one value per output component on every code path.
    process_btn.click(
        fn=process_two_csvs,
        inputs=[csv_input, csv_mdr],
        outputs=[output_table, download_link]
    )

# Start the app's local server (the recorded output shows it on 127.0.0.1:7868).
# NOTE(review): a port above the default 7860 suggests earlier launches from
# this kernel may still be running — consider closing them before relaunching.
demo.launch()


* Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




In [33]:
# !jupyter nbconvert --to script gradio_v1.ipynb