Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions webui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,20 @@ def sum_tokens(client):
env = {
"SYNTHESIZER_BASE_URL": arguments[12],
"SYNTHESIZER_MODEL": arguments[13],
"TRAINEE_BASE_URL": arguments[12],
"TRAINEE_BASE_URL": arguments[20],
"TRAINEE_MODEL": arguments[14],
"SYNTHESIZER_API_KEY": arguments[15],
"TRAINEE_API_KEY": arguments[15],
"TRAINEE_API_KEY": arguments[21],
"RPM": arguments[17],
"TPM": arguments[18],
}

# Test API connection
test_api_connection(env["SYNTHESIZER_BASE_URL"],
env["SYNTHESIZER_API_KEY"], env["SYNTHESIZER_MODEL"])
if config['if_trainee_model']:
test_api_connection(env["TRAINEE_BASE_URL"],
env["TRAINEE_API_KEY"], env["TRAINEE_MODEL"])

# Initialize GraphGen
graph_gen = init_graph_gen(config, env)
Expand Down Expand Up @@ -278,20 +281,32 @@ def sum_tokens(client):
interactive=True)

with gr.Accordion(label=_("Model Config"), open=False):
base_url = gr.Textbox(label="Base URL",
synthesizer_url = gr.Textbox(label="Synthesizer URL",
value="https://api.siliconflow.cn/v1",
info=_("Base URL Info"),
info=_("Synthesizer URL Info"),
interactive=True)
synthesizer_model = gr.Textbox(label="Synthesizer Model",
value="Qwen/Qwen2.5-7B-Instruct",
info=_("Synthesizer Model Info"),
interactive=True)
trainee_url = gr.Textbox(label="Trainee URL",
value="https://api.siliconflow.cn/v1",
info=_("Trainee URL Info"),
interactive=True,
visible=if_trainee_model.value is True)
trainee_model = gr.Textbox(
label="Trainee Model",
value="Qwen/Qwen2.5-7B-Instruct",
info=_("Trainee Model Info"),
interactive=True,
visible=if_trainee_model.value is True)
trainee_api_key = gr.Textbox(
label=_("SiliconCloud Token for Trainee Model"),
type="password",
value="",
info="https://cloud.siliconflow.cn/account/ak",
visible=if_trainee_model.value is True)


with gr.Accordion(label=_("Generation Config"), open=False):
chunk_size = gr.Slider(label="Chunk Size",
Expand Down Expand Up @@ -428,12 +443,12 @@ def sum_tokens(client):
# Test Connection
test_connection_btn.click(
test_api_connection,
inputs=[base_url, api_key, synthesizer_model],
inputs=[synthesizer_url, api_key, synthesizer_model],
outputs=[])

if if_trainee_model.value:
test_connection_btn.click(test_api_connection,
inputs=[base_url, api_key, trainee_model],
inputs=[trainee_url, api_key, trainee_model],
outputs=[])

expand_method.change(lambda method:
Expand All @@ -443,11 +458,9 @@ def sum_tokens(client):
outputs=[max_extra_edges, max_tokens])

if_trainee_model.change(
lambda use_trainee: (gr.update(visible=use_trainee is True),
gr.update(visible=use_trainee is True),
gr.update(visible=use_trainee is True)),
lambda use_trainee: [gr.update(visible=use_trainee)] * 5,
inputs=if_trainee_model,
outputs=[trainee_model, quiz_samples, edge_sampling])
outputs=[trainee_url, trainee_model, quiz_samples, edge_sampling, trainee_api_key])

# Count the tokens in the uploaded file
upload_file.change(
Expand All @@ -471,8 +484,8 @@ def sum_tokens(client):
if_trainee_model, upload_file, tokenizer, qa_form,
bidirectional, expand_method, max_extra_edges, max_tokens,
max_depth, edge_sampling, isolated_node_strategy,
loss_strategy, base_url, synthesizer_model, trainee_model,
api_key, chunk_size, rpm, tpm, quiz_samples, token_counter
loss_strategy, synthesizer_url, synthesizer_model, trainee_model,
api_key, chunk_size, rpm, tpm, quiz_samples, trainee_url, trainee_api_key, token_counter
],
outputs=[output, token_counter],
)
Expand Down
8 changes: 6 additions & 2 deletions webui/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
"Title": "✨Easy-to-use LLM Training Data Generation Framework✨",
"Intro": "is a framework for synthetic data generation guided by knowledge graphs, designed to tackle challenges for knowledge-intensive QA generation. \n\nBy uploading your text chunks (such as knowledge in agriculture, healthcare, or marine science) and filling in the LLM API key, you can generate the training data required by **[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)** and **[xtuner](https://github.com/InternLM/xtuner)** online. We will automatically delete user information after completion.",
"Use Trainee Model": "Use Trainee Model to identify knowledge blind spots; please keep it disabled for SiliconCloud",
"Base URL Info": "Base URL for the API, use SiliconFlow as default",
"Synthesizer URL Info": "Base URL for the Synthesizer Model API, use SiliconFlow as default",
"Trainee URL Info": "Base URL for the Trainee Model API, use SiliconFlow as default",
"Synthesizer Model Info": "Model for constructing KGs and generating QAs",
"Trainee Model Info": "Model for training",
"Model Config": "Model Configuration",
"Generation Config": "Generation Config",
"SiliconCloud Token": "SiliconCloud API Key",
"SiliconCloud Token for Trainee Model": "SiliconCloud API Key for Trainee Model",
"Test Connection": "Test Connection",
"Run GraphGen": "Run GraphGen",
"Upload File": "Upload File",
Expand All @@ -18,12 +20,14 @@
"Title": "✨开箱即用的LLM训练数据生成框架✨",
"Intro": "是一个基于知识图谱的数据合成框架,旨在知识密集型任务中生成问答。\n\n 上传你的文本块(如农业、医疗、海洋知识),填写 LLM api key,即可在线生成 **[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)**、**[xtuner](https://github.com/InternLM/xtuner)** 所需训练数据。结束后我们将自动删除用户信息。",
"Use Trainee Model": "使用Trainee Model来识别知识盲区,使用硅基流动时请保持禁用",
"Base URL Info": "调用模型API的URL,默认使用硅基流动",
"Synthesizer URL Info": "调用合成模型API的URL,默认使用硅基流动",
"Trainee URL Info": "调用学生模型API的URL,默认使用硅基流动",
"Synthesizer Model Info": "用于构建知识图谱和生成问答的模型",
"Trainee Model Info": "用于训练的模型",
"Model Config": "模型配置",
"Generation Config": "生成配置",
"SiliconCloud Token": "硅基流动 API Key",
"SiliconCloud Token for Trainee Model": "硅基流动 API Key (学生模型)",
"Test Connection": "测试接口",
"Run GraphGen": "运行GraphGen",
"Upload File": "上传文件",
Expand Down