# 下载模型与数据集

识别模型：ds4sd/docling-models

In [None]:
import os

# Point HuggingFace downloads at the hf-mirror.com mirror endpoint.
# NOTE(review): this is assigned before the huggingface_hub imports below,
# presumably because the library reads HF_ENDPOINT at import time — confirm.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from huggingface_hub import snapshot_download
from huggingface_hub.utils import HfHubHTTPError


def download_model(target_dir, repo_id):
    """
    Download a complete HuggingFace repository into a local directory.

    Progress and the outcome are reported with ``print``. Exceptions raised
    by the download itself are caught and printed instead of propagating,
    and the function returns nothing.

    Args:
        target_dir (str): Local path where the repository files are saved.
        repo_id (str): Repository ID on HuggingFace.
    """
    print(f"准备开始下载 '{repo_id}' ...")
    absolute_target = os.path.abspath(target_dir)
    print(f"目标保存路径: {absolute_target}")

    # Keyword arguments for snapshot_download, which fetches the whole repo.
    download_options = {
        "repo_id": repo_id,
        "local_dir": target_dir,
        # Request real files in local_dir rather than symlinks into the cache.
        "local_dir_use_symlinks": False,
        # Request that an interrupted download be resumed.
        "resume_download": True,
        # Optional: add "max_workers": 4 for more concurrency if bandwidth allows.
        # NOTE(review): newer huggingface_hub releases may flag the two flags
        # above as deprecated — confirm against the installed version.
    }

    try:
        saved_to = snapshot_download(**download_options)
        print(f"✅ 下载成功！文件已保存在: {saved_to}")
    except HfHubHTTPError as http_err:
        print(f"❌ 下载失败 (网络或权限错误): {http_err}")
    except Exception as other_err:
        print(f"❌ 发生未知错误: {other_err}")


下载向量模型：Qwen/Qwen3-Embedding-8B

In [None]:
# Local directory that receives the files, and the HuggingFace repo to fetch.
target_dir = "/root/autodl-tmp/model/qwen3-8b"
repo_id = "Qwen/Qwen3-Embedding-8B"
# download_model prints its progress and any download error instead of raising.
download_model(target_dir=target_dir, repo_id=repo_id)

# Pipeline

In [None]:
from pipeline import RunConfig, PipelineConfig, Pipeline

# Preprocessing variants keyed by name; they differ only in whether
# serialized tables are used.
preprocess_configs = {
    variant: RunConfig(use_serialized_tables=enabled)
    for variant, enabled in (("ser_tab", True), ("no_ser_tab", False))
}

# Run configuration: qwen provider with the qwen-turbo answering model,
# parent-document retrieval and LLM reranking switched on, serialized tables
# switched off, and parallel_requests set to 1.
max_st_qwenturbo8k_reasoning_config = RunConfig(
    use_serialized_tables=False,
    parent_document_retrieval=True,
    llm_reranking=True,
    parallel_requests=1,
    submission_name="",
    pipeline_details="",
    api_provider="qwen",
    answering_model="qwen-turbo",
    config_suffix="_max_qwen-turbo1-llmre-reasoning",
)

In [None]:
from pyprojroot import here

# Resolve the test-set directory relative to the project root given by here().
root_path = here().joinpath("data", "test_set")
print("root_path:", root_path)

# Build the pipeline over that directory with the selected run configuration.
pipeline = Pipeline(root_path, run_config=max_st_qwenturbo8k_reasoning_config)

此方法将 PDF 报告解析为 JSON 文件。它会在 debug/data_01_parsed_reports 目录中创建 JSON 文件，这些 JSON 文件将在后续步骤中使用。
它还会将 Docling 生成的原始输出存储在 debug/data_01_parsed_reports_debug 目录中。这些 JSON 文件包含大量元数据，但后续不会被使用。

In [None]:
import os

# Point HuggingFace downloads at the hf-mirror.com mirror endpoint.
# NOTE(review): presumably needed because PDF parsing fetches models from the
# HuggingFace hub — confirm that the variable is still read at this point.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
# Parse the PDF reports into JSON files used by the later pipeline steps.
pipeline.parse_pdf_reports_sequential()

此方法将 debug/data_01_parsed_reports 中的 JSON 转换为更简单的 JSON，即 Markdown 格式的页面列表。
新的 JSON 文件位于 debug/data_02_merged_reports 目录中。

In [None]:
# Convert the parsed-report JSON into simpler JSON (a list of Markdown pages);
# the new files are written under debug/data_02_merged_reports.
pipeline.merge_reports()

此方法将报告导出为纯 Markdown 格式。这些报告仅用于人工审阅，以及供全文搜索配置（gemini_thinking_config）使用。
新文件位于 debug/data_03_reports_markdown 目录下。

In [None]:
# Export the reports as plain Markdown; the new files are written under
# debug/data_03_reports_markdown.
pipeline.export_reports_to_markdown()

此方法将报告分割成多个数据块，用于向量化处理。
新的 JSON 文件位于 databases/chunked_reports 目录中。

In [None]:
# Split the reports into chunks for vectorization; the new JSON files are
# written under databases/chunked_reports.
pipeline.chunk_reports()

此方法从分块报告中创建向量数据库。
新文件位于 databases/vector_dbs 目录中。

In [None]:
# Build the vector databases from the chunked reports; the new files are
# written under databases/vector_dbs.
pipeline.create_vector_dbs()

此方法处理问题和答案。
问题处理逻辑取决于 run_config。

In [None]:
# Process the questions and answers; the processing logic depends on the
# run_config the pipeline was built with.
pipeline.process_questions()

In [None]:
from raptor.SummarizationModels import QwenSummarizationModel

# Manual smoke test: feed a long English passage (the Cinderella fairy tale)
# to QwenSummarizationModel.summarize and print whatever it returns.
# NOTE(review): the model is constructed with no arguments, so any model or
# credential settings presumably come from its defaults — confirm.
model = QwenSummarizationModel()
summarize = model.summarize("""The wife of a rich man fell sick, and as she felt that her end
was drawing near, she called her only daughter to her bedside and
said, dear child, be good and pious, and then the
good God will always protect you, and I will look down on you
from heaven and be near you.  Thereupon she closed her eyes and
departed.  Every day the maiden went out to her mother's grave,
and wept, and she remained pious and good.  When winter came
the snow spread a white sheet over the grave, and by the time the
spring sun had drawn it off again, the man had taken another wife.
The woman had brought with her into the house two daughters,
who were beautiful and fair of face, but vile and black of heart.
Now began a bad time for the poor step-child.  Is the stupid goose
to sit in the parlor with us, they said.  He who wants to eat bread
must earn it.  Out with the kitchen-wench.  They took her pretty
clothes away from her, put an old grey bedgown on her, and gave
her wooden shoes.  Just look at the proud princess, how decked
out she is, they cried, and laughed, and led her into the kitchen.
There she had to do hard work from morning till night, get up
before daybreak, carry water, light fires, cook and wash.  Besides
this, the sisters did her every imaginable injury - they mocked her
and emptied her peas and lentils into the ashes, so that she was
forced to sit and pick them out again.  In the evening when she had
worked till she was weary she had no bed to go to, but had to sleep
by the hearth in the cinders.  And as on that account she always
looked dusty and dirty, they called her cinderella.
It happened that the father was once going to the fair, and he
asked his two step-daughters what he should bring back for them.
Beautiful dresses, said one, pearls and jewels, said the second.
And you, cinderella, said he, what will you have.  Father
break off for me the first branch which knocks against your hat on
your way home.  So he bought beautiful dresses, pearls and jewels
for his two step-daughters, and on his way home, as he was riding
through a green thicket, a hazel twig brushed against him and
knocked off his hat.  Then he broke off the branch and took it with
him.  When he reached home he gave his step-daughters the things
which they had wished for, and to cinderella he gave the branch
from the hazel-bush.  Cinderella thanked him, went to her mother's
grave and planted the branch on it, and wept so much that the tears
fell down on it and watered it.  And it grew and became a handsome
tree. Thrice a day cinderella went and sat beneath it, and wept and
prayed, and a little white bird always came on the tree, and if
cinderella expressed a wish, the bird threw down to her what she
had wished for.
It happened, however, that the king gave orders for a festival
which was to last three days, and to which all the beautiful young
girls in the country were invited, in order that his son might choose
himself a bride.  When the two step-sisters heard that they too were
to appear among the number, they were delighted, called cinderella
and said, comb our hair for us, brush our shoes and fasten our
buckles, for we are going to the wedding at the king's palace.
Cinderella obeyed, but wept, because she too would have liked to
go with them to the dance, and begged her step-mother to allow
her to do so.  You go, cinderella, said she, covered in dust and
dirt as you are, and would go to the festival.  You have no clothes
and shoes, and yet would dance.  As, however, cinderella went on
asking, the step-mother said at last, I have emptied a dish of
lentils into the ashes for you, if you have picked them out again in
two hours, you shall go with us.  The maiden went through the
back-door into the garden, and called, you tame pigeons, you
turtle-doves, and all you birds beneath the sky, come and help me
to pick
     the good into the pot,
     the bad into the crop.
Then two white pigeons came in by the kitchen window, and
afterwards the turtle-doves, and at last all the birds beneath the
sky, came whirring and crowding in, and alighted amongst the ashes.
And the pigeons nodded with their heads and began pick, pick,
pick, pick, and the rest began also pick, pick, pick, pick, and
gathered all the good grains into the dish.  Hardly had one hour
passed before they had finished, and all flew out again.  Then the
girl took the dish to her step-mother, and was glad, and believed
that now she would be allowed to go with them to the festival.
But the step-mother said, no, cinderella, you have no clothes and
you can not dance.  You would only be laughed at.  And as
cinderella wept at this, the step-mother said, if you can pick two
dishes of lentils out of the ashes for me in one hour, you shall go
with us.  And she thought to herself, that she most certainly
cannot do again.  When the step-mother had emptied the two
dishes of lentils amongst the ashes, the maiden went through the
back-door into the garden and cried, you tame pigeons, you
turtle-doves, and all you birds beneath the sky, come and help me
to pick
     the good into the pot,
     the bad into the crop.
Then two white pigeons came in by the kitchen-window, and
afterwards the turtle-doves, and at length all the birds beneath the
sky, came whirring and crowding in, and alighted amongst the
ashes.  And the doves nodded with their heads and began pick,
pick, pick, pick, and the others began also pick, pick, pick, pick,
and gathered all the good seeds into the dishes, and before half an
hour was over they had already finished, and all flew out again.
Then the maiden was delighted, and believed that she might now go
with them to the wedding.  But the step-mother said, all this will
not help.  You cannot go with us, for you have no clothes and can
not dance.  We should be ashamed of you.  On this she turned her
back on cinderella, and hurried away with her two proud daughters.
As no one was now at home, cinderella went to her mother's
grave beneath the hazel-tree, and cried -
     shiver and quiver, little tree,
     silver and gold throw down over me.
Then the bird threw a gold and silver dress down to her, and
slippers embroidered with silk and silver.  She put on the dress
with all speed, and went to the wedding.  Her step-sisters and the
step-mother however did not know her, and thought she must be a
foreign princess, for she looked so beautiful in the golden dress.
They never once thought of cinderella, and believed that she was
sitting at home in the dirt, picking lentils out of the ashes.  The
prince approached her, took her by the hand and danced with her.
He would dance with no other maiden, and never let loose of her
hand, and if any one else came to invite her, he said, this is my
partner.
She danced till it was evening, and then she wanted to go home.
But the king's son said, I will go with you and bear you company,
for he wished to see to whom the beautiful maiden belonged.
She escaped from him, however, and sprang into the
pigeon-house.  The king's son waited until her father came, and
then he told him that the unknown maiden had leapt into the
pigeon-house.  The old man thought, can it be cinderella.  And
they had to bring him an axe and a pickaxe that he might hew
the pigeon-house to pieces, but no one was inside it.  And when they
got home cinderella lay in her dirty clothes among the ashes, and
a dim little oil-lamp was burning on the mantle-piece, for
cinderella had jumped quickly down from the back of the pigeon-house
and had run to the little hazel-tree, and there she had taken off
her beautiful clothes and laid them on the grave, and the bird had
taken them away again, and then she had seated herself in the
kitchen amongst the ashes in her grey gown.
Next day when the festival began afresh, and her parents and
the step-sisters had gone once more, cinderella went to the
hazel-tree and said -
     shiver and quiver, my little tree,
     silver and gold throw down over me.
Then the bird threw down a much more beautiful dress than on
the preceding day. And when cinderella appeared at the wedding
in this dress, every one was astonished at her beauty.  The king's
son had waited until she came, and instantly took her by the hand
and danced with no one but her.  When others came and invited
her, he said, this is my partner.  When evening came she wished
to leave, and the king's son followed her and wanted to see into
which house she went.  But she sprang away from him, and into
the garden behind the house.  Therein stood a beautiful tall tree on
which hung the most magnificent pears.  She clambered so nimbly
between the branches like a squirrel that the king's son did not
know where she was gone.  He waited until her father came, and
said to him, the unknown maiden has escaped from me, and I
believe she has climbed up the pear-tree.  The father thought,
can it be cinderella.  And had an axe brought and cut the
tree down, but no one was on it.  And when they got into the
kitchen, cinderella lay there among the ashes, as usual, for she
had jumped down on the other side of the tree, had taken the
beautiful dress to the bird on the little hazel-tree, and put on her
grey gown.
On the third day, when the parents and sisters had gone away,
cinderella went once more to her mother's grave and said to the
little tree -
     shiver and quiver, my little tree,
     silver and gold throw down over me.
And now the bird threw down to her a dress which was more
splendid and magnificent than any she had yet had, and the
slippers were golden.  And when she went to the festival in the
dress, no one knew how to speak for astonishment.  The king's son
danced with her only, and if any one invited her to dance, he said
this is my partner.
When evening came, cinderella wished to leave, and the king's
son was anxious to go with her, but she escaped from him so quickly
that he could not follow her.  The king's son, however, had
employed a ruse, and had caused the whole staircase to be smeared
with pitch, and there, when she ran down, had the maiden's left
slipper remained stuck.  The king's son picked it up, and it was
small and dainty, and all golden.  Next morning, he went with it to
the father, and said to him, no one shall be my wife but she whose
foot this golden slipper fits.  Then were the two sisters glad,
for they had pretty feet.  The eldest went with the shoe into her
room and wanted to try it on, and her mother stood by.  But she
could not get her big toe into it, and the shoe was too small for
her.  Then her mother gave her a knife and said, cut the toe off,
when you are queen you will have no more need to go on foot.  The
maiden cut the toe off, forced the foot into the shoe, swallowed
the pain, and went out to the king's son.  Then he took her on his
his horse as his bride and rode away with her.  They were
obliged, however, to pass the grave, and there, on the hazel-tree,
sat the two pigeons and cried -
     turn and peep, turn and peep,
     there's blood within the shoe,
     the shoe it is too small for her,
     the true bride waits for you.
Then he looked at her foot and saw how the blood was trickling
from it.  He turned his horse round and took the false bride
home again, and said she was not the true one, and that the
other sister was to put the shoe on.  Then this one went into her
chamber and got her toes safely into the shoe, but her heel was
too large.  So her mother gave her a knife and said,  cut a bit
off your heel, when you are queen you will have no more need
to go on foot.  The maiden cut a bit off her heel, forced
her foot into the shoe, swallowed the pain, and went out to the
king's son.  He took her on his horse as his bride, and rode away
with her, but when they passed by the hazel-tree, the two pigeons
sat on it and cried -
     turn and peep, turn and peep,
     there's blood within the shoe,
     the shoe it is too small for her,
     the true bride waits for you.
He looked down at her foot and saw how the blood was running
out of her shoe, and how it had stained her white stocking quite
red.  Then he turned his horse and took the false bride home
again.  This also is not the right one, said he, have you no
other daughter.  No, said the man, there is still a little
stunted kitchen-wench which my late wife left behind her, but
she cannot possibly be the bride.  The king's son said he was
to send her up to him, but the mother answered, oh, no, she is
much too dirty, she cannot show herself.  But he absolutely
insisted on it, and cinderella had to be called.  She first
washed her hands and face clean, and then went and bowed down
before the king's son, who gave her the golden shoe.  Then she
seated herself on a stool, drew her foot out of the heavy
wooden shoe, and put it into the slipper, which fitted like a
glove.  And when she rose up and the king's son looked at her
face he recognized the beautiful maiden who had danced with
him and cried, that is the true bride.  The step-mother and
the two sisters were horrified and became pale with rage, he,
however, took cinderella on his horse and rode away with her.  As
they passed by the hazel-tree, the two white doves cried -
     turn and peep, turn and peep,
     no blood is in the shoe,
     the shoe is not too small for her,
     the true bride rides with you,
and when they had cried that, the two came flying down and
placed themselves on cinderella's shoulders, one on the right,
the other on the left, and remained sitting there.
When the wedding with the king's son was to be celebrated, the
two false sisters came and wanted to get into favor with
cinderella and share her good fortune.  When the betrothed
couple went to church, the elder was at the right side and the
younger at the left, and the pigeons pecked out one eye from
each of them.  Afterwards as they came back the elder was at
the left, and the younger at the right, and then the pigeons
pecked out the other eye from each.  And thus, for their
wickedness and falsehood, they were punished with blindness
all their days.
""")
# Show the value returned by summarize().
print(summarize)