In [2]:
!pip install transformers -U
!pip install llama-index llama-index-llms-huggingface ipywidgets
!pip install sentence-transformers

Looking in indexes: https://mirrors.aliyun.com/pypi/simple
[33mDEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mLooking in indexes: https://mirrors.aliyun.com/pypi/simple
[33mDEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mLooking in indexes: https://mirrors.aliyun.com/pypi/simple
Collecting sentence-t

In [4]:
!wget https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md
!mkdir -p /mnt/workspace/custom_data
!mv /mnt/workspace/xianjiaoda.md /mnt/workspace/custom_data

--2024-04-08 13:45:21--  https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md
正在解析主机 modelscope.oss-cn-beijing.aliyuncs.com (modelscope.oss-cn-beijing.aliyuncs.com)... 8.131.208.119
正在连接 modelscope.oss-cn-beijing.aliyuncs.com (modelscope.oss-cn-beijing.aliyuncs.com)|8.131.208.119|:443... 已连接。
已发出 HTTP 请求，正在等待回应... 200 OK
长度： 13228 (13K) [text/markdown]
正在保存至: ‘xianjiaoda.md’


2024-04-08 13:45:21 (227 MB/s) - 已保存 ‘xianjiaoda.md’ [13228/13228])



In [1]:
import logging
import sys
from abc import ABC
from typing import Any, List

import pandas as pd
import torch
from IPython.display import display, HTML
from llama_index.core import QueryBundle
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Settings,
    ServiceContext,
    set_global_service_context,
)
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.prompts import PromptTemplate
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.llms.huggingface import HuggingFaceLLM
from modelscope import snapshot_download
from transformers import AutoModelForSequenceClassification, AutoTokenizer

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# download QWEN model from modelscope
qwen15_4B_CHAT = "qwen/Qwen1.5-4B-Chat"
selected_model = snapshot_download(qwen15_4B_CHAT)

# define sys prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

# create HuggingFaceLLM with qwen1.5
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16},
)
print("llm created")

rerank_llm_name = "AI-ModelScope/bge-reranker-v2-m3"
downloaded_rerank_model = snapshot_download(rerank_llm_name)
rerank_llm = SentenceTransformerRerank(model=downloaded_rerank_model, top_n=3)
print("rerank_llm created", rerank_llm_name)


# wrap modelscope embedding for llama-index (based on BaseEmbedding)
class ModelScopeEmbeddings4LlamaIndex(BaseEmbedding, ABC):
    embed: Any = None
    model_id: str = "damo/nlp_gte_sentence-embedding_chinese-base"

    def __init__(
            self,
            model_id: str,
            **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        try:
            from modelscope.models import Model
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
            # 使用modelscope的embedding模型（包含下载）
            self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

        except ImportError as e:
            raise ValueError(
                "Could not import some python packages." "Please install it with `pip install modelscope`."
            ) from e

    def _get_query_embedding(self, query: str) -> Embedding:
        text = query.replace("\n", " ")
        inputs = {"source_sentence": [text]}
        return self.embed(input=inputs)['text_embedding'][0].tolist()

    def _get_text_embedding(self, text: str) -> Embedding:
        text = text.replace("\n", " ")
        inputs = {"source_sentence": [text]}
        return self.embed(input=inputs)['text_embedding'][0].tolist()

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        texts = list(map(lambda x: x.replace("\n", " "), texts))
        inputs = {"source_sentence": texts}
        result = self.embed(input=inputs)['text_embedding']
        #print('@@@@DEBUG2, type = ', type(result))
        return result.tolist()

    async def _aget_query_embedding(self, query: str) -> Embedding:
        return self._get_query_embedding(query)


embedding_model = "damo/nlp_gte_sentence-embedding_chinese-base"
embeddings = ModelScopeEmbeddings4LlamaIndex(model_id=embedding_model)
service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)
set_global_service_context(service_context)
Settings.embed_model = embeddings

# load example documents
#documents = SimpleDirectoryReader("/mnt/workspace/data/paul_graham/").load_data()
documents = SimpleDirectoryReader("/mnt/workspace/custom_data/").load_data()

# create Vector DB
index = VectorStoreIndex.from_documents(documents)

# set Logging to DEBUG for more detailed outputs
#query_engine = index.as_query_engine(similarity_top_k=10, node_postprocessors=[rerank_llm])

# response = query_engine.query("ModelScope上模型涵盖了哪些领域？")
# print(response)
#response = query_engine.query("西安交大由哪几个学校组成")
#print(response)




2024-04-08 14:07:47,302 - modelscope - INFO - PyTorch version 2.1.2+cu121 Found.
2024-04-08 14:07:47,305 - modelscope - INFO - TensorFlow version 2.14.0 Found.
2024-04-08 14:07:47,305 - modelscope - INFO - Loading ast index from /mnt/workspace/.cache/modelscope/ast_indexer
2024-04-08 14:07:47,336 - modelscope - INFO - Loading done! Current index file version is 1.13.3, with md5 55e7043102d017111a56be6e6d7a6a16 and a total number of 972 components indexed


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


llm created
rerank_llm created AI-ModelScope/bge-reranker-v2-m3
INFO:datasets:PyTorch version 2.1.2+cu121 available.
PyTorch version 2.1.2+cu121 available.
INFO:datasets:TensorFlow version 2.14.0 available.
TensorFlow version 2.14.0 available.


2024-04-08 14:09:01,483 - modelscope - INFO - initiate model from /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base
2024-04-08 14:09:01,484 - modelscope - INFO - initiate model from location /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base.
2024-04-08 14:09:01,485 - modelscope - INFO - initialize model from /mnt/workspace/.cache/modelscope/damo/nlp_gte_sentence-embedding_chinese-base
  return self.fget.__get__(instance, owner)()
  service_context = ServiceContext.from_defaults(embed_model=embeddings, llm=llm)


In [2]:
from time import time

query_engine = index.as_query_engine(similarity_top_k=10, node_postprocessors=[rerank_llm])

now = time()
response = query_engine.query("西安交大由哪几个学校组成")
print(response)
print(f"Elapsed: {round(time() - now, 2)}s")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]



2000年国务院决定将西安交通大学、西安医科大学、陕西财经学院三校合并，组建新的西安交通大学
Elapsed: 2.34s


In [2]:
print(response.get_formatted_sources(length=200))

> Source (Doc id: c343ec5c-76ee-4ff5-88c2-c9d719b4adfd): 2000年国务院决定将西安交通大学、西安医科大学、陕西财经学院三校合并，组建新的西安交通大学。

学校是“七五”“八五”重点建设单位，首批进入国家“211”和“985”工程建设学校。2017 年入选国家一流大学建设名单 A 类建设高校，2022 年入选国家第二轮“双一流”建设高校，8 个学科入选“双一流”建设学科。据 ESI 公布的数据，截至 2023 年 5 月，学校 17 个学科进入世界...

> Source (Doc id: 3ee92ae7-b3e4-4af3-a572-28c28635932c): 西安交通大学是我国最早兴办、享誉海内外的著名高等学府，是教育部直属重点大学。西迁以来，一代代交大人扎根西部、服务国家，为西部发展和国家建设作出了卓越贡献，以实际行动铸就了第一批纳入中国共产党人精神谱系的西迁精神。2017年12月，习近平总书记对学校15位老教授来信作出重要指示。在2018年新年贺词中，习近平总书记再次提到“西安交大西迁的老教授”。2020年4月22日，习近平总书记来校考察并发...

> Source (Doc id: 5f49b220-ebe0-483f-b000-7d55289a8285): “大先生”系列话剧、金色梧桐节等一批文化品牌深入人心。获评全国文明校园、全国高校“礼敬中华优秀传统文化”十大示范项目、首批中华优秀传统文化传承基地等。

学校拥有国家大学生文化素质教育基地和156个学生社团，科技、文艺、体育等活动丰富多彩，“九州名家”“纵论四海”“新港报告”“创源论坛”等成为师生开拓视野的经典品牌。历年来，交大学子在SAE国际航空设计大赛、VEX机器人世界锦标赛、国际数学建...


In [2]:
from time import time
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(similarity_top_k=10)

# response = query_engine.query("ModelScope上模型涵盖了哪些领域？")
# print(response)
now = time()
response = query_engine.query("西安交大由哪几个学校组成")
print(response)
print(f"Elapsed: {round(time() - now, 2)}s")



2000年国务院决定将西安交通大学、西安医科大学、陕西财经学院三校合并，组建新的西安交通大学
Elapsed: 4.49s
