### 克隆代码

In [None]:
!git clone https://github.com/modelscope/modelscope-agent.git

### 安装特定依赖

In [None]:
!cd modelscope-agent && !pip install -r requirements.txt
!pip install transformers -U
!pip install llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface ipywidgets

In [None]:
import logging
import sys

# Route INFO-and-above log records to stdout through exactly one root handler.
# force=True (Python 3.8+) replaces any handlers already attached to the root
# logger, so the configuration takes effect even when the environment has
# pre-installed a handler, and re-running this cell never stacks duplicate
# handlers (which would print every log line several times).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

import torch
from IPython.display import Markdown, display
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.prompts import PromptTemplate
from modelscope import snapshot_download
from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    Settings,
    set_global_service_context,
    SimpleDirectoryReader)

### 加载大语言模型

In [None]:
# Download the Qwen1.5-4B-Chat model from ModelScope; the value returned by
# snapshot_download is passed to HuggingFaceLLM below as both the model and
# the tokenizer location.
qwen15_4B_CHAT = "qwen/Qwen1.5-4B-Chat"
selected_model = snapshot_download(qwen15_4B_CHAT)

# define sys prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
# Qwen1.5 chat models use the ChatML conversation format
# (<|im_start|>role ... <|im_end|>), not the Llama-2 "[INST]<<SYS>>" format,
# so the query is wrapped in ChatML and left open at the assistant turn.
query_wrapper_prompt = PromptTemplate(
    "<|im_start|>system\n" + SYSTEM_PROMPT + "<|im_end|>\n"
    + "<|im_start|>user\n{query_str}<|im_end|>\n"
    + "<|im_start|>assistant\n"
)

# create HuggingFaceLLM with qwen1.5
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding. `temperature` only applies when sampling is enabled,
    # so it is not passed together with do_sample=False.
    generate_kwargs={"do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16}
)

### 加载数据

In [None]:
!mkdir -p 'data/xianjiaoda/'
!wget 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md' -O 'data/xianjiaoda/xianjiaoda.md'

In [None]:
# load example documents
# Read from the same relative directory the download cell writes to
# ("data/xianjiaoda/"), instead of an absolute /mnt/workspace path that only
# exists when the notebook happens to run from that working directory.
documents = SimpleDirectoryReader("data/xianjiaoda/").load_data()
documents

### 构建Embedding

In [None]:
# download BCE - Embedding model from modelscope
BCE_Embedding = "maidalun/bce-embedding-base_v1"
selected_embedding_model = snapshot_download(BCE_Embedding)

# Use the first CUDA device when one is available and fall back to CPU
# otherwise, so the cell also runs on machines without a GPU.
embed_args = {
    'model_name': selected_embedding_model,
    'max_length': 512,
    'embed_batch_size': 32,
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
}
embed_model = HuggingFaceEmbedding(**embed_args)

### 构建索引

In [None]:
# Register the LLM and the embedding model on the global Settings object.
# Settings supersedes the deprecated ServiceContext /
# set_global_service_context API, which recent llama-index releases no longer
# support.
Settings.llm = llm
Settings.embed_model = embed_model

# create Vector DB
# The index embeds the loaded documents with the globally configured
# embedding model.
index = VectorStoreIndex.from_documents(documents)

### 查询和问答

In [None]:
# Question to ask against the indexed document.
question = "西安交大由哪几个学校合并？"

# Build a query engine on top of the vector index and run the question
# through it.
query_engine = index.as_query_engine()
response = query_engine.query(question)

# Leave the response as the last expression so the notebook displays it.
response