# Usage

## 1. Install the required libraries
```shell
!pip install modelscope
!pip install transformers -U
!pip install llama-index llama-index-llms-huggingface ipywidgets 
```

## 2. Download the data files needed for this example
```shell
!wget https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/punkt.zip
!wget https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/stopwords.zip
!wget https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md

!mkdir -p /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers
!mkdir -p /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora

!cp /mnt/workspace/punkt.zip /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers
!cp /mnt/workspace/stopwords.zip /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora
!cd /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers; unzip punkt.zip;
!cd /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora; unzip stopwords.zip;


!mkdir -p /mnt/workspace/custom_data
!mv /mnt/workspace/xianjiaoda.md /mnt/workspace/custom_data

%cd /mnt/workspace
```

## 3. Go!

In [ ]:
# Install ModelScope, upgrade transformers to the latest release, and install
# llama-index together with its Hugging Face LLM integration and ipywidgets.
!pip install modelscope
!pip install transformers -U
!pip install llama-index llama-index-llms-huggingface ipywidgets 

In [ ]:
# Download the NLTK resources (punkt tokenizer, stopwords) and the sample
# document. `-P` pins the download directory so the copy/move commands below
# find the files regardless of the notebook's current working directory.
!wget -P /mnt/workspace https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/punkt.zip
!wget -P /mnt/workspace https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/stopwords.zip
!wget -P /mnt/workspace https://modelscope.oss-cn-beijing.aliyuncs.com/resource/rag/xianjiaoda.md

# Create the NLTK cache directories inside the installed llama-index package.
!mkdir -p /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers
!mkdir -p /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora

# Copy the archives into the cache and unpack them in place.
# `unzip -o` overwrites without prompting, so re-running the cell does not block.
!cp /mnt/workspace/punkt.zip /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers
!cp /mnt/workspace/stopwords.zip /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora
!cd /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/tokenizers; unzip -o punkt.zip;
!cd /opt/conda/lib/python3.10/site-packages/llama_index/core/_static/nltk_cache/corpora; unzip -o stopwords.zip;


# Put the sample document into the directory that the reader loads from.
!mkdir -p /mnt/workspace/custom_data
!mv /mnt/workspace/xianjiaoda.md /mnt/workspace/custom_data

# `!cd` runs in a throw-away subshell and has no effect on the notebook;
# the `%cd` magic actually changes the kernel's working directory.
%cd /mnt/workspace

In [ ]:
import logging
import sys
from abc import ABC
from typing import Any, List

import torch
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Settings,
    ServiceContext,
    set_global_service_context,
)
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM

from modelscope import snapshot_download

# Send INFO-level logs to stdout so they appear in the notebook output.
# basicConfig already attaches a stdout handler to the root logger; attaching a
# second StreamHandler would print every record twice. `force=True` replaces any
# handlers installed earlier so the configuration always takes effect.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# download QWEN model from modelscope (returns the local snapshot directory)
qwen15_4B_CHAT = "qwen/Qwen1.5-4B-Chat"
selected_model = snapshot_download(qwen15_4B_CHAT)

# define sys prompt
SYSTEM_PROMPT = """You are a helpful AI assistant."""
# Qwen1.5 chat models are trained on the ChatML format
# (<|im_start|>role ... <|im_end|>), not the Llama-2 [INST]/<<SYS>> format,
# so the query is wrapped accordingly.
query_wrapper_prompt = PromptTemplate(
    "<|im_start|>system\n" + SYSTEM_PROMPT + "<|im_end|>\n"
    + "<|im_start|>user\n{query_str}<|im_end|>\n"
    + "<|im_start|>assistant\n"
)

# create HuggingFaceLLM with qwen1.5
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    # tokenizer and weights are both loaded from the downloaded snapshot
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16},
)
print("llm created")


# wrap modelscope embedding for llama-index (based on BaseEmbedding)
class ModelScopeEmbeddings4LlamaIndex(BaseEmbedding, ABC):
    """Expose a ModelScope sentence-embedding pipeline as a llama-index embedding.

    The pipeline is created once in ``__init__`` and reused for every query
    and document embedding request.
    """

    # ModelScope pipeline object; populated in __init__.
    embed: Any = None
    # ModelScope model identifier used to build the pipeline.
    model_id: str = "damo/nlp_gte_sentence-embedding_chinese-base"

    def __init__(
            self,
            model_id: str,
            **kwargs: Any,
    ) -> None:
        """Build the ModelScope pipeline for ``model_id``.

        Args:
            model_id: ModelScope identifier of the sentence-embedding model.
            **kwargs: Forwarded unchanged to ``BaseEmbedding.__init__``.

        Raises:
            ValueError: If the ``modelscope`` package cannot be imported.
        """
        # Forward model_id to the base initializer so the field holds the
        # caller's value instead of silently keeping the class-level default.
        super().__init__(model_id=model_id, **kwargs)
        # Only the imports are guarded: failures while building the pipeline
        # should surface as-is rather than be reported as a missing package.
        try:
            from modelscope.pipelines import pipeline
            from modelscope.utils.constant import Tasks
        except ImportError as e:
            raise ValueError(
                "Could not import some python packages. "
                "Please install it with `pip install modelscope`."
            ) from e

        # Use the ModelScope embedding model (creating the pipeline also downloads it).
        self.embed = pipeline(Tasks.sentence_embedding, model=self.model_id)

    def _embed_single(self, text: str) -> Embedding:
        """Embed one string, replacing newlines with spaces first."""
        inputs = {"source_sentence": [text.replace("\n", " ")]}
        # note that we have to call tolist() to change numpy.ndarray into python list
        return self.embed(input=inputs)['text_embedding'][0].tolist()

    def _get_query_embedding(self, query: str) -> Embedding:
        """Return the embedding of a query string."""
        return self._embed_single(query)

    def _get_text_embedding(self, text: str) -> Embedding:
        """Return the embedding of a single document text."""
        return self._embed_single(text)

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        """Return embeddings for a batch of texts, in input order."""
        if not texts:
            # Nothing to embed; skip the pipeline call entirely.
            return []
        cleaned = [text.replace("\n", " ") for text in texts]
        inputs = {"source_sentence": cleaned}
        return self.embed(input=inputs)['text_embedding'].tolist()

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Async variant of ``_get_query_embedding``; delegates to the sync call."""
        return self._get_query_embedding(query)


embedding_model = "damo/nlp_gte_sentence-embedding_chinese-base"
embeddings = ModelScopeEmbeddings4LlamaIndex(model_id=embedding_model)

# Register the LLM and the embedding model globally through `Settings`.
# `ServiceContext` / `set_global_service_context` are deprecated in favour of
# `Settings` and no longer work in recent llama-index releases.
Settings.llm = llm
Settings.embed_model = embeddings

# load example documents
documents = SimpleDirectoryReader("/mnt/workspace/custom_data/").load_data()

# create Vector DB (documents are embedded with Settings.embed_model)
index = VectorStoreIndex.from_documents(documents)

# build a query engine on top of the index (answers come from Settings.llm)
query_engine = index.as_query_engine()

# do query: "What is the motto of Xi'an Jiaotong University?"
# (the original query text misspelled 交大 as 较大)
response = query_engine.query("西安交大的校训是什么")
print(response)
