In [2]:

import os
from pathlib import Path

# Point the Hugging Face `transformers` download cache at the workspace volume.
# This has to run before `transformers` is imported further down in this cell.
# parents=True lets the call succeed even when /workspace itself does not exist
# yet; exist_ok=True keeps re-running the cell harmless.
# NOTE(review): the sentence-transformers download logged later in this notebook
# landed under /root/.cache/torch/sentence_transformers, so that library does
# not appear to be governed by this variable.
CACHE_DIR = Path('/workspace/.cache')
CACHE_DIR.mkdir(parents=True, exist_ok=True)
os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR.as_posix()

import pandas as pd
import torch
from transformers import pipeline
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from sentence_transformers import SentenceTransformer, util
import gc
import torch

# Checkpoint used for text generation. The trailing index chooses one of the
# candidates; index 1 is the MPT instruct model, so despite its name this
# constant does not currently point at a Dolly checkpoint.
DOLLY_MODEL = (
    "databricks/dolly-v2-12b",
    'mosaicml/mpt-7b-instruct',
)[1]

# Sentence-embedding checkpoint used to encode queries, chosen the same way.
EMBEDDING_MODEL = (
    "all-mpnet-base-v2",
    "sentence-transformers/use-cmlm-multilingual",
)[1]

# When True, the generation checkpoint is also used to produce embeddings.
USE_DOLLY_FOR_EMBEDDING = False


In [3]:
# Prompt layout for an instruction with input: the instruction comes first,
# followed by the supporting text under an "Input:" heading.
prompt_with_context = PromptTemplate(
    template="{instruction}\n\nInput:\n{context}",
    input_variables=["instruction", "context"],
)

# Text-generation pipeline for the checkpoint named by DOLLY_MODEL.
# trust_remote_code=True runs custom code shipped with the model repository;
# the loader's own warning recommends additionally pinning a `revision`.
generate_text = pipeline(
    model=DOLLY_MODEL,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    return_full_text=True,
    do_sample=False,
)

# Wrap the pipeline as a LangChain LLM and bind it to the prompt template.
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
llm_context_chain = LLMChain(llm=hf_pipeline, prompt=prompt_with_context)



Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [3]:

# Encoder used to embed queries: the generation checkpoint when
# USE_DOLLY_FOR_EMBEDDING is set, otherwise the dedicated embedding checkpoint.
embedding_model = SentenceTransformer(
    DOLLY_MODEL if USE_DOLLY_FOR_EMBEDDING else EMBEDDING_MODEL
)

Downloading (…)7729f/.gitattributes:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Downloading (…)a63d77729f/README.md:   0%|          | 0.00/1.85k [00:00<?, ?B/s]

Downloading (…)3d77729f/config.json:   0%|          | 0.00/804 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)7729f/tokenizer.json:   0%|          | 0.00/9.62M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)a63d77729f/vocab.txt:   0%|          | 0.00/5.22M [00:00<?, ?B/s]

Downloading (…)d77729f/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Some weights of the model checkpoint at /root/.cache/torch/sentence_transformers/sentence-transformers_use-cmlm-multilingual/ were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
# Load the table of extracted text chunks together with their stored embedding
# columns, then preview the first five rows.
df_text_chunks = pd.read_feather(path="../data/paper_extracts_embed.feature")
df_text_chunks.head(n=5)


Unnamed: 0,text_chunk,title,embeddings,embeddings_dolly,embeddings_use
0,Texture Synthesis Using Convolutional NeuralNe...,Texture Synthesis Using Convolutional Neural N...,"[0.01643215, 0.08131, -0.051866785, 0.07214568...","[-0.4477233, 0.7085075, 0.9448252, -0.6605596,...","[-0.11053032, 0.036556076, 0.029679747, 0.0063..."
1,2 Convolutional neural network We use the VGG...,Texture Synthesis Using Convolutional Neural N...,"[0.00494202, -0.0018913345, -6.29354e-05, 0.09...","[-0.16315894, 1.1724734, 0.34991983, -0.975609...","[-0.062598296, 0.03323136, -0.0032446731, -0.0..."
2,3 different features. These feature correlati...,Texture Synthesis Using Convolutional Neural N...,"[0.040369663, 0.036883876, -0.026579408, 0.090...","[-0.5808077, 0.93010116, 0.35125256, -0.081901...","[-0.060262118, 0.016366797, 0.020364584, 0.033..."
3,conv1_1pool1pool2pool3pool4originalPortilla &...,Texture Synthesis Using Convolutional Neural N...,"[-0.005054468, -0.011686467, -0.05832806, 0.06...","[-1.1286112, 0.94930226, 0.83858913, -0.496184...","[-0.07421292, -0.0026081826, 0.03418953, 0.043..."
4,6 Classification performance 1.00.80.60.4 top...,Texture Synthesis Using Convolutional Neural N...,"[0.02261693, -0.04751408, -0.008524225, 0.0593...","[-0.7846361, -0.06361116, 1.3205526, 1.9582235...","[-0.04890534, -0.0038033319, 0.05069413, 0.020..."


In [6]:


def get_similarity(embedding_model, query, doc_emb, docs):
    """Rank ``docs`` by dot-product similarity between ``query`` and ``doc_emb``.

    Args:
        embedding_model: Object whose ``encode(query)`` returns the query embedding.
        query: The query passed to ``embedding_model.encode``.
        doc_emb: Document embeddings, positionally aligned with ``docs``.
        docs: The documents being ranked.

    Returns:
        A list of ``(doc, score)`` tuples ordered from highest to lowest score.
        Documents with equal scores keep their original relative order.
    """
    query_vector = embedding_model.encode(query)
    # dot_score yields one row per query; only a single query is scored here.
    similarity_row = util.dot_score(query_vector, doc_emb)[0]
    ranked = [
        (doc, score)
        for doc, score in zip(docs, similarity_row.cpu().tolist())
    ]
    # Best match first; list.sort is stable, so ties stay in input order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked






In [22]:
# Choose the stored embedding column that corresponds to the encoder selected
# in the configuration, so queries and documents are compared in the same space.
if USE_DOLLY_FOR_EMBEDDING:
    doc_emb = df_text_chunks["embeddings_dolly"]
elif EMBEDDING_MODEL == "sentence-transformers/use-cmlm-multilingual":
    doc_emb = df_text_chunks["embeddings_use"]
else:
    doc_emb = df_text_chunks["embeddings"]

def get_context(query, top_n = 3, docs = df_text_chunks["text_chunk"], doc_emb=doc_emb, embedding_model=embedding_model):
    """Return the ``top_n`` best-matching text chunks for ``query`` as one string.

    Args:
        query: The question to retrieve context for.
        top_n: Maximum number of chunks to include. When fewer chunks are
            available, all of them are returned instead of raising IndexError.
            Zero or a negative value yields an empty string.
        docs: The candidate text chunks.
        doc_emb: Embeddings positionally aligned with ``docs``.
        embedding_model: Encoder used to embed ``query``.

    Returns:
        The selected chunks, best match first, separated by blank lines.

    Note:
        The defaults for ``docs``, ``doc_emb`` and ``embedding_model`` are
        evaluated once, when this cell runs. Re-run the cell after changing the
        underlying globals, or pass the arguments explicitly.
    """
    doc_score_pairs = get_similarity(embedding_model, query, doc_emb, docs)
    # Slicing tolerates top_n larger than the result list; max() keeps a
    # negative top_n meaning "no chunks" rather than "all but the last few".
    top_pairs = doc_score_pairs[:max(top_n, 0)]
    context_chunks = "\n\n".join(doc for doc, _score in top_pairs)
    return context_chunks


# Candidate questions; the trailing index selects which one is asked.
query = (
    "What is the goal of visual texture synthesis?",
    "what is VGG-19",
    "What is a Gram matrix?",
    "What is a MCGSM",
    "Describe a spatial LSTM",
    "what is special about Larochelle & Murray's NADE model?",
    "What is RACV Years of Membership Benefits program?",
    "What is insurance excess?",
    "When can you start repairs to your vehicle after an accident based on RACV insurance policy?",
)[-1]

# Retrieve the two best-matching chunks to use as supporting input.
context = get_context(top_n=2, query=query)

# Answer once with the retrieved context and once with an empty context so the
# two responses can be compared side by side.
print("\n----------------With Context--------------------\n")
print(
    llm_context_chain.predict(context=context, instruction=query).lstrip()
)

print("\n----------------Without Context--------------------\n")
print(
    llm_context_chain.predict(context="", instruction=query).lstrip()
)

# Release unreferenced Python objects, then hand cached CUDA memory back.
gc.collect()
torch.cuda.empty_cache()



----------------With Context--------------------

You can start repairs to your vehicle after an accident based on RACV insurance policy if the damage is not too extensive. You should get your vehicle inspected by a qualified mechanic as soon as possible to determine the extent of the damage. It is important to get the accident information, the vehicle information and the insurance information to the mechanic. The insurance information can be found on the RACV website.

If the damage to your vehicle is extensive, you will likely be looking at a replacement vehicle.

----------------Without Context--------------------

If the damage to your vehicle is not too severe, you can start repairs as soon as possible, but not later than 7 days after the accident. However, you should always get at least one estimate of the repair cost before starting the repairs.


In [20]:
# Display the raw context string that was handed to the chain, to judge the
# retrieval by eye. The value is whatever the most recent `get_context`
# assignment produced, so re-run the query cell before reading this output.
context

'We are actively developing and expanding the dataset, please find the latest information on the project page: http://idl.baidu.com/FM-IQA.html2The results reported in this paper are obtained from a model trained on the first version of the dataset (asubset of the current version) which contains 120,360 images and 250,569 question-answer pairs. 2 \x0cWhat is the cat doing ? <BOA> Sitting on the umbrella SharedEmbeddingShared LSTM Fusing CNN IntermediateSoftmaxSitting on the umbrella <EOA> Figure 2: Illustration of the mQA model architecture. We input an image and a question about theimage (i.e. What is the cat doing?) to the model. The model is trained to generate the answer tothe question (i.e. Sitting on the umbrella). The weight matrix in the word embedding layers ofthe two LSTMs (one for the question and one for the answer) are shared. In addition, as in [25], thisweight matrix is also shared, in a transposed manner, with the weight matrix in the Softmax layer.Different colors in t



A Gram matrix is a type of matrix that is used in linear algebra to represent a inner product between vectors.  The inner product is computed from a set of vectors, called a basis, and a matrix that represents the basis.  The Gram matrix is used to represent the linear relationship between the basis vectors.  For example, the inner product of two vectors can be represented by the dot product of the corresponding vectors.  This dot product is computed by first applying the corresponding basis matrix to the two vectors.  The result of this computation is a number.  This number is called the inner product of the two vectors.  The inner product of two vectors can also be represented by a dot product of the two Gram matrices of the basis vectors.  The Gram matrix of a basis is a square matrix with the same dimensions as the basis.  The inner product of two vectors is the dot product of the corresponding vectors and the basis.  The inner product of two vectors can also be represented by a do