In [None]:
!pip install llama-index langchain sentence-transformers faiss-cpu
import os
import json
from sentence_transformers import SentenceTransformer
import numpy as np

In [None]:
import json
def load_and_process_data(file_path):
    """Load the sensor JSON file and flatten it into a list of document dicts.

    The file is expected to contain a list of dictionaries, each mapping a
    day label to a list of entries. Every entry becomes one document.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        A list of dicts, each with:
          - 'content': the original entry, unchanged.
          - 'metadata': a dict with 'day', 'device_id', 'name' and
            'sensor_type'; the last three are read from the entry's
            'deviceid', 'name' and 'sensortype' keys (None when missing).
    """
    # JSON text is UTF-8; be explicit so decoding does not depend on the
    # platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    documents = []
    for day_dict in data:
        for day, entries in day_dict.items():
            for entry in entries:
                doc = {
                    'content': entry,  # Store the full content
                    'metadata': {
                        'day': day,
                        'device_id': entry.get('deviceid'),
                        'name': entry.get('name'),
                        'sensor_type': entry.get('sensortype')
                    }
                }
                documents.append(doc)

    return documents

# Location of the raw sensor export, then the flattened documents built from it.
file_path = '/content/drive/MyDrive/Asta/iot_data.json'
documents = load_and_process_data(file_path)

# Sanity check: show the Python type of the first document's content.
first_content = documents[0].get('content')
print(type(first_content))

In [None]:
from llama_index.core import Document
# Wrap every loaded record in a llama-index Document: the content is
# serialized to a JSON string for the text, the metadata dict is passed as-is.
document_objects = [
    Document(text=json.dumps(record['content']), metadata=record['metadata'])
    for record in documents
]


In [None]:
!pip install llama-index langchain sentence-transformers faiss-cpu
!pip install langchain
!pip install llama-index-vector-stores-faiss
!pip install llama-index-embeddings-huggingface
import faiss
import json

from sentence_transformers import SentenceTransformer
from llama_index.core import Document, VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Embedding model used for every document in the index.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# 384 is the dimension for the chosen embedding model
d = 384
faiss_index = faiss.IndexFlatL2(d)

# Wrap the FAISS index as a llama-index vector store and expose it through
# a storage context.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the main index over all document objects.
index = VectorStoreIndex.from_documents(
    documents=document_objects,
    storage_context=storage_context,
    embed_model=embed_model,
)

In [None]:
def get_initial_info():
    """Prompt for the day, room and sensor name that scope the session.

    Returns:
        A ``(day, room, sensor_name)`` tuple of the raw strings typed by
        the user, asked for in that order.
    """
    prompts = (
        "Enter the day (e.g., Day1, Day2): ",
        "Enter the room (e.g., room1, room2): ",
        "Enter the sensor name (e.g., FAN1, LIGHT1): ",
    )
    # The generator is consumed left to right, so prompts appear in order.
    day, room, sensor_name = (input(text) for text in prompts)
    return day, room, sensor_name

def get_user_query():
    """Prompt the user for a question about the selected sensor.

    Returns:
        The raw string typed by the user.
    """
    question = input("What would you like to know about this sensor? ")
    return question

In [None]:
def filter_documents(documents, metadata_filter):
    """Return the documents whose metadata matches every filter entry.

    Args:
        documents: Iterable of dicts that each carry a 'metadata' dict.
        metadata_filter: Mapping of metadata key -> required value. A key
            missing from a document's metadata is compared as None.

    Returns:
        A new list with the matching documents, in their original order.
        An empty filter matches every document.
    """
    def matches(record):
        meta = record['metadata']
        for key, wanted in metadata_filter.items():
            if not (meta.get(key) == wanted):
                return False
        return True

    return [record for record in documents if matches(record)]

def process_query(index, day, room, sensor_name, user_query):
    """Answer ``user_query`` using only the documents for one day/room/sensor.

    The module-level ``documents`` list is filtered on metadata, the matches
    are indexed in a fresh, query-local vector index, and the query is run
    against that index with the module-level ``llm``.

    Args:
        index: Unused; kept so existing callers do not break.
        day: Value the 'day' metadata field must equal.
        room: Value the 'device_id' metadata field must equal.
        sensor_name: Value the 'name' metadata field must equal.
        user_query: Natural-language question to run against the index.

    Returns:
        A ``(response_text, source_nodes)`` tuple. When no document matches
        the filter, or a KeyError is raised while querying, a message string
        and an empty list are returned instead.
    """
    # Construct metadata filter
    metadata_filter = {
        "day": day,
        "device_id": room,
        "name": sensor_name
    }

    # Filter documents based on metadata
    filtered_docs = filter_documents(documents, metadata_filter)
    print(f"Filtered documents count: {len(filtered_docs)}")
    if not filtered_docs:
        # Nothing to index or retrieve from; skip the embedding and LLM calls.
        return "No documents matched the selected day, room, and sensor.", []

    # Convert filtered documents to Document instances. Document text must be
    # a string, so non-string content is serialized to JSON, the same way the
    # main document objects are built.
    filtered_document_objects = [
        Document(
            text=doc['content'] if isinstance(doc['content'], str) else json.dumps(doc['content']),
            metadata=doc['metadata'],
        )
        for doc in filtered_docs
    ]
    print(f"Filtered document objects count: {len(filtered_document_objects)}")

    # Create a temporary index for the filtered documents. It deliberately
    # does NOT reuse the shared storage_context: that would append these
    # vectors to the FAISS store that already holds every document, so
    # retrieval would no longer be limited to the filtered subset and the
    # shared store would grow on every call.
    temp_index = VectorStoreIndex.from_documents(
        documents=filtered_document_objects,
        embed_model=embed_model,
    )
    print("Temporary index keys:", temp_index.index_struct.nodes_dict.keys())
    try:
        response = temp_index.as_query_engine(llm=llm).query(user_query)
    except KeyError as e:
        print(f"KeyError: {e}. The key was not found in the index_struct.nodes_dict.")
        return "An error occurred while processing the query.", []

    return response.response, response.source_nodes

In [None]:
!pip install -U bitsandbytes accelerate einops

In [None]:

!pip install llama-index-llms-huggingface
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
name = "StabilityAI/stablelm-tuned-alpha-3b"
# Hugging Face auth token: read from the HF_TOKEN environment variable so the
# secret is never stored in the notebook. Falls back to an empty string.
auth_token = os.environ.get("HF_TOKEN", "")

# Create tokenizer
tokenizer = AutoTokenizer.from_pretrained(name, cache_dir='./model/', use_auth_token=auth_token)

# Create model (float16 weights, loaded in 8-bit, dynamic RoPE scaling)
model = AutoModelForCausalLM.from_pretrained(
    name,
    cache_dir='./model/',
    use_auth_token=auth_token,
    torch_dtype=torch.float16,
    rope_scaling={"type": "dynamic", "factor": 2},
    load_in_8bit=True,
)

# Wrap the already-loaded model and tokenizer for llama-index.
# Sampling settings go in generate_kwargs (forwarded to generation);
# model_kwargs is only used when the wrapper loads the model itself, so it
# has no effect when a model instance is passed in. do_sample=True is needed
# for temperature/top_p to take effect.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    model=model,
    tokenizer=tokenizer,
    generate_kwargs={"temperature": 0.7, "top_p": 0.95, "do_sample": True},
)

In [None]:
def generate_answer(context, user_query):
    """Ask the module-level ``llm`` to answer a question given sensor context.

    Args:
        context: Text describing the sensor, inserted into the prompt.
        user_query: The question to answer.

    Returns:
        The generated completion text as a string.
    """
    prompt = f"""Based on the following context about a sensor:

{context}

Please answer the following question:
{user_query}

Answer:"""

    # llama-index LLM objects are not callable; use the completion API and
    # return the generated text.
    return llm.complete(prompt).text

In [None]:
def main():
    """Run the interactive question-answering loop for one sensor.

    Asks once for the day, room and sensor name, then keeps reading
    questions until the user types 'quit' (case-insensitive). For each
    question it retrieves context, generates an answer unless retrieval
    reported an error, and prints the answer followed by the metadata of
    the source nodes.
    """
    failure_message = "An error occurred while processing the query."

    # Session scope is collected once, before the first question.
    day, room, sensor_name = get_initial_info()

    user_query = get_user_query()
    while user_query.lower() != 'quit':
        # Retrieve context for this question.
        context, source_nodes = process_query(index, day, room, sensor_name, user_query)

        # On a retrieval failure, show the failure message instead of
        # calling the LLM again.
        if context == failure_message:
            answer = context
        else:
            answer = generate_answer(context, user_query)

        print(f"\nAnswer: {answer}")
        print("\nSources:")
        for node in source_nodes:
            print(f"- {node.metadata}")
        print("---")

        user_query = get_user_query()

# Start the interactive session only when this code runs as the main module.
if __name__ == "__main__":
    main()