<a href="https://colab.research.google.com/github/musicness/AI_Based_Reporting_mongoDB/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install bitsandbytes
!pip install transformers accelerate
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.1


In [2]:
# Redundant: the first install cell already installs transformers.
!pip install transformers



In [7]:
# Read the natural-language request that the model cell inserts into its prompt.
user_input = input("Enter your query ")
print(f"You entered: {user_input}")


Enter your query tell me list vendor than 1000
You entered: tell me list vendor than 1000


In [8]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)
import torch
from peft import PeftModel

# JSON description of the MongoDB layout, pasted verbatim into the prompt below:
# a single `vendors` collection with single-field indexes on _id, vendor_name,
# transaction_date and invoice_amount, plus the bsonType of every document field.
# NOTE(review): the commented-out seeding cell further down inserts documents with a
# `vendor` key (not `vendor_name`) and no `contact_details`, so queries generated
# from this schema (e.g. projecting `vendor_name`) would not line up with that data.
db_schema = '''{
  "collections": [
    {
      "name": "vendors",
      "indexes": [
        {
          "key": {
            "_id": 1
          }
        },
        {
          "key": {
            "vendor_name": 1
          }
        },
        {
          "key": {
            "transaction_date": 1
          }
        },
        {
          "key": {
            "invoice_amount": 1
          }
        }
      ],
      "uniqueIndexes": [],
      "document": {
        "properties": {
          "_id": {
            "bsonType": "string"
          },
          "vendor_name": {
            "bsonType": "string"
          },
          "invoice_amount": {
            "bsonType": "double"
          },
          "transaction_date": {
            "bsonType": "date"
          },
          "contact_details": {
            "bsonType": "object",
            "properties": {
              "email": {
                "bsonType": "string"
              },
              "phone": {
                "bsonType": "string"
              }
            }
          }
        }
      }
    }
  ],
  "version": 1
}
'''

# Prompt sent to the model: a task description, the JSON schema (`db_schema`) and
# the user's request (`user_input`, set by the input cell — that cell must run first).
# The literal layout (leading "<s>", indentation, "### Instruct:" / "### Output:")
# is part of the runtime string; presumably it mirrors the format the adapter was
# fine-tuned on — confirm against the adapter's model card before changing it.
prompt = f"""<s>
        Task Description:
        Your task is to create a MongoDB query that accurately fulfills the provided Instruct while strictly adhering to the given MongoDB schema. Ensure that the query solely relies on keys and columns present in the schema. Minimize the usage of lookup operations wherever feasible to enhance query efficiency.

        MongoDB Schema:
        {db_schema}

        ### Instruct:
        {user_input}

        ### Output:
        """

# Identifiers of the base checkpoint and of the PEFT adapter applied on top of it.
base_model_id = "microsoft/phi-2"
adapter = 'Chirayu/phi-2-mongodb'

# Target device for the `.to(...)` calls: CUDA when available, otherwise CPU.
# NOTE(review): `device_map` below pins the model to device index 0 regardless of
# this fallback.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)

# 4-bit NF4 quantization with double quantization and a float16 compute dtype.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the quantized base model from a specific Hub revision.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    revision="refs/pr/23",
    trust_remote_code=True,
    quantization_config=bnb_config,
    torch_dtype="auto",
    device_map={"": 0},  # whole model ("" = root module) on device index 0
    # Extra keyword arguments passed through unchanged.
    # NOTE(review): presumably options of the remote-code Phi implementation at
    # this revision — confirm they are still honoured.
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
)

# Wrap the base model with the adapter weights, then move the result to `device`.
model = PeftModel.from_pretrained(model, adapter)
model = model.to(device)
# Tokenize the prompt and generate a completion with the adapted model.
model_inputs = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(
    **model_inputs,
    # NOTE: max_length counts prompt tokens plus generated tokens, so a longer
    # schema or question leaves less room for the generated query.
    max_length=1024,
    no_repeat_ngram_size=10,
    repetition_penalty=1.02,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)[0]

# Decode only the tokens after the prompt; special tokens are kept in the text.
prompt_length = model_inputs['input_ids'].shape[1]
query = tokenizer.decode(output[prompt_length:], skip_special_tokens=False)

# Cut the completion at the first end marker: the literal "</s>" text, or the
# tokenizer's own EOS token (still present because skip_special_tokens=False).
# If neither occurs, the whole completion is kept; no exception is raised or printed.
stop_markers = ["</s>"]
if tokenizer.eos_token:
    stop_markers.append(tokenizer.eos_token)
marker_positions = [pos for pos in map(query.find, stop_markers) if pos != -1]
stop_idx = min(marker_positions, default=len(query))

# Cleaned query string, kept in a variable so later cells can reuse it.
mongo_query = query[:stop_idx].strip()
print(mongo_query)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

db.vendors.find({ invoice_amount: { $gt: 1000 } }, { vendor_name: 1, _id: 0 })


In [9]:
# pymongo is only used by the MongoDB cells below, which are currently commented out.
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m1.5/1.7 MB[0m [31m45.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m31.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/313.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25hIn

In [23]:
# Code to connect to the database and create some sample data. To run this, you first need to configure MongoDB.

# from pymongo import MongoClient
# from datetime import datetime, timedelta
# import random

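# # Replace with your Atlas connection string. Never commit real credentials:
# # load the URI from an environment variable or a secret store instead
# # (the password previously committed on this line should be rotated).
# client = MongoClient("mongodb+srv://<username>:<password>@<cluster-host>/")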
# db = client["vendorData"]
# collection = db["vendors"]

# def random_date(start, end):
#     return start + timedelta(days=random.randint(0, (end - start).days))

# vendors = ["Vendor A", "Vendor B", "Vendor C", "Vendor D"]
# start_date = datetime.now() - timedelta(days=90)
# end_date = datetime.now()

# records = []

# for _ in range(200):
#     vendor = random.choice(vendors)
#     transaction_date = random_date(start_date, end_date)
#     invoice_amount = round(random.uniform(100, 10000), 2)  # Random amount between 100 and 10000

#     records.append({
#         "vendor": vendor,
#         "transaction_date": transaction_date,
#         "invoice_amount": invoice_amount
#     })
# collection.insert_many(records)
# print("Operation successful")



In [24]:
# This function queries the database.

# db_name = "vendorData"
# collection_name = "vendors"
# uri = "mongodb://localhost:27017/"

# def read_mongo_query(response):

#     try:
#         client = MongoClient(uri)
#         db = client[db_name]
#         collection = db[collection_name]

#         response=response.split()
#         response.pop(0)
#         response=".".join(response)
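#         # Execute the query
#         # WARNING: eval() executes the model-generated text as Python code. That is unsafe
#         # for untrusted input, and mongo-shell syntax such as { invoice_amount: { $gt: 1000 } }
#         # is not valid Python, so the query must be parsed/translated instead.
#         results = eval(f"collection.{response}") # Evaluate the query string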

#         return list(results)

#     except Exception as e:
#         print(f"Error executing MongoDB query: {e}")
#         return []


Visualization


In [25]:
# # This is a sample visualization for the query: Sales trend
# import matplotlib.pyplot as plt

# def visualize_key_value(data):
#     if isinstance(data, dict):
#         keys = list(data.keys())
#         values = list(data.values())

#     # Create a bar plot
#     plt.figure(figsize=(8, 6))  # Set figure size
#     plt.bar(keys, values, color='skyblue')

#     # Add labels and title
#     plt.xlabel('Keys')
#     plt.ylabel('Values')
#     plt.title('Key-Value Pair Visualization')

#     # Show the plot
#     plt.show()

In [None]:
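# NOTE: `response` is not defined in any cell shown here, read_mongo_query returns a list,
# and visualize_key_value only handles dict input — fix these before uncommenting.
# visualize_key_value(read_mongo_query(response))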