# MongoDB Query Generator using OpenAI

In [None]:
!pip install openai
!pip install python-dotenv

In [1]:
from openai import OpenAI
import os
from dotenv import load_dotenv

## Load the OpenAI API key

The OpenAI API key is stored in a `.env` file and is loaded here as an environment variable. The file should contain a line of the following form:

`OPENAI_API_KEY="sk-<your-api-key>"`

In [2]:
# Load variables from the .env file; the return value is not needed, so it is
# bound to the throwaway name `_`.
_ = load_dotenv()
# No API key is passed explicitly here; presumably the client reads
# OPENAI_API_KEY from the environment populated above — confirm.
openai_client = OpenAI()

In [3]:
# Model identifiers passed as the `model` argument of the Chat Completions call.
# GPT3_MODEL is the default used by get_mongodb_query; GPT4_MODEL is passed
# explicitly by the examples that want the larger model.
GPT3_MODEL = "gpt-3.5-turbo-1106"
GPT4_MODEL = "gpt-4-1106-preview"

## Generate MongoDB Query using Chat Completion API

### Define system and user prompts

In [4]:
def get_system_prompt():
    """Return the system message that casts the assistant as a MongoDB expert.

    The text (including its embedded indentation and trailing whitespace) is
    kept exactly as before so the request sent to the model is unchanged.
    """
    persona = "You are a MongoDB expert with great expertise in writing MongoDB queries "
    # The second half carries four leading spaces and ends with a newline plus
    # four spaces, mirroring the layout of the original multi-line literal.
    scope = "    for any given data to produce an expected output.\n    "
    return persona + scope
    
def get_user_prompt(input_data, output_data):
    """Build the user message asking for an aggregation pipeline.

    Args:
        input_data: Sample input documents, interpolated into the prompt as-is.
        output_data: Expected output documents, interpolated into the prompt as-is.

    Returns:
        The prompt text. It asks for a JSON reply with the fields
        ``mongoDBQuery`` and ``queryExplanation`` and ends with the two data
        samples. Whitespace matches the original multi-line literal exactly.
    """
    segments = [
        # Task statement (the original joined these two pieces with a line continuation).
        "Your task is to write a MongoDB Query, specifically an aggregation pipeline",
        "    that would produce the expected output for the given input.\n",
        "\n",
        # Description of the expected JSON response, fenced with backticks.
        "    You will always return a JSON response with the following fields.\n",
        "    ```\n",
        "    mongoDBQuery: The MongoDB aggregation pipeline to produce the expected output for a given input.",
        "    This field corresponds to just the list of stages in the aggregation pipeline ",
        "    and shouldn't contain the \"db.collection.aggregate\" prefix.\n",
        "    \n",
        "    queryExplanation: A detailed explanation for the query that was returned.\n",
        "    ```\n",
        "    \n",
        # Caller-supplied samples.
        f"    Input data: {input_data} \n",
        f"    Expected output data: {output_data}\n",
        "    ",
    ]
    return "".join(segments)

### Utility function to invoke Chat Completion API

In [5]:
def get_mongodb_query(input_data, output_data, model=GPT3_MODEL):
    """Ask the chat model for an aggregation pipeline and print its reply.

    Args:
        input_data: Sample input documents (text) embedded in the user prompt.
        output_data: Expected output documents (text) embedded in the user prompt.
        model: Model identifier for the Chat Completions call; defaults to
            GPT3_MODEL.

    Returns:
        None. The assistant's reply is printed, not returned.
    """
    messages = [
        {"role": "system", "content": get_system_prompt()},
        {"role": "user", "content": get_user_prompt(input_data, output_data)},
    ]

    chat_completion = openai_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        # JSON mode: the user prompt explicitly asks for a JSON response.
        response_format={"type": "json_object"},
    )

    choice = chat_completion.choices[0]
    print(f"Assistant Response:\n{choice.message.content}")

    # A reply that stops because of the token limit is cut off mid-document and
    # is therefore not valid JSON; make that visible instead of printing it as
    # if it were complete.
    if choice.finish_reason == "length":
        print(
            "Warning: the response was truncated (finish_reason='length'); "
            "the JSON above may be incomplete."
        )

### Data

In [6]:
# Example 1: group the players by team and count the players in each team.
ex1_input_data = """
[
  { "name": "Sachin", "team": "India" },
  { "name": "Sourav", "team": "India" },
  { "name": "Lara", "team": "West Indies" }
]
"""

# The team name must be a quoted string; a bare India is not a valid value.
ex1_output_data = """
[
 { "team": "India", "playerCount": 2 },
 { "team": "West Indies", "playerCount": 1 }
]
"""

### Generate Query

In [7]:
# Example 1 is run against the GPT-4 model instead of the default.
get_mongodb_query(input_data=ex1_input_data, output_data=ex1_output_data, model=GPT4_MODEL)

Assistant Response:

    {
        "mongoDBQuery": [
            {
                "$group": {
                    "_id": "$team",
                    "playerCount": { "$sum": 1 }
                }
            },
            {
                "$project": {
                    "_id": 0,
                    "team": "$_id",
                    "playerCount": 1
                }
            }
        ],
        "queryExplanation": "The aggregation pipeline consists of two stages. In the first stage, `$group`, documents are grouped by the `team` field, which is used as the `_id` in the grouping. For each group, we count the number of players using the `$sum` accumulator, incrementing the count by 1 for each document in the group, and store this count in the `playerCount` field. In the second stage, `$project`, we reshape each document in the stream: the `_id` field, which contains the team name, is projected to a `team` field, and we include the `playerCount` field. We also exclude the `_id

### Additional examples

Below are some additional examples to try out!

#### Example 2

Your task is to write a MongoDB aggregation pipeline to find the documents that have duplicates in the nested array "courses" and count the number of times those duplicate items are present in the array.  

In [None]:
# Example 2: report students whose "courses" array contains the same
# courseName more than once, together with how often it occurs.
ex2_input_data = """
[
 {
   "student": "Sachin",
   "courses": [
      {
        "courseName": "batting",
        "marks": 100
      },
      {
        "courseName": "batting",
        "marks": 50
      },
      {
        "courseName": "fielding",
        "marks": 60
      }
   ]
 },
 {
   "student": "Sourav",
   "courses": [
      {
        "courseName": "batting",
        "marks": 80
      },
      {
        "courseName": "bowling",
        "marks": 60
      },
      {
        "courseName": "fielding",
        "marks": 40
      }
   ]
 }
]
"""

# Commas between the fields are required for the sample to be well-formed.
ex2_output_data = """
[
  {
    "student": "Sachin",
    "duplicateCourses": [
      {
        "courseName": "batting",
        "duplicateCount": 2
      }
    ]
  }
]
"""

In [None]:
# Example 2 is run against the GPT-4 model instead of the default.
get_mongodb_query(input_data=ex2_input_data, output_data=ex2_output_data, model=GPT4_MODEL)

#### Example 3

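A simple find example: return only the document that matches a filter. Because the prompt always asks for an aggregation pipeline, the expected answer is a `$match` stage rather than a `find()` call.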

In [None]:
# Example 3: the expected output keeps only the "Lara" document. Both input
# documents have the same "team", so the filter has to use "name" or "age".
ex3_input_data = """
[
 {
    "name": "Sachin",
    "age": 50,
    "team": "India"
 },
 {
   "name": "Lara",
   "age": 52,
   "team": "India"
 }
]
"""

ex3_output_data = """
[
  {
    "name": "Lara",
    "age": 52,
    "team": "India"
  }
]
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex3_input_data, output_data=ex3_output_data)

#### Example 4

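Add a field: each element of `features` gains a `timeMongo` date that corresponds to its `properties.timeStamp` string.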

In [None]:
# Example 4: the samples use mongo-shell literals (ObjectId(...), ISODate(...)),
# so these strings are not strict JSON. The expected output is the input with a
# "timeMongo" ISODate added to the element of "features".
ex4_input_data = """
[
  {
    "_id": ObjectId("5bdb6a44d9b2d4645509db2e"),
    "crs": {
      "type": "name",
      "properties": {
        "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
      }
    },
    "type": "FeatureCollection",
    "features": [
      {
        "geometry": {
          "type": "Point",
          "coordinates": [
            45,
            66
          ]
        },
        "type": "Feature",
        "id": 50,
        "properties": {
          "fogClass": 0,
          "_note": "movable",
          "fileLocation": "blah.jpg",
          "timeStamp": "2018-11-01 14:51:00",
          "predFALSE": 0.998167,
          "ipAddr": "http://abcd.ef",
          "longitude": "45",
          "predTRUE": 0.001833,
          "cameraID": "IDABC",
          "originalPath": "originalBlah.jpg",
          "location": "location1",
          "latitude": "66"
        }
      }
    ]
  }
]
"""

ex4_output_data = """
[
  {
    "_id": ObjectId("5bdb6a44d9b2d4645509db2e"),
    "crs": {
      "type": "name",
      "properties": {
        "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
      }
    },
    "type": "FeatureCollection",
    "features": [
      {
        "geometry": {
          "type": "Point",
          "coordinates": [
            45,
            66
          ]
        },
        "type": "Feature",
        "id": 50,
        "properties": {
          "fogClass": 0,
          "_note": "movable",
          "fileLocation": "blah.jpg",
          "timeStamp": "2018-11-01 14:51:00",
          "predFALSE": 0.998167,
          "ipAddr": "http://abcd.ef",
          "longitude": "45",
          "predTRUE": 0.001833,
          "cameraID": "IDABC",
          "originalPath": "originalBlah.jpg",
          "location": "location1",
          "latitude": "66"
        },
        "timeMongo": ISODate("2018-11-01T14:51:00Z")
      }
    ]
  }
]
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex4_input_data, output_data=ex4_output_data)

#### Example 5

In [None]:
# Example 5: invert the nesting — for every author, list the names of the
# books they contributed to, across all categories.
# The "books" arrays no longer end with a dangling comma, so the sample is
# valid JSON.
ex5_input_data = """
[
  {
    "bookCategory": "Non-Fiction",
    "books": [
      {
        "bookName": "Seven Habits",
        "pages": 200,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          },
          {
            "authorName": "Sourav",
            "authorEmail": "sourav@gmail.com"
          }
        ]
      },
      {
        "bookName": "One thing",
        "pages": 100,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          }
        ]
      }
    ]
  },
  {
    "bookCategory": "Fiction",
    "books": [
      {
        "bookName": "Harry Potter",
        "pages": 400,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          },
          {
            "authorName": "Tim",
            "authorEmail": "Tim@gmail.com"
          }
        ]
      },
      {
        "bookName": "Alchemist",
        "pages": 100,
        "authors": [
          {
            "authorName": "Sourav",
            "authorEmail": "sourav@gmail.com"
          }
        ]
      }
    ]
  }
]
"""

ex5_output_data = """
[
  {
     "authorName": "Sachin",
     "bookName": [
        "Seven Habits",
        "One thing",
        "Harry Potter"
     ]
  },
  {
     "authorName": "Sourav",
     "bookName": [
        "Seven Habits",
        "Alchemist"
     ]
  },
  {
     "authorName": "Tim",
     "bookName": [
        "Harry Potter"
     ]
  }
]
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex5_input_data, output_data=ex5_output_data)

#### Example 6

In [None]:
# Example 6: flatten the per-category "books" arrays into one list of book
# documents.
# The "books" arrays no longer end with a dangling comma, so the sample is
# valid JSON.
ex6_input_data = """
[
  {
    "bookCategory": "Non-Fiction",
    "books": [
      {
        "bookName": "Seven Habits",
        "pages": 200,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          },
          {
            "authorName": "Sourav",
            "authorEmail": "sourav@gmail.com"
          }
        ]
      },
      {
        "bookName": "One thing",
        "pages": 100,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          }
        ]
      }
    ]
  },
  {
    "bookCategory": "Fiction",
    "books": [
      {
        "bookName": "Harry Potter",
        "pages": 400,
        "authors": [
          {
            "authorName": "Sachin",
            "authorEmail": "sachin@gmail.com"
          },
          {
            "authorName": "Tim",
            "authorEmail": "Tim@gmail.com"
          }
        ]
      },
      {
        "bookName": "Alchemist",
        "pages": 100,
        "authors": [
          {
            "authorName": "Sourav",
            "authorEmail": "sourav@gmail.com"
          }
        ]
      }
    ]
  }
]
"""

ex6_output_data = """
[
  {
    "authors": [
      {
        "authorEmail": "sachin@gmail.com",
        "authorName": "Sachin"
      },
      {
        "authorEmail": "sourav@gmail.com",
        "authorName": "Sourav"
      }
    ],
    "bookName": "Seven Habits",
    "pages": 200
  },
  {
    "authors": [
      {
        "authorEmail": "sachin@gmail.com",
        "authorName": "Sachin"
      }
    ],
    "bookName": "One thing",
    "pages": 100
  },
  {
    "authors": [
      {
        "authorEmail": "sachin@gmail.com",
        "authorName": "Sachin"
      },
      {
        "authorEmail": "Tim@gmail.com",
        "authorName": "Tim"
      }
    ],
    "bookName": "Harry Potter",
    "pages": 400
  },
  {
    "authors": [
      {
        "authorEmail": "sourav@gmail.com",
        "authorName": "Sourav"
      }
    ],
    "bookName": "Alchemist",
    "pages": 100
  }
]
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex6_input_data, output_data=ex6_output_data)

#### Example 7

In [None]:
# Example 7: one output document per (book, author) pair, with the author's
# name and email lifted to the top level next to the book's fields.
ex7_input_data = """
[
  {
    "bookName": "Seven Habits",
    "pages": 200,
    "authors": [
      {
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
      },
      {
        "authorName": "Sourav",
        "authorEmail": "sourav@gmail.com"
      }
    ]
  },
  {
    "bookName": "One thing",
    "pages": 100,
    "authors": [
      {
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
      }
    ]
  },
  {
    "bookName": "Harry Potter",
    "pages": 400,
    "authors": [
      {
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
      },
      {
        "authorName": "Tim",
        "authorEmail": "Tim@gmail.com"
      }
    ]
  },
  {
    "bookName": "Alchemist",
    "pages": 100,
    "authors": [
      {
        "authorName": "Sourav",
        "authorEmail": "sourav@gmail.com"
      }
    ]
  }
]
"""

ex7_output_data = """
[
    {
        "bookName": "Seven Habits",
        "pages": 200,
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
    },
    {
        "bookName": "Seven Habits",
        "pages": 200,
        "authorName": "Sourav",
        "authorEmail": "sourav@gmail.com"
    },
    {
        "bookName": "One thing",
        "pages": 100,
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
    },
    {
        "bookName": "Harry Potter",
        "pages": 400,
        "authorName": "Sachin",
        "authorEmail": "sachin@gmail.com"
    },
    {
        "bookName": "Harry Potter",
        "pages": 400,
        "authorName": "Tim",
        "authorEmail": "Tim@gmail.com"
    },
    {
        "bookName": "Alchemist",
        "pages": 100,
        "authorName": "Sourav",
        "authorEmail": "sourav@gmail.com"
    }
]
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex7_input_data, output_data=ex7_output_data)

#### Example 8

In [None]:
# Example 8: summarise one student's "subjects" array as three counts —
# all results, passed results and failed results.
ex8_input_data = """
[
    {
        "studentName": "Pete",
        "subjects": [
            {
                "subjectName": "Math",
                "result": "passed"
            },
            {
                "subjectName": "Physics",
                "result": "passed"
            },
            {
                "subjectName": "Chemistry",
                "result": "failed"
            },
            {
                "subjectName": "Botany",
                "result": "failed"
            },
            {
                "subjectName": "Zoology",
                "result": "failed"
            }
        ]
    }
]
"""

# Note: the expected output is a single document, not an array.
ex8_output_data = """
{
    "totalResultCount": 5,
    "totalPassedCount": 2,
    "totalFailedCount": 3
}
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex8_input_data, output_data=ex8_output_data)

#### Example 9

In [None]:
# Example 9: the same pass/fail counts, now across all students, plus the
# number of students and the unique vs. total subject counts ("Math" appears
# for both students, hence 8 unique names out of 9 entries).
ex9_input_data = """
[
    {
        "studentName": "Pete",
        "subjects": [
            {
                "subjectName": "Math",
                "result": "passed"
            },
            {
                "subjectName": "Physics",
                "result": "passed"
            },
            {
                "subjectName": "Chemistry",
                "result": "failed"
            },
            {
                "subjectName": "Botany",
                "result": "failed"
            },
            {
                "subjectName": "Zoology",
                "result": "failed"
            }
        ]
    },
    {
        "studentName": "Mazu",
        "subjects": [
            {
                "subjectName": "Math",
                "result": "failed"
            },
            {
                "subjectName": "English",
                "result": "passed"
            },
            {
                "subjectName": "Commerce",
                "result": "passed"
            },
            {
                "subjectName": "Biology",
                "result": "passed"
            }
        ]
    } 
]
"""

# Note: the expected output is a single document, not an array.
ex9_output_data = """
{
    "totalStudentsCount": 2,
    "totalResultCount": 9,
    "totalPassedCount": 5,
    "totalFailedCount": 4,
    "totalUniqueSubjectsCount": 8,
    "totalSubjectsCount": 9
}
"""

In [None]:
# No model is given, so the function's default model is used.
get_mongodb_query(input_data=ex9_input_data, output_data=ex9_output_data)