In [3]:
# !pip install google-genai

# JSON

In [4]:
import json

# Load the JSON file
json_file_path = "metadata/paglia_metadata.json"

with open(json_file_path, "r") as file:
    pagila_metadata = json.load(file)

# Display the structure of the JSON file (showing only a subset if it's large)
pagila_metadata


{'database_name': 'paglia',
 'description': 'A database for managing a DVD rental store.',
 'tables': [{'table_name': 'actor',
   'description': 'Stores information about actors.',
   'columns': [{'column_name': 'actor_id',
     'data_type': 'integer',
     'is_primary_key': True,
     'description': 'Unique identifier for the actor.'},
    {'column_name': 'first_name',
     'data_type': 'text',
     'description': "Actor's first name."},
    {'column_name': 'last_name',
     'data_type': 'text',
     'description': "Actor's last name."},
    {'column_name': 'last_update',
     'data_type': 'timestamp with time zone',
     'description': 'Timestamp of the last update to the record.'}]},
  {'table_name': 'address',
   'description': 'Stores address information.',
   'columns': [{'column_name': 'address_id',
     'data_type': 'integer',
     'is_primary_key': True,
     'description': 'Unique identifier for the address.'},
    {'column_name': 'address',
     'data_type': 'text',
     'de

In [9]:
names = list(map(str, list(pagila_metadata.keys())))
names

['database_name',
 'description',
 'tables',
 'views',
 'materialized_views',
 'functions',
 'relationships']

In [23]:
metadata = json.dumps(pagila_metadata)
len(metadata)

23036

# Gemini 

In [52]:
import os
from google import genai
from google.genai import types


In [55]:
# api_key=os.environ['GEMINI_API_KEY']
api_key = "AIzaSyCwXXVR0b5u5Jb5uV5rtIB3KvTOSKWaz4I"

In [56]:
client = genai.Client(api_key=api_key)


In [None]:
response = client.models.generate_content(
    model='gemini-2.0-flash-lite-preview-02-05', contents='Why is the sky blue?'
)
print(response.text)


The sky appears blue primarily due to a phenomenon called **Rayleigh scattering**. Here's a breakdown:

*   **Sunlight and the Atmosphere:** Sunlight is composed of all the colors of the rainbow. When sunlight enters the Earth's atmosphere, it collides with tiny air molecules (mostly nitrogen and oxygen).

*   **Scattering of Light:** This collision causes the light to scatter in different directions. The amount of scattering depends on the wavelength of the light.

*   **Rayleigh Scattering Favors Blue:** Rayleigh scattering is more effective at shorter wavelengths. Blue and violet light have the shortest wavelengths among the visible colors, so they are scattered much more than colors like red and orange.

*   **Why Mostly Blue, Not Violet?** Violet light is scattered even more than blue, but our eyes are less sensitive to violet. Also, the sun emits slightly less violet light than blue light. As a result, we perceive the sky as blue.

**In summary:**

1.  Sunlight enters the atmosph

In [58]:
prompt_template = """
You are a PostgreSQL expert.

Please help to generate a PostgreSQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.

===Tables metadata in JSON like format
{metadata}


===Response Guidelines
1. If the provided context is sufficient, please generate a valid query without any explanations for the question. The query should start with a comment containing the question being asked.
2. If the provided context is insufficient, please explain why it can't be generated.
3. Please use the most relevant table(s).
5. Please format the query before responding.
6. Please always respond with a valid well-formed JSON object with the following format

===Response Format if Question made sense and could be converted to a valid query.
{{  
    "question": "List all actors' first and last names.",
    "query": "SELECT first_name, last_name FROM actor;",
    "explanation": "An explanation of query."
}}
===Response Format if Question was unclear/invalid and valid/correct query generation is difficult.
{{  
    "question" : "something something",
    "query": None,
    "explanation": "An explanation of failing to generate the query."
}}

===Question
{question}
"""

In [62]:
values = {
    "metadata": metadata,
    "question": "Get a list of all customers and the films they have rented."
}

# Fill the template
formatted_prompt = prompt_template.format(**values)

print(formatted_prompt)


You are a PostgreSQL expert.

Please help to generate a PostgreSQL query to answer the question. Your response should ONLY be based on the given context and follow the response guidelines and format instructions.

===Tables metadata in JSON like format
{"database_name": "paglia", "description": "A database for managing a DVD rental store.", "tables": [{"table_name": "actor", "description": "Stores information about actors.", "columns": [{"column_name": "actor_id", "data_type": "integer", "is_primary_key": true, "description": "Unique identifier for the actor."}, {"column_name": "first_name", "data_type": "text", "description": "Actor's first name."}, {"column_name": "last_name", "data_type": "text", "description": "Actor's last name."}, {"column_name": "last_update", "data_type": "timestamp with time zone", "description": "Timestamp of the last update to the record."}]}, {"table_name": "address", "description": "Stores address information.", "columns": [{"column_name": "address_id", "

In [63]:
len(formatted_prompt)

24327

In [64]:
response = client.models.generate_content(
    model='gemini-2.0-flash-001', contents=formatted_prompt
)
print(response.text)

```json
{
  "question": "Get a list of all customers and the films they have rented.",
  "query": "SELECT\n  c.first_name AS customer_first_name,\n  c.last_name AS customer_last_name,\n  f.title AS film_title\nFROM customer AS c\nJOIN rental AS r\n  ON c.customer_id = r.customer_id\nJOIN inventory AS i\n  ON r.inventory_id = i.inventory_id\nJOIN film AS f\n  ON i.film_id = f.film_id;",
  "explanation": "This query joins the customer, rental, inventory, and film tables to retrieve the first name, last name of customers and the title of the films they rented."
}
```


In [67]:
response

GenerateContentResponse(candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='```json\n{\n  "question": "Get a list of all customers and the films they have rented.",\n  "query": "SELECT\\n  c.first_name AS customer_first_name,\\n  c.last_name AS customer_last_name,\\n  f.title AS film_title\\nFROM customer AS c\\nJOIN rental AS r\\n  ON c.customer_id = r.customer_id\\nJOIN inventory AS i\\n  ON r.inventory_id = i.inventory_id\\nJOIN film AS f\\n  ON i.film_id = f.film_id;",\n  "explanation": "This query joins the customer, rental, inventory, and film tables to retrieve the first name, last name of customers and the title of the films they rented."\n}\n```')], role='model'), citation_metadata=None, finish_message=None, token_count=None, avg_logprobs=-0.02113823625776503, finish_reason=<FinishReason.STOP: 'STOP'>, grounding_