In [26]:
# Third-party imports
from google.cloud import bigquery
import pandas as pd 
# from langchain_openai import ChatOpenAI
# from langchain.chat_models import ChatOpenAI  # old syntax
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser  # Changed this line

In [None]:
def get_bigquery_schema(project_id: str, dataset_id: str, table_id: str) -> list:
    """Return the schema of a BigQuery table as a list of plain dicts.

    Args:
        project_id: Project used to create the ``bigquery.Client``; the
            dataset is resolved inside the client's project.
        dataset_id: ID of the dataset that contains the table.
        table_id: ID of the table whose schema is read.

    Returns:
        One dict per entry of ``table.schema``, in schema order, with the
        keys ``'name'``, ``'type'`` (the field's ``field_type``) and
        ``'mode'``.

    Raises:
        Any exception raised while creating the client or by
        ``client.get_table`` is propagated unchanged.
    """
    client = bigquery.Client(project=project_id)

    # Client.dataset() is deprecated in google-cloud-bigquery; build the
    # reference explicitly instead. Using client.project keeps the dataset
    # in the same project the deprecated helper would have picked.
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    table = client.get_table(dataset_ref.table(table_id))

    return [
        {
            'name': field.name,
            'type': field.field_type,
            'mode': field.mode,
        }
        for field in table.schema
    ]

def execute_bigquery_query(query: str, max_rows: int = 10) -> str:
    """Run a BigQuery SQL query and return its leading rows as text.

    Args:
        query: SQL text passed to ``bigquery.Client.query``.
        max_rows: Maximum number of result rows to fetch and render.
            Defaults to 10, the limit this function has always applied.

    Returns:
        The fetched rows rendered with ``DataFrame.to_string()``. If
        anything fails (client creation, the query itself, or the
        DataFrame conversion), the string
        ``"Error executing query: <message>"`` is returned instead of
        raising.
    """
    try:
        client = bigquery.Client()
        query_job = client.query(query)
        # Cap the row count while fetching so a large result set is not
        # downloaded in full only to be cut down to a few rows afterwards.
        rows = query_job.result(max_results=max_rows)
        return rows.to_dataframe().to_string()
    except Exception as e:
        # Best-effort boundary: every failure is reported through the
        # returned string rather than propagated.
        return f"Error executing query: {e}"

def generate_bigquery_query(schema: str, question: str) -> str:
    """Ask the chat model to translate a question into a BigQuery SQL query.

    The schema and question are substituted into a fixed prompt, the prompt
    is sent to a ``ChatOpenAI`` model (``gpt-4-0125-preview``, temperature 0)
    and the reply is passed through ``StrOutputParser``.

    Args:
        schema: Text placed in the prompt's ``{schema}`` slot.
        question: Text placed in the prompt's ``{question}`` slot.

    Returns:
        Whatever the prompt | model | parser chain yields from ``invoke``.
    """
    # NOTE(review): ChatOpenAI is not imported in the visible part of this
    # file (both import lines are commented out) -- confirm it is defined
    # elsewhere before this function is called.
    llm = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)

    sql_prompt = PromptTemplate(
        input_variables=["schema", "question"],
        template="""You are a data analyst who converts natural language questions into BigQuery SQL queries.
        Using the following schema, write a SQL query to answer the user's question.
        
        SCHEMA:
        {schema}
        
        QUESTION: {question}
        
        Write only the SQL query, nothing else. Ensure the query is compatible with BigQuery SQL syntax.
        """,
    )

    # Prompt -> model -> plain-string output.
    pipeline = sql_prompt | llm | StrOutputParser()
    payload = {"schema": schema, "question": question}
    return pipeline.invoke(payload)
 