In [None]:
# Use the environment variable if the user doesn't provide Project ID.
import os

import vertexai
from vertexai.preview.generative_models import (
    FunctionDeclaration,
    GenerativeModel,
    Tool,
    ToolConfig,
    Part,
    GenerationConfig,
)
from google import genai
from google.genai.types import GenerateContentConfig, Part
# Project used by both SDK entry points below. Edit the literal (or the Colab
# form field) to target another project; leave it empty to fall back to the
# GOOGLE_CLOUD_PROJECT environment variable.
PROJECT_ID = "104916006626"  # @param {type: "string", placeholder: "[your-project-id]" isTemplate: true}
if not PROJECT_ID or PROJECT_ID in ("xyz", "[your-project-id]"):
    # Keep a missing variable as None: wrapping the lookup in str() would
    # silently produce the literal project name "None".
    # NOTE(review): a None project presumably lets the SDKs fall back to their
    # own default-project discovery — confirm for the installed versions.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or None

# Region for both clients; overridable through GOOGLE_CLOUD_REGION.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "australia-southeast1")

# Configure both SDKs from the same PROJECT_ID/LOCATION pair so that changing
# PROJECT_ID above cannot leave one of them pointing at a different project.
vertexai.init(project=PROJECT_ID, location=LOCATION)

client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

In [None]:
import os
from google.oauth2 import service_account

# Path to your service account key file.
# Written as a raw string so every backslash is literal; the earlier spelling
# mixed "\\" with a bare "\k", which is an unrecognised escape sequence that
# recent Python versions warn about. The resulting path is unchanged.
key_path = r"C:\Users\shres\Projects\RAG-case-study\keys\keyproject-401005-6e1cdcbb5996.json"

# Create credentials using the service account key file.
# NOTE(review): `credentials` is not passed to any client in the cells visible
# here — confirm whether it is still needed.
credentials = service_account.Credentials.from_service_account_file(key_path)

# Set the credentials for the current environment.
# NOTE(review): the clients in the first cell are constructed before this cell
# runs in top-to-bottom order — confirm the intended cell ordering.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
# auth_request = transport.requests.Request()
# credentials.refresh(auth_request)

In [None]:
from pydantic import BaseModel, Field

# MIME type identifiers shared by the request-building cells.

# Response format names.
ENUM_MIME_TYPE = "text/x.enum"
JSON_MIME_TYPE = "application/json"

# Content type attached to uploaded PDF bytes.
PDF_MIME_TYPE = "application/pdf"

In [None]:
from typing import List, Optional

class Section(BaseModel):
    # Leaf entry of the table-of-contents schema: a titled, numbered section
    # together with the page it is listed on. The section number is a string,
    # the page number an integer.
    title: str = Field(
        description="The title of the section",
    )
    page_number: int = Field(
        description="The page number of the section",
    )
    section_number: str = Field(
        description="The section number of the section",
    )

class SubDivision(BaseModel):
    # A titled group of Section entries nested inside a Division.
    # `sections` accepts either a list of Section objects or None.
    title: str = Field(
        description="The title of the sub-division",
    )
    sections: Optional[List[Section]] = Field(
        description="The sections of the sub-division",
    )

class Division(BaseModel):
    # A titled division that can hold sub-divisions, sections, or both.
    # Each of the two list fields accepts None as well as a list.
    title: str = Field(
        description="The title of the division",
    )
    sub_divisions: Optional[List[SubDivision]] = Field(
        description="The sub-divisions of the division",
    )
    sections: Optional[List[Section]] = Field(
        description="The sections of the division",
    )

class Part(BaseModel):
    # Top-level grouping of the table-of-contents schema: a titled part that
    # can hold divisions, sections, or both (each list field also accepts None).
    #
    # NOTE(review): defining this class rebinds the name `Part`, which the
    # first cell imports from google.genai.types (and, before that, from
    # vertexai). After this cell runs, a bare `Part` refers to this model, so
    # code that needs the SDK type must use a qualified name.
    title: str = Field(
        description="The title of the part",
    )
    divisions: Optional[List[Division]] = Field(
        description="The divisions of the part",
    )
    sections: Optional[List[Section]] = Field(
        description="The sections of the part",
    )

class TableOfContents(BaseModel):
    # Root of the schema: the ordered list of Part entries. Unlike the nested
    # list fields on the other models, this one does not accept None.
    parts: List[Part] = Field(
        description="The parts of the table of contents",
    )




In [None]:
MODEL_ID = "gemini-1.5-pro-002"

# Load file bytes. The relative path is assembled with os.path.join so it
# resolves on any OS rather than only where "\\" is the path separator.
pdf_path = os.path.join("processed_docs", "Privacy_Act_AU", "Privacy_Act_AU.pdf")
with open(pdf_path, "rb") as f:
    file_bytes = f.read()

# Send to Gemini API and request JSON shaped like the TableOfContents model.
# NOTE(review): `entity_extraction_system_instruction` is not defined in the
# cells visible here — it must be defined by an earlier cell before this runs.
response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        "The following document is an Act of legislation. Extract its table of contents.",
        # Use the SDK type through its qualified name: the bare name `Part`
        # is rebound to the pydantic table-of-contents model defined above,
        # which has no `from_bytes` constructor.
        genai.types.Part.from_bytes(data=file_bytes, mime_type=PDF_MIME_TYPE),
    ],
    config=GenerateContentConfig(
        system_instruction=entity_extraction_system_instruction,
        temperature=0,
        response_schema=TableOfContents,
        response_mime_type=JSON_MIME_TYPE,
    ),
)

In [None]:

def generate_questions(context, num_questions=10, model_name="gemini-1.5-pro-002"):
    """
    Generate a set of questions and answers from a given context.

    The prompt is built from the module-level template
    ``system_prompt_QA_eval_bot`` and the reply is constrained by the
    module-level ``response_schema``; both must be defined before this
    function is called (neither is defined in this cell).

    Args:
    context: The context to generate questions from; substituted into the
        prompt template as ``chunk_set``.
    num_questions: The number of questions to generate; substituted into the
        prompt template as ``num_questions``.
    model_name: Name of the model to instantiate. Defaults to the model that
        was previously hard-coded here.

    Returns:
    The text of the model response as a string. JSON output is requested via
    the generation config, but the text is returned unparsed.
    """
    prompt = system_prompt_QA_eval_bot.format(
        chunk_set=context,
        num_questions=num_questions,
    )
    generation_config = GenerationConfig(
        response_mime_type="application/json",
        response_schema=response_schema,
    )

    model = GenerativeModel(model_name)
    response = model.generate_content(prompt, generation_config=generation_config)
    return response.text
