In [3]:
import json
import boto3
import time  

bedrock_runtime = boto3.client(  
    service_name='bedrock-runtime',
    region_name=boto3.Session().region_name,
) 

def bedrock_embed(prompt_data):
    
    #     Define model parameters  
    body = json.dumps({  
        "inputText": prompt_data,  
        "dimensions": 512,  
        "normalize": True  
    })  
    
    model_id = 'amazon.titan-embed-text-v2:0'  # Look for embeddings in the modelID
    accept = 'application/json'  
    content_type = 'application/json'  
    
    # Record start time  
    start_time = time.time()  
    
    # Invoke model  
    response = bedrock_runtime.invoke_model(  
        body=body,
        modelId=model_id,
        accept=accept,
        contentType=content_type
    )
    
    # Record end time  
    end_time = time.time()  
    
    # Calculate duration  
    duration = end_time - start_time  
    print(f"Bedrock model invocation duration: {duration} seconds")  
    
    # Process response  
    response_body = json.loads(response['body'].read())  
    embedding = response_body.get('embedding')  
    
    return embedding

In [4]:
bedrock_embed("HELLO")

Bedrock model invocation duration: 0.19741058349609375 seconds


[-0.107890256,
 0.03613971,
 0.05535546,
 0.0026443691,
 0.06945876,
 -0.0606442,
 0.002633351,
 0.03314276,
 0.018246146,
 -0.01189966,
 0.0029528788,
 0.0054650293,
 0.0027435329,
 -0.03719746,
 0.0065668495,
 -0.025562234,
 0.00039390082,
 -0.038078915,
 0.081446566,
 0.0049361554,
 -0.03314276,
 0.008461981,
 0.053945128,
 -0.020537933,
 -0.0606442,
 -0.045483146,
 -0.044954274,
 -0.036844876,
 0.019832768,
 -0.08497239,
 0.08603014,
 0.024504486,
 0.024857068,
 -0.0033274977,
 0.00015081167,
 0.07016393,
 0.025209652,
 0.0028867696,
 0.00036360073,
 -0.011811515,
 0.060291614,
 -0.00797718,
 -0.00594983,
 0.046893477,
 0.00594983,
 0.08497239,
 0.031027263,
 0.04583573,
 0.04301507,
 -0.02018535,
 -0.00019832767,
 0.035081964,
 -0.08179915,
 0.07086909,
 0.014367739,
 0.0322613,
 0.011987806,
 0.043720234,
 -0.025738526,
 0.020537933,
 0.007492379,
 0.0044954275,
 0.08497239,
 -0.01736469,
 -0.0019502222,
 -0.052534796,
 -0.08461981,
 -0.021771971,
 0.00022725046,
 0.026267398,
 -

In [3]:
api_response = {
    "query_embedding": None,
    "embedding_source": None,
    "destinations": [
        {
            "name": "calcutta",
            "lat": None,
            "lon": None
        }
    ],
    "brands": [
        {
            "name": "",
            "sentiment": ""
        }
    ],
    "price_filter": {
        "threshold": 500.0,
        "currency": "USD",
        "operator": "lte",
        "rate": None
    },
    "themes": [
        {
            "name": "",
            "sentiment": ""
        }
    ],
    "experiences": [
        {
            "name": "",
            "sentiment": ""
        }
    ],
    "attributes": [
        {
            "name": "POOL",
            "sentiment": None
        }
    ]
}

In [5]:
price_log = {
            "search_id": "abc",
            "price": api_response["price_filter"].get("threshold"),
            "operator": api_response["price_filter"].get("operator"),
            "currency": api_response["price_filter"].get("currency"),
            "rate": api_response["price_filter"].get("rate"),
        }

In [6]:
price_log

{'search_id': 'abc',
 'price': 500.0,
 'operator': 'lte',
 'currency': 'USD',
 'rate': None}

In [7]:
# Extracting values from the API response
location_name = api_response['destinations'][0]['name'] if len(api_response['destinations']) > 0 else None
lat = api_response['destinations'][0]['lat'] if len(api_response['destinations']) > 0 else None
lon = api_response['destinations'][0]['lon'] if len(api_response['destinations']) > 0 else None
experiences = api_response['experiences']

# Assuming static values or placeholders for missing keys
search_id = "YourSearchIDHere"  # Placeholder, replace with actual search ID
llm_source = "Bedrock"

# Constructing the dictionary
result_dict = {
    "search_id": search_id,
    "llm_source": llm_source,
    "location": location_name,
    "lat": lat,
    "lon": lon,
    "experiences": experiences
}

In [8]:
result_dict

{'search_id': 'YourSearchIDHere',
 'llm_source': 'Bedrock',
 'location': 'calcutta',
 'lat': None,
 'lon': None,
 'experiences': [{'name': '', 'sentiment': ''}]}

In [3]:
import boto3
import pandas as pd
import io
import json
import pyarrow.parquet as pq

# Initialize S3 client
s3 = boto3.client('s3')

# Parse the S3 URI
s3_uri = 's3://h-staging-genai-events/errors/format-conversion-failed/intent-search-context-log/eventdatehour=2024-09-05-11/h-staging-ds-pers-intent-search-context-log-1-2024-09-05-11-02-09-7491b6a0-0e5b-41f3-8b83-b0315e472e75'
bucket_name, key = s3_uri.replace("s3://", "").split("/", 1)

# Download the file from S3
response = s3.get_object(Bucket=bucket_name, Key=key)
file_content = response['Body'].read()

# Try to determine the file format
try:
    # Attempt to read as CSV
    df = pd.read_csv(io.BytesIO(file_content))
    print("File read as CSV.")
except Exception as e_csv:
    try:
        # Attempt to read as JSON
        json_content = json.loads(file_content.decode('utf-8'))
        if isinstance(json_content, list):
            df = pd.DataFrame(json_content)
        else:
            df = pd.json_normalize(json_content)
        print("File read as JSON.")
    except Exception as e_json:
        try:
            # Attempt to read as Parquet
            df = pd.read_parquet(io.BytesIO(file_content))
            print("File read as Parquet.")
        except Exception as e_parquet:
            print("Failed to read the file in CSV, JSON, or Parquet format.")
            raise

# Display the DataFrame
print(df.head())


File read as CSV.
Empty DataFrame
Columns: [{"attemptsMade":1, arrivalTimestamp:1725534129000, errorCode:"DataFormatConversion.AccessDenied", errorMessage:"Access was denied when calling Glue. Please ensure that the role specified in the data format conversion configuration has the necessary permissions. User: arn:aws:sts::354156725301:assumed-role/hyatt-ds-algorithmic-genai-intentsearc-FirehoseRole-9774ZbY6YZZz/AWSFirehoseToS3 is not authorized to perform: glue:GetTableVersions on resource: arn:aws:glue:us-east-1:354156725301:catalog because no identity-based policy allows the glue:GetTableVersions action (Service: Glue,  Status Code: 400,  Request ID: 2372914d-1208-4c6c-b225-fe679b103ef6)", attemptEndingTimestamp:1725534432583, rawData:"eyJzZWFyY2hfaWQiOiAiZGZhMmY2MmUtOTE1YS00OWM1LWJjZGQtOWNiOTcwNDJiMWFjIiwgInZpc2l0b3JfaWQiOiAidmlzaXRvcklEIiwgIm1lbWJlcl9pZCI6IG51bGwsICJjaGFubmVsIjogIndlYiIsICJsb2NhbF90cyI6ICIxNzI1NTM0MTIwMjE4IiwgImNvdW50cnlfbmFtZSI6ICJDTiIsICJyZWdpb25fbmFtZSI6ICIiLCA

In [4]:
df

Unnamed: 0,"{""attemptsMade"":1",arrivalTimestamp:1725534129000,"errorCode:""DataFormatConversion.AccessDenied""","errorMessage:""Access was denied when calling Glue. Please ensure that the role specified in the data format conversion configuration has the necessary permissions. User: arn:aws:sts::354156725301:assumed-role/hyatt-ds-algorithmic-genai-intentsearc-FirehoseRole-9774ZbY6YZZz/AWSFirehoseToS3 is not authorized to perform: glue:GetTableVersions on resource: arn:aws:glue:us-east-1:354156725301:catalog because no identity-based policy allows the glue:GetTableVersions action (Service: Glue",Status Code: 400,"Request ID: 2372914d-1208-4c6c-b225-fe679b103ef6)""",attemptEndingTimestamp:1725534432583,"rawData:""eyJzZWFyY2hfaWQiOiAiZGZhMmY2MmUtOTE1YS00OWM1LWJjZGQtOWNiOTcwNDJiMWFjIiwgInZpc2l0b3JfaWQiOiAidmlzaXRvcklEIiwgIm1lbWJlcl9pZCI6IG51bGwsICJjaGFubmVsIjogIndlYiIsICJsb2NhbF90cyI6ICIxNzI1NTM0MTIwMjE4IiwgImNvdW50cnlfbmFtZSI6ICJDTiIsICJyZWdpb25fbmFtZSI6ICIiLCAiY2l0eV9uYW1lIjogIiIsICJ1c2VyX2FnZW50IjogIk1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwXzE1XzcpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8xMjcuMC4wLjAgU2FmYXJpLzUzNy4zNiBFZGcvMTI3LjAuMC4wIiwgInF1ZXJ5IjogIkxhIFphbWJyYSBSZXNvcnQiLCAiYXJyaXZhbF9kYXRlIjogIjIwMjQtMDktMTIiLCAiZGVwYXJ0dXJlX2RhdGUiOiAiMjAyNC0wOS0xNCIsICJudW1fcm9vbXMiOiAxLCAiYWNjZXNzaWJsZSI6IGZhbHNlLCAibnVtX2FkdWx0cyI6IDEsICJudW1fY2hpbGRyZW4iOiAwLCAic3BlY2lhbF9yYXRlX2NvZGUiOiAiUEtHT0xGIiwgInNwZWNpYWxfcmF0ZV9jYXRlZ29yeSI6ICJPRkZFUl9DT0RFIiwgInVzZV9wb2ludHMiOiB0cnVlLCAic2VhcmNoX3RzIjogIjIwMjQtMDktMDVUMTE6MDI6MDQuNTUwNzc1In0=""","dataCatalogTable:{""catalogId"":""354156725301""","databaseName:""h-staging-ds-genai-events""","tableName:""intent_search_context_log""","region:""us-east-1""","versionId:""LATEST""","roleArn:""arn:aws:iam::354156725301:role/hyatt-ds-algorithmic-genai-intentsearc-FirehoseRole-9774ZbY6YZZz""}}"


In [None]:
search_classification_log_id
search_classification_log_id

In [1]:
from pydantic import BaseModel, Field, ValidationError, confloat, validator
from datetime import date, datetime
from typing import List, Literal, Optional

class SearchContext(BaseModel):
    query: str
    arrival_date: Optional[date]

In [2]:
event = {
    "query": "hyatt hotels for less than 30,000 points, modern theme, with city tour experience",
}


In [3]:
SearchContext(**event)

ValidationError: 1 validation error for SearchContext
arrival_date
  Field required [type=missing, input_value={'query': 'hyatt hotels f...h city tour experience'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.8/v/missing

In [None]:
secrets_manager = boto3.client(
    "secretsmanager", region_name=boto3.Session().region_name
)

gcp_env = "STAGE" if environment == "staging" else "PROD"
gcp_secret_name = os.environ["gcp_secret_name"].replace("ENV", gcp_env)

gcp_scopes = [os.environ["gcp_scope"]]

def get_secret_dict(secret_name):
    response = secrets_manager.get_secret_value(SecretId=secret_name)
    secret_string = response["SecretString"]
    secret_dict = json.loads(secret_string)
    return secret_dict

# Retrieve Google Application Credentials from AWS Secrets Manager and refresh token when needed
creds = None

def load_google_creds():
    global creds
    if creds and creds.token and not creds.expired:
        return creds

    try:
        secret_dict = get_secret_dict(gcp_secret_name)

        # Load the credentials from the secret dictionary
        creds = service_account.Credentials.from_service_account_info(
            secret_dict, scopes=gcp_scopes
        )
        creds.refresh(GoogleRequest())
        return creds

    except Exception:
        logger.error("Error reading Google Application Credentials")
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from None

def embed(
    text_to_embed,
    model_url="https://us-central1-aiplatform.googleapis.com/v1/projects/hdp-analytics-dev/locations/us-central-1/publishers/google/models/text-embedding-004:predict",
    embedding_dim: int = 768,
    task="SEMANTIC_SIMILARITY",
):
    """

    Embeds texts with a pre-trained, foundational model.

    """

    try:
        creds = load_google_creds()
        payload = {
            "instances": [
                {"task_type": task, "content": text_to_embed},
            ],
            "parameters": {
                "output_dimensionality": embedding_dim,
                "auto_truncate": True,
            },
        }
        headers = {
            "Authorization": "Bearer %s" % creds.token,
            "Content-Type": "application/json",
        }
        logger.info(
            "Embedding text using model at %s with payload: %s", model_url, payload
        )
        response = requests.post(model_url, headers=headers, json=payload)
        response_json = json.loads(response.text)
        embeddings = response_json["predictions"][0]["embeddings"]["values"]
        logger.info("Received embedding response: %s", response_json)
        return embeddings

    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred during embedding: %s", http_err)
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from http_err
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred during embedding: %s", conn_err)
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from conn_err
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred during embedding: %s", timeout_err)
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from timeout_err
    except requests.exceptions.RequestException as req_err:
        logger.error("Request exception occurred during embedding: %s", req_err)
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from req_err
    except Exception as err:
        logger.error("An error occurred during embedding: %s", err)
        traceback.print_exc()
        raise ValueError("INTENT_SEARCH_API_ERROR_500") from err