In [None]:
### Important Parameters for AWS Settings

In [None]:
# --- AWS account / session settings (fill these in before running) ---
aws_profile_name = ''   # named profile handed to boto3.Session
aws_region_name = ''    # region used for the Bedrock client and for request signing

# --- Amazon Bedrock ---
aws_service_name = 'bedrock-runtime'
aws_embedding_model = 'amazon.titan-embed-text-v1'

# --- OpenSearch ---
opensearch_host_name = ""  # copy from the AWS OpenSearch console; used as the `host` entry on port 443
index_name = 'sgrealestate-index'

### Import Python modules

In [None]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, helpers
import boto3
import json
import pandas as pd 

### Load Singapore Real Estate Resale Flat Data (downloaded from Kaggle)

In [None]:
# Resale-flat records; the path is relative to the notebook's working directory.
resale_csv_path = './data/Singapore-RealEstate-Selective.csv'
df = pd.read_csv(resale_csv_path)


In [None]:
# Show the loaded DataFrame as this cell's output for a quick visual check.
df

In [None]:
### Create a meaningful, derived contextual text for each sale line item

In [None]:

def _describe_sale(row):
    """Render one resale record as a single natural-language sentence."""
    return (
        f'In "{row["month"]}", in the town of "{row["town"]}", '
        f'at street of "{row["street_name"]}", '
        f'in the apartment block of "{row["block"]}", '
        f'a flat with "{row["flat_type"]}"s and area of "{row["floor_area_sqm"]}" square meters '
        f'(located between storey "{row["storey_range"]}") '
        f'was sold for "{row["resale_price"]}" SGD.'
    )


# One sentence per row; this is the text that gets embedded later.
df['text'] = df.apply(_describe_sale, axis=1)

# Display-only tweak: do not truncate long cell values when printing.
pd.set_option('display.max_colwidth', None)

# Only the 'text' column of the first few rows, for brevity.
print(df[['text']].head())
 
 

In [None]:
# Bedrock runtime client built from the configured named profile and region.
bedrock_session = boto3.Session(profile_name=aws_profile_name)
bedrock = bedrock_session.client(
    service_name=aws_service_name,
    region_name=aws_region_name,
)


### Initialize and configure OpenSearch client

In [None]:
# Request-signing setup for the OpenSearch client.
open_search_service = "aoss"  # service name handed to the signer
region = aws_region_name
signing_session = boto3.Session(profile_name=aws_profile_name)
credentials = signing_session.get_credentials()
auth = AWSV4SignerAuth(credentials, region, open_search_service)

In [None]:
# TLS connection to the OpenSearch endpoint, authenticated with `auth`.
opensearch_endpoint = {"host": opensearch_host_name, "port": 443}
client = OpenSearch(
    hosts=[opensearch_endpoint],
    http_auth=auth,
    connection_class=RequestsHttpConnection,
    use_ssl=True,
    verify_certs=True,
    timeout=60,
    pool_maxsize=20,
)

### Create Embedding Using Amazon Titan Embed Text V1 Model 

In [None]:
def generate_embedding(text):
    """Return the embedding vector the configured Bedrock model produces for ``text``.

    Sends ``{"inputText": text}`` as JSON to ``bedrock.invoke_model`` using the
    module-level ``aws_embedding_model`` id, then decodes the JSON response body.

    Args:
        text: The text to embed.

    Returns:
        The value stored under the ``"embedding"`` key of the decoded response.

    Raises:
        ValueError: If the decoded response has no ``"embedding"`` entry.
            Failing here keeps a ``None`` vector from being returned to callers
            that store or index the result.
    """
    print("******Passed text is", text)
    data_format = 'application/json'
    response = bedrock.invoke_model(
        body=json.dumps({"inputText": text}),
        modelId=aws_embedding_model,
        accept=data_format,
        contentType=data_format,
    )
    response_body = json.loads(response['body'].read())

    embedding = response_body.get('embedding')
    if embedding is None:
        # Surface the problem at the source instead of silently returning None.
        raise ValueError(
            f"Model response contains no 'embedding' (keys: {sorted(response_body)})"
        )
    print("******Embedding response is", embedding)

    return embedding

In [None]:
# Embed every contextual sentence: one generate_embedding call per row.
embedding_column = df["text"].apply(generate_embedding)
df = df.assign(embedding=embedding_column)

### Ingest the embedding into AWS OpenSearch Vector DB

In [None]:
def ingest_data_into_aws_opensearch(embedded_data,raw_data):
    """Index a single document holding a vector and the text it was derived from.

    Args:
        embedded_data: Stored under ``sg_realestate_data_vector``.
        raw_data: Stored under ``sg_realestate_raw_data``.

    The document is written to ``index_name`` through the module-level
    ``client``; the service response is printed and nothing is returned.
    """
    payload = {}
    payload["sg_realestate_data_vector"] = embedded_data
    payload["sg_realestate_raw_data"] = raw_data

    result = client.index(index=index_name, body=payload)
    print('\nIngesting Data into AWS OpenSearch. Response:', result)

In [None]:
def _ingest_row(row):
    """Index one DataFrame row: its embedding together with its source text."""
    return ingest_data_into_aws_opensearch(row['embedding'], row['text'])


df.apply(_ingest_row, axis=1)

### Semantic Search - KNN search over Index of AWS OpenSearch DB

In [None]:
def perform_knn_search_on_aws_vector_db(embedding_vector, k=15):
    """Run a k-NN query against ``index_name`` and return the raw search response.

    Args:
        embedding_vector: Query vector matched against the
            ``sg_realestate_data_vector`` field.
        k: Number of nearest neighbours to request; also used as the result
            ``size``. Defaults to 15.

    Returns:
        Whatever ``client.search`` returns for the query. The stored vector
        field is excluded from each hit's ``_source``.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    document = {
        "size": k,
        # Leave the stored vector out of the returned documents.
        "_source": {"excludes": ["sg_realestate_data_vector"]},
        "query": {
            "knn": {
                "sg_realestate_data_vector": {
                    "vector": embedding_vector,
                    "k": k,
                }
            }
        },
    }
    response = client.search(body=document, index=index_name)
    return response

In [None]:
# Embed a sample natural-language question to use as the search vector.
query = 'any flat was sold for 400000 sgd'
vector = generate_embedding(query)
print("******Vector", vector)

In [None]:
# Search with the query vector, then keep only the list of matching documents.
response = perform_knn_search_on_aws_vector_db(vector)
print("****Actual Response", response)
hits_envelope = response['hits']
data = hits_envelope['hits']
print(data)

In [None]:
# Show the list of hits from the search response as this cell's output.
data