In [112]:
import pandas as pd
import random
from datetime import datetime, timedelta
from pymongo import MongoClient
from azure.core.exceptions import AzureError
from azure.core.credentials import AzureKeyCredential
import json
import urllib 
from openai import AzureOpenAI
from dotenv import dotenv_values
import os
# Parse a dotenv file into a dict. No path is passed here, so "variable.env"
# is not named explicitly by this call.
# NOTE(review): `config` is not referenced by any cell visible in this notebook;
# the settings used below are read with os.getenv instead - confirm it is still needed.
config = dotenv_values()

In [113]:
from dotenv import load_dotenv

# Load the settings stored in "variable.env" into the process environment so the
# os.getenv(...) lookups in the following cell can see them. override=True is
# passed so values from the file take precedence over variables already set.
load_dotenv("variable.env", override=True)

True

In [117]:
from urllib.parse import quote_plus  # retained; no longer applied to the Azure OpenAI settings below

# Cosmos DB (MongoDB API) connection string; falls back to a placeholder when unset.
MONGO_CONNECTION_STRING = os.getenv("MONGO_CONNECTION_STRING", "<YOUR-COSMOS-DB-CONNECTION-STRING>")

# Azure OpenAI settings are used verbatim. They must NOT be percent-encoded:
# quote_plus("https://...") yields "https%3A%2F%2F...", which is not a usable
# endpoint URL, and quote_plus(None) fails with an unhelpful TypeError when a
# variable is missing. os.environ[...] raises a KeyError naming the missing
# variable instead.
AOAI_KEY = os.environ["AOAI_KEY"]
AOAI_ENDPOINT = os.environ["AOAI_ENDPOINT"]
API_VERSION = os.environ["API_VERSION"]
AOAI_EMBEDDING_DEPLOYMENT = os.environ["AOAI_EMBEDDING_DEPLOYMENT"]
AOAI_EMBEDDING_DEPLOYMENT_MODEL = os.environ["AOAI_EMBEDDING_DEPLOYMENT_MODEL"]

# Shared Azure OpenAI client used by generate_embedding().
client = AzureOpenAI(
    azure_endpoint=AOAI_ENDPOINT,
    api_key=AOAI_KEY,
    api_version=API_VERSION,
)

In [13]:
def generate_embedding(text):
    """Return the embedding vector for ``text``.

    Sends ``text`` to the embeddings endpoint of the module-level ``client``,
    using ``AOAI_EMBEDDING_DEPLOYMENT_MODEL`` as the ``model`` argument, and
    returns the ``embedding`` attribute of the first item in the response.
    """
    embedding_request = {
        "model": AOAI_EMBEDDING_DEPLOYMENT_MODEL,
        "input": text,
    }
    first_item = client.embeddings.create(**embedding_request).data[0]
    return first_item.embedding

In [118]:
# Open a client against the configured connection string and select the database.
mongo_conn = MONGO_CONNECTION_STRING
mongo_client = MongoClient(mongo_conn)
db = mongo_client['account']

# Collection that holds the transaction documents; the handle is bound up front.
COLLECTION_NAME = "transactions"
collection = db[COLLECTION_NAME]

# Reuse the collection when it is already present, otherwise create it explicitly.
if COLLECTION_NAME in db.list_collection_names():
    print("Using collection: '{}'.\n".format(COLLECTION_NAME))
else:
    db.create_collection(COLLECTION_NAME)
    print("Created collection '{}'.\n".format(COLLECTION_NAME))

  mongo_client = MongoClient(mongo_conn)


Using collection: 'transactions'.



In [81]:
# Create the vector index that the `$search` / `cosmosSearch` stage queries.
# The index covers the "Embedding" field of the 'transactions' collection.
db.command({
  'createIndexes': 'transactions',
  'indexes': [
    {
      'name': 'transactionsIndex',
      'key': {
        "Embedding": "cosmosSearch"
      },
      'cosmosSearchOptions': {
        'kind': 'vector-hnsw',
        # HNSW is tuned with `m` and `efConstruction`; `numLists` belongs to
        # the 'vector-ivf' index kind and has no meaning here. The values
        # below are the documented HNSW defaults.
        'm': 16,
        'efConstruction': 64,
        'similarity': 'COS',  # cosine similarity
        # Must equal the length of the vectors stored in "Embedding".
        'dimensions': 1536
      }
    }
  ]
})

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

In [78]:
# Drop the index named 'transactionsIndex' from the 'transactions' collection.
# NOTE(review): this is the same index name the createIndexes cell builds on the
# "Embedding" field. Run top-to-bottom, this cell removes it again before the
# vector search cells execute - confirm the intended order (drop, then create).
db.command({'dropIndexes': 'transactions', 'index': 'transactionsIndex'})

{'nIndexesWas': 2, 'ok': 1.0}

In [62]:
# Remove every document from the collection (the empty filter matches all documents).
collection.delete_many({})

DeleteResult({'n': 100, 'ok': 1.0}, acknowledged=True)

In [63]:
# Bulk-insert the prepared documents into the collection.
# NOTE(review): `embeddings` is not defined in any cell visible in this notebook -
# presumably a list of transaction documents that each carry an "Embedding"
# vector; confirm where it is built so the notebook survives Restart & Run All.
collection.insert_many(embeddings)

print("Data uploaded successfully!")

Data uploaded successfully!


In [108]:
def vector_search(new_transaction, num_results=5):
    """Look up the stored transactions most similar to ``new_transaction``.

    Args:
        new_transaction: Text handed to ``generate_embedding`` to obtain the
            query vector.
        num_results: Value sent as ``k`` in the search stage (defaults to 5).

    Returns:
        Whatever ``collection.aggregate`` returns for the two-stage pipeline
        (vector ``$search`` on the "Embedding" path, then ``$project``).
    """
    # Stage 1: vector search against the "Embedding" field.
    search_stage = {
        '$search': {
            "cosmosSearch": {
                "vector": generate_embedding(new_transaction),
                "path": "Embedding",
                "k": num_results
            },
            "returnStoredSource": True
        }
    }

    # Stage 2: expose the search score alongside the descriptive fields.
    kept_fields = ('TransactionID', 'Amount', 'Timestamp', 'Location', 'Merchant', 'Fraud')
    projection = {'similarityScore': {'$meta': 'searchScore'}}
    projection.update({field: 1 for field in kept_fields})
    project_stage = {'$project': projection}

    return collection.aggregate([search_stage, project_stage])



In [109]:
# Example query transaction (replace with actual transaction data).
new_transaction = {
    "TransactionID": "T5978",
    "Amount": 15.00,
    "Timestamp": "2024-09-15 14:30:00",
    "Location": "Mexico",
    "Merchant": "Walmart"
}

# Text that gets embedded for the query. Only TransactionID, Timestamp and
# Location are included; Amount and Merchant are left out of the query text.
new_transaction_text=f"TransactionID: {new_transaction['TransactionID']}, Timestamp: {new_transaction['Timestamp']}, Location: {new_transaction['Location']}"

# Run the vector search for the 5 most similar stored transactions. The pipeline
# does not filter on the Fraud field, so results are not restricted to fraud cases.
results = vector_search(new_transaction_text, num_results=5)
# Print each matching document, then echo the query transaction for comparison.
for r in results:
    print(r) 
print(f"TransactionID: {new_transaction['TransactionID']}, Location: {new_transaction['Location']}, Merchant: {new_transaction['Merchant']}")


{'_id': ObjectId('66e75576b5224a4bb5787440'), 'TransactionID': 'T6345', 'Amount': 60.86, 'Timestamp': '2024-09-15 13:58:38', 'Location': 'Texas', 'Merchant': 'Unrecognized Merchant', 'Fraud': True, 'similarityScore': 0.859049030996715}
{'_id': ObjectId('66e75576b5224a4bb5787484'), 'TransactionID': 'T6029', 'Amount': 22.22, 'Timestamp': '2024-09-15 14:20:38', 'Location': 'Texas', 'Merchant': 'Unrecognized Merchant', 'Fraud': True, 'similarityScore': 0.8586209416389526}
{'_id': ObjectId('66e75576b5224a4bb578746b'), 'TransactionID': 'T1805', 'Amount': 27.51, 'Timestamp': '2024-09-15 14:27:38', 'Location': 'Texas', 'Merchant': 'Random Store', 'Fraud': True, 'similarityScore': 0.8523443253245563}
{'_id': ObjectId('66e75576b5224a4bb578748f'), 'TransactionID': 'T1263', 'Amount': 10.66, 'Timestamp': '2024-09-15 14:21:38', 'Location': 'Texas', 'Merchant': 'Unrecognized Merchant', 'Fraud': True, 'similarityScore': 0.8517198054957642}
{'_id': ObjectId('66e75576b5224a4bb5787485'), 'TransactionID':

In [None]:
# TODO: vector search currently relies on Location as the primary matching signal.