In [1]:
#!pip install gremlinpython
!pip install tabulate
import random
import pandas as pd
from datetime import datetime, timedelta
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
import nest_asyncio
import boto3
import json
from tabulate import tabulate

# Apply this for SageMaker environments that need nested async execution.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the Gremlin remote connection created later drives its own —
# confirm against the gremlin_python driver version in use.
nest_asyncio.apply()





In [2]:
# Step 1: Generating Synthetic Data
def generate_synthetic_accounts(num_accounts=10000, seed=None, reference_time=None):
    """Build a DataFrame of randomly generated account records.

    Args:
        num_accounts: Number of rows to create. Account ids run from
            ``acc_0`` to ``acc_{num_accounts - 1}``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            drives every draw so the result is repeatable. When ``None`` the
            module-level ``random`` generator is used, so the draws (and their
            order) are the same as before this parameter existed.
        reference_time: Optional ``datetime`` that ``creation_date``, ``dob``
            and ``last_login`` are counted back from. Defaults to
            ``datetime.now()``, read once per call so every row shares the
            same reference instant.

    Returns:
        pandas.DataFrame with one row per account and the columns listed in
        ``columns`` below. The columns are present even when
        ``num_accounts`` is 0, so downstream column access does not fail on an
        empty frame.
    """
    columns = [
        'account_id', 'phone_number', 'address', 'creation_date', 'email',
        'status', 'credit_card_last_4', 'device_id', 'ip_address', 'country',
        'dob', 'balance', 'transaction_count', 'fraud_flag', 'loyalty_points',
        'subscription_type', 'last_login', 'ssn_last_4',
    ]

    # The ``random`` module and ``random.Random`` expose the same draw methods.
    rng = random if seed is None else random.Random(seed)
    now = datetime.now() if reference_time is None else reference_time

    accounts = []
    # NOTE: the order of the draws below is significant — changing it would
    # change the values produced for a given seed.
    for i in range(num_accounts):
        phone_number = f"+1-555-{rng.randint(1000000, 9999999)}"
        address = f"{rng.randint(1000, 9999)} Elm Street"
        creation_date = now - timedelta(days=rng.randint(0, 1000))
        status = rng.choice(["active", "inactive", "suspended"])
        # Only about 20% of accounts get card digits; the rest are None.
        credit_card_last_4 = f"{rng.randint(1000, 9999)}" if rng.random() > 0.8 else None
        device_id = f"dev_{rng.randint(1000, 9999)}"
        ip_address = f"192.168.{rng.randint(0, 255)}.{rng.randint(0, 255)}"
        country = rng.choice(["US", "CA", "UK"])
        dob = now - timedelta(days=rng.randint(6570, 21900))  # Age between 18 and 60
        balance = rng.uniform(0, 10000)
        transaction_count = rng.randint(0, 100)
        fraud_flag = rng.random() > 0.95  # 5% chance of fraud
        loyalty_points = rng.randint(0, 1000)
        subscription_type = rng.choice(["basic", "premium", "vip"])
        last_login = now - timedelta(days=rng.randint(0, 30))

        # Randomly generate a masked SSN (last 4 digits only) for about half the rows
        ssn_last_4 = f"***-**-{rng.randint(1000, 9999)}" if rng.random() > 0.5 else None

        accounts.append({
            'account_id': f"acc_{i}",
            'phone_number': phone_number,
            'address': address,
            'creation_date': creation_date,
            'email': f"user{i}@example.com",
            'status': status,
            'credit_card_last_4': credit_card_last_4,
            'device_id': device_id,
            'ip_address': ip_address,
            'country': country,
            'dob': dob,
            'balance': balance,
            'transaction_count': transaction_count,
            'fraud_flag': fraud_flag,
            'loyalty_points': loyalty_points,
            'subscription_type': subscription_type,
            'last_login': last_login,
            'ssn_last_4': ssn_last_4
        })

    # Passing ``columns`` keeps the schema stable for an empty record list.
    return pd.DataFrame(accounts, columns=columns)

# Generate synthetic accounts
# 15000 rows are requested here, overriding the function's default of 10000.
df_accounts = generate_synthetic_accounts(15000)



In [3]:
# Step 2: Connect to Neptune and analyze graph data
# The websocket endpoint and the traversal source name are kept in named
# constants so the connection call itself stays short.
NEPTUNE_GREMLIN_ENDPOINT = 'wss://neptunedbcluster-ovklzbmd7ywv.cluster-ctij9o7ncbvq.us-east-1.neptune.amazonaws.com:8182/gremlin'
NEPTUNE_TRAVERSAL_SOURCE = 'g'

graph = Graph()
remoteConn = DriverRemoteConnection(NEPTUNE_GREMLIN_ENDPOINT, NEPTUNE_TRAVERSAL_SOURCE)
# `g` is the traversal source bound to the remote connection above.
g = graph.traversal().withRemote(remoteConn)

In [4]:
def _extract_json_object(text):
    """Decode the JSON object that starts at the first ``{`` in ``text``.

    ``raw_decode`` stops at the end of that object, so any prose that follows
    the JSON (even prose containing braces) does not break parsing.

    Returns:
        The decoded object, or ``None`` when ``text`` contains no ``{`` or the
        text from that brace onward is not valid JSON.
    """
    start = text.find('{')
    if start == -1:
        return None
    try:
        decoded, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return decoded


def analyze_fraud_with_claude(fraud_description):
    """Ask Claude 3.5 Sonnet on Bedrock for a brief fraud assessment.

    Args:
        fraud_description: Free-text account activity, inserted into the prompt.

    Returns:
        The JSON object found in the model's reply, decoded to a dict, or
        ``None`` (after printing the raw reply) when no JSON object could be
        decoded from it.

    Errors raised by ``boto3`` or by an unexpected response shape are not
    caught here and propagate to the caller.
    """
    bedrock = boto3.client('bedrock-runtime')

    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 300,
        "messages": [
            {
                "role": "user",
                "content": f"""Analyze the following account activity for potential fraud. Provide a brief assessment in JSON format with these fields:
1. is_fraudulent: true/false
2. confidence: 0-100
3. main_indicator: Brief description of the primary fraud indicator
4. explanation: Short explanation of your assessment

Account activity: {fraud_description}"""
            }
        ],
        "temperature": 0.1
    })

    response = bedrock.invoke_model(
        modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
        body=body,
        contentType='application/json'
    )

    response_body = json.loads(response['body'].read())

    # The model's response is plain text that should contain a JSON object.
    model_response = response_body['content'][0]['text']

    analysis = _extract_json_object(model_response)
    if analysis is not None:
        return analysis

    # Distinguish "nothing that looks like JSON" from "JSON-like but invalid".
    if '{' in model_response:
        print("Failed to parse JSON. Raw response:")
    else:
        print("No JSON object found in the response. Raw response:")
    print(model_response)
    return None

# Sample account activity used to exercise the analyzer
fraud_description = """
New account opened yesterday:
1. Address in New York, but IP address from Russia
2. Received $9,000 in multiple deposits within 24 hours
3. Attempted to wire transfer funds to a tax haven country
"""

# Send the sample to Claude 3.5 Sonnet
fraud_analysis = analyze_fraud_with_claude(fraud_description)

# A falsy result means no parsed assessment came back from the helper.
if not fraud_analysis:
    print("Failed to get a valid analysis from the model.")
else:
    print("Analysis from Claude 3.5 Sonnet:")
    print(json.dumps(fraud_analysis, indent=2))


Analysis from Claude 3.5 Sonnet:
{
  "is_fraudulent": true,
  "confidence": 95,
  "main_indicator": "Rapid influx and attempted transfer of funds from a newly opened account",
  "explanation": "Multiple red flags suggest fraudulent activity: geographic mismatch between address and IP, large deposits immediately after account opening, and attempt to quickly transfer funds to a tax haven. This pattern is consistent with money laundering or unauthorized account access."
}


In [19]:
import json
import boto3

def analyze_fraud_with_claude(fraud_description):
    """Ask Claude 3.5 Sonnet on Bedrock for a detailed new-account-fraud assessment.

    Args:
        fraud_description: Free-text account activity, inserted into the prompt.

    Returns:
        The model's reply decoded from JSON. If the reply as a whole is not
        valid JSON (for example, it is wrapped in prose or a code fence), the
        JSON object starting at the first ``{`` is decoded instead.

    Raises:
        json.JSONDecodeError: If neither the whole reply nor the object at its
            first ``{`` can be decoded.

    Errors raised by ``boto3`` or by an unexpected response shape propagate
    unchanged.
    """
    bedrock = boto3.client('bedrock-runtime')

    messages = [
        {
            "role": "user",
            "content": f"""You are an AI assistant specialized in detecting new account fraud for a financial institution. Analyze the following account activity description and determine if it's likely to be fraudulent. Pay special attention to signs of new account fraud, such as:
1. Multiple accounts with shared contact information
2. Rapid account creation and usage
3. Inconsistent or impossible geographic activity
4. Unusual transaction patterns for new accounts
5. Mismatched personal information
6. Use of virtual private networks (VPNs) or proxy servers
7. Suspicious email domains or disposable email addresses

Account activity description: {fraud_description}

Based on the information provided, please analyze the likelihood of new account fraud and provide your assessment in the following JSON format:
{{
    "is_fraudulent": true/false,
    "confidence": 0-100,
    "fraud_indicators": [
        {{
            "indicator": "Name of the fraud indicator",
            "severity": "Low/Medium/High",
            "explanation": "Brief explanation of why this is indicative of fraud"
        }},
        ...
    ],
    "overall_explanation": "Your detailed explanation of the analysis, including any patterns or red flags identified",
    "recommended_actions": [
        {{
            "action": "Specific action to take",
            "priority": "Low/Medium/High",
            "rationale": "Reason for this action"
        }},
        ...
    ],
    "additional_information_needed": [
        "List any additional information that would be helpful for a more accurate assessment"
    ]
}}"""
        }
    ]

    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 2000,
        "messages": messages,
        "temperature": 0.1,
        "top_p": 1
    })

    response = bedrock.invoke_model(
        modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
        body=body,
        contentType='application/json'
    )

    response_body = json.loads(response['body'].read())
    model_response = response_body['content'][0]['text']

    # Fast path: the reply is pure JSON, exactly as the prompt requests.
    try:
        return json.loads(model_response)
    except json.JSONDecodeError:
        # Fallback: the model added text around the JSON. Decode the object
        # starting at the first '{'; raw_decode ignores whatever follows it.
        start = model_response.find('{')
        if start == -1:
            raise
        analysis, _ = json.JSONDecoder().raw_decode(model_response, start)
        return analysis

# Sample account activity, richer than the earlier example, for the detailed prompt
fraud_description = """
A new account was opened yesterday with the following characteristics:
1. The account holder's address is in New York, but the IP address used to create the account is from a VPN server in Russia.
2. Within 24 hours of opening, the account received five separate deposits totaling just under $10,000 from different sources.
3. Immediately after the deposits, there were attempts to wire transfer most of the funds to an account in a known tax haven country.
4. The phone number provided has been associated with three other new accounts opened in the past week, all with different names but similar activity patterns.
5. The email address used is from a temporary email service provider.
"""

# Send the sample to Claude 3.5 Sonnet and pretty-print the decoded assessment
fraud_analysis = analyze_fraud_with_claude(fraud_description)
print(
    "Analysis from Claude 3.5 Sonnet:",
    json.dumps(fraud_analysis, indent=2),
    sep="\n",
)


Analysis from Claude 3.5 Sonnet:
{
  "is_fraudulent": true,
  "confidence": 95,
  "fraud_indicators": [
    {
      "indicator": "VPN usage from Russia",
      "severity": "High",
      "explanation": "The use of a VPN server in Russia while the account holder's address is in New York is highly suspicious and indicative of attempts to hide the true location."
    },
    {
      "indicator": "Rapid large deposits",
      "severity": "High",
      "explanation": "Multiple deposits totaling just under $10,000 within 24 hours of account opening is unusual for a new account and may be an attempt to avoid reporting thresholds."
    },
    {
      "indicator": "Immediate wire transfer attempt",
      "severity": "High",
      "explanation": "Attempting to wire transfer funds to a tax haven country immediately after deposits is a classic sign of money laundering or fraud."
    },
    {
      "indicator": "Shared phone number",
      "severity": "High",
      "explanation": "The phone number be

In [9]:
import boto3
import json
import time
from tabulate import tabulate
from botocore.exceptions import ClientError

def _extract_json_object(text):
    """Decode the JSON object that starts at the first ``{`` in ``text``.

    ``raw_decode`` stops at the end of that object, so any prose that follows
    the JSON (even prose containing braces) does not break parsing.

    Returns:
        The decoded object, or ``None`` when ``text`` contains no ``{`` or the
        text from that brace onward is not valid JSON.
    """
    start = text.find('{')
    if start == -1:
        return None
    try:
        decoded, _ = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        return None
    return decoded


def analyze_fraud_with_claude(fraud_description, model_id, max_retries=3, initial_backoff=1):
    """Ask a Bedrock-hosted Claude model for a brief fraud assessment, retrying on throttling.

    Args:
        fraud_description: Free-text account activity, inserted into the prompt.
        model_id: Bedrock model identifier passed to ``invoke_model``.
        max_retries: Maximum number of ``invoke_model`` attempts. With a value
            of 0 or less no request is made and ``None`` is returned.
        initial_backoff: Seconds to sleep after the first throttled attempt;
            the delay doubles after each further throttled attempt.

    Returns:
        The JSON object found in the model's reply, decoded to a dict, or
        ``None`` when the reply holds no decodable JSON object, the retries
        are exhausted, or any other error occurs. A message is printed in
        each of those failure cases.
    """
    bedrock = boto3.client('bedrock-runtime')

    prompt = f"""Analyze the following account activity for potential fraud. Provide a brief assessment in JSON format with these fields: 1. is_fraudulent: true/false 2. confidence: 0-100 3. main_indicator: Brief description of the primary fraud indicator 4. explanation: Short explanation of your assessment Account activity: {fraud_description}"""

    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 300,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.1
    })

    # Exponential backoff: only throttling is retried, everything else ends the call.
    for attempt in range(max_retries):
        try:
            response = bedrock.invoke_model(
                modelId=model_id,
                body=body,
                contentType='application/json'
            )
            response_body = json.loads(response['body'].read())
            model_response = response_body['content'][0]['text']

            analysis = _extract_json_object(model_response)
            if analysis is None:
                print(f"Failed to parse JSON for model {model_id}. Raw response:")
                print(model_response)
            return analysis

        except ClientError as e:
            error_code = e.response['Error']['Code']
            if error_code != 'ThrottlingException':
                print(f"AWS error with model {model_id}: {str(e)}")
                return None

            if attempt == max_retries - 1:  # Last attempt, nothing left to retry
                print(f"Max retries reached for model {model_id}. Error: {str(e)}")
                return None

            # Delay doubles with each throttled attempt.
            backoff_time = initial_backoff * (2 ** attempt)
            print(f"Request throttled. Retrying in {backoff_time} seconds...")
            time.sleep(backoff_time)

        except Exception as e:
            # Deliberate best-effort: report and give up rather than abort the notebook.
            print(f"Unexpected error with model {model_id}: {str(e)}")
            return None

    # Reached only when max_retries <= 0 (the loop body never ran).
    return None

# Example description to analyze
fraud_description = """
New account opened yesterday:
1. Address in New York, but IP address from Russia
2. Received $9,000 in multiple deposits within 24 hours
3. Attempted to wire transfer funds to a tax haven country
"""

# Models to compare
models = [
    'anthropic.claude-3-5-sonnet-20240620-v1:0',
    'anthropic.claude-3-haiku-20240307-v1:0'
]

# Analyzing the fraud description with both models.
# `None` is the helper's failure signal; an empty dict is still a parsed reply.
results = {}
for model in models:
    result = analyze_fraud_with_claude(fraud_description, model)
    if result is not None:
        results[model] = result
    else:
        print(f"Skipping comparison for model {model} due to parsing error.")

# If we have results for both models, compare them
if len(results) == len(models):
    first_result = results[models[0]]
    second_result = results[models[1]]

    # Use the union of both key sets (first model's order, then any extras) so
    # a field that only one model returned shows as "N/A" instead of raising.
    all_keys = list(dict.fromkeys([*first_result, *second_result]))
    comparison_data = [
        [key, str(first_result.get(key, "N/A")), str(second_result.get(key, "N/A"))]
        for key in all_keys
    ]

    # Print side-by-side comparison
    print("\nSide-by-side Comparison:")
    print(tabulate(comparison_data, headers=["Field", "Claude 3.5 Sonnet", "Claude 3 Haiku"], tablefmt="grid"))

    # Additional comparison
    print("\nComparison Summary:")
    if 'is_fraudulent' not in first_result or 'is_fraudulent' not in second_result:
        print("Cannot compare verdicts: 'is_fraudulent' is missing from at least one response.")
    elif first_result['is_fraudulent'] == second_result['is_fraudulent']:
        print("Both models agree on whether the activity is fraudulent.")
    else:
        print("Models disagree on whether the activity is fraudulent.")

    # The models are asked for a 0-100 number, but the value is model-generated
    # text, so check the type before subtracting.
    first_confidence = first_result.get('confidence')
    second_confidence = second_result.get('confidence')
    if isinstance(first_confidence, (int, float)) and isinstance(second_confidence, (int, float)):
        confidence_diff = abs(first_confidence - second_confidence)
        print(f"Confidence difference: {confidence_diff}")
    else:
        print("Confidence difference: unavailable (missing or non-numeric confidence).")
else:
    print("Unable to compare results due to parsing errors.")


Side-by-side Comparison:
+----------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Field          | 