In [1]:
! pip install pymongo psycopg2-binary faker pandas

Collecting pymongo
  Downloading pymongo-4.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (22 kB)
Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (4.9 kB)
Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[

### Simulation of Lumi Data

#### Structured Data Source:
- Simulates data from a bank API.
- Contains fields like customer_id, bank_name, credit_score, and outstanding_debt.
- Stored in PostgreSQL.

#### Unstructured Data Source:
- Simulates data from a third-party service provider.
- Contains raw, unstructured data (e.g., JSON with varying fields).
- Stored in MongoDB.

#### Correlation:
- Both datasets are linked using a customer_id.

In [7]:
import random
import time
from faker import Faker
import pymongo
import psycopg2
import pandas as pd
from datetime import datetime, timedelta

# Initialize Faker
fake = Faker()

# MongoDB connection
mongo_client = pymongo.MongoClient("mongodb://mongodb:27017/")
mongo_db = mongo_client["lumi_data"]
unstructured_data_collection = mongo_db["unstructured_data"]

# PostgreSQL connection
pg_conn = psycopg2.connect(
    dbname="lumi_credit",
    user="debezium",
    password="dbz",
    host="postgres"
)
pg_cursor = pg_conn.cursor()

# Function to generate fake customer data
def generate_customer_data():
    return {
        "name": fake.name(),
        "email": fake.email(),
        "phone": fake.phone_number(),
        "address": fake.address()
    }

# Function to generate structured data (from a bank API)
def generate_structured_data(customer_id):
    return {
        "customer_id": customer_id,
        "bank_name": fake.company(),
        "credit_score": random.randint(300, 850),
        "outstanding_debt": round(random.uniform(0, 100000), 2),
        "last_updated": fake.date_between(start_date='-2y', end_date='today')
    }

# Function to generate unstructured data (from a third-party service provider)
def generate_unstructured_data(customer_id):
    return {
        "customer_id": customer_id,
        "raw_data": {
            "transaction_history": [
                {
                    "date": datetime.combine(fake.date_this_year(), datetime.min.time()),  # Convert date to datetime
                    "amount": round(random.uniform(10, 1000), 2)
                } for _ in range(random.randint(1, 10))
            ],
            "social_media_activity": {
                "platform": random.choice(["Twitter", "Facebook", "LinkedIn"]),
                "activity_score": random.randint(1, 100)
            },
            "miscellaneous": {
                "notes": fake.sentence(),
                "risk_flags": random.choice(["Low", "Medium", "High"])
            }
        }
    }

# Simulate the process
def simulate_loan_request():
    # Step 1: Generate customer data
    customer_data = generate_customer_data()
    
    # Insert into PostgreSQL Customers table
    pg_cursor.execute("""
        INSERT INTO Customers (name, email, phone, address)
        VALUES (%s, %s, %s, %s) RETURNING customer_id
    """, (customer_data["name"], customer_data["email"], customer_data["phone"], customer_data["address"]))
    customer_id = pg_cursor.fetchone()[0]
    pg_conn.commit()

    # Step 2: Generate structured data (from a bank API)
    structured_data = generate_structured_data(customer_id)
    
    # Insert into PostgreSQL Credit_History table
    pg_cursor.execute("""
        INSERT INTO Credit_History (customer_id, bank_name, credit_score, outstanding_debt, last_updated)
        VALUES (%s, %s, %s, %s, %s)
    """, (structured_data["customer_id"], structured_data["bank_name"], structured_data["credit_score"], 
          structured_data["outstanding_debt"], structured_data["last_updated"]))
    pg_conn.commit()

    # Step 3: Generate unstructured data (from a third-party service provider)
    unstructured_data = generate_unstructured_data(customer_id)
    
    # Insert into MongoDB
    unstructured_data_collection.insert_one(unstructured_data)

    print(f"Simulated loan request for customer ID {customer_id}")

# Run the simulation
'''
for _ in range(10):  # Simulate 10 loan requests
    simulate_loan_request()
'''

while True:
    simulate_loan_request()
    time.sleep(2)

# Close connections
pg_cursor.close()
pg_conn.close()
mongo_client.close()

Simulated loan request for customer ID 292
Simulated loan request for customer ID 293
Simulated loan request for customer ID 294
Simulated loan request for customer ID 295
Simulated loan request for customer ID 296
Simulated loan request for customer ID 297
Simulated loan request for customer ID 298
Simulated loan request for customer ID 299
Simulated loan request for customer ID 300
Simulated loan request for customer ID 301
Simulated loan request for customer ID 302
Simulated loan request for customer ID 303
Simulated loan request for customer ID 304
Simulated loan request for customer ID 305
Simulated loan request for customer ID 306
Simulated loan request for customer ID 307
Simulated loan request for customer ID 308
Simulated loan request for customer ID 309
Simulated loan request for customer ID 310
Simulated loan request for customer ID 311
Simulated loan request for customer ID 312
Simulated loan request for customer ID 313
Simulated loan request for customer ID 314
Simulated l

KeyboardInterrupt: 