In [8]:
## libraries 
import json
import os
from time import time
from urllib.parse import quote_plus

import dotenv
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pymongo import MongoClient
from sqlalchemy import create_engine, text

# Notebook-wide display settings, applied once right after the imports.
# Passing None removes pandas' cap on the number of columns it renders.
pd.set_option('display.max_columns', None)
# Render every matplotlib figure with the built-in dark theme.
plt.style.use('dark_background')

In [3]:
## database initialization
# Load variables from a .env file (if one is found) into the process environment.
dotenv.load_dotenv()

# Indexing os.environ (rather than .get) raises KeyError as soon as a required
# setting is missing, so a misconfigured environment fails in this cell.
_env = os.environ

# MySQL connection settings.
MYSQL_CONFIG = {
    'username': _env['mysql_username'],
    'password': _env['mysql_password'],
    'host'    : _env['mysql_hostname'],
}

# MongoDB connection settings. Note the host key is 'hostname' here but 'host'
# in MYSQL_CONFIG.
MONGO_DB_CONFIG = {
    'username': _env['mongo_username'],
    'password': _env['mongo_password'],
    'hostname' : _env['mongo_hostname'],
    'appName' : _env['mongo_appName'],
    'port'    : _env['mongo_port']
}

# Database name shared by the MySQL URL and the MongoDB client lookup.
DB_NAME = 'project_test'

In [4]:
# Build the SQLAlchemy engine for MySQL.
# The credentials are percent-encoded first: a raw '@', ':', '/' or '#' inside
# the username/password would otherwise be parsed as URL structure and produce
# a wrong host or a failed login.
mysql_user = quote_plus(MYSQL_CONFIG['username'])
mysql_password = quote_plus(MYSQL_CONFIG['password'])
mysql_url = f"mysql+mysqlconnector://{mysql_user}:{mysql_password}@{MYSQL_CONFIG['host']}:3306/{DB_NAME}"
engine = create_engine(mysql_url)

# Build the MongoDB client.
# NOTE(review): MONGO_DB_CONFIG['username'] / ['password'] are loaded but not
# used in this URI, so the connection is unauthenticated — confirm that is
# intended for this deployment.
mongo_uri = f'mongodb://{MONGO_DB_CONFIG["hostname"]}:{MONGO_DB_CONFIG["port"]}/?directConnection=true&serverSelectionTimeoutMS=2000&appName={MONGO_DB_CONFIG["appName"]}+2.3.3'
client = MongoClient(mongo_uri)

#### MySQL

In [None]:
# Select users with a credit score greater than 750.
# The query is a triple-quoted string wrapped in text(): a single-quoted string
# cannot span lines, and SQLAlchemy 2.x only executes SQL passed as text()
# on an explicit connection (Engine.execute no longer exists).
high_credit_users_query = text("""
    SELECT id, current_age, yearly_income, credit_score
    FROM Users
    WHERE credit_score > 750;
""")

# The context manager returns the connection to the pool even if the query fails.
with engine.connect() as conn:
    high_credit_users_df = pd.read_sql(high_credit_users_query, conn)

# Show a preview only; the full result stays available in high_credit_users_df.
high_credit_users_df.head(10)

In [None]:
# Select all transactions where errors occurred.
# NOTE(review): the MongoDB version of this query treats the string
# "No Errors" as "no error"; confirm the MySQL table really stores NULL / ''
# for error-free rows, otherwise this filter returns every transaction.
error_transactions_query = text("""
    SELECT id, date, card_id, amount, errors
    FROM Transactions
    WHERE errors IS NOT NULL AND errors != '';
""")

# text() + an explicit connection is required by SQLAlchemy 2.x; the context
# manager releases the connection even if the query fails.
with engine.connect() as conn:
    error_transactions_df = pd.read_sql(error_transactions_query, conn)

# Show a preview only; the full result stays available in error_transactions_df.
error_transactions_df.head(10)

In [None]:
# Calculate the total transaction amount per state for female users.
# NOTE(review): the MongoDB version matches gender "Female" (capitalised);
# this comparison relies on the column using a case-insensitive collation —
# confirm against the table definition.
female_spend_by_state_query = text("""
    SELECT T.merchant_state, SUM(T.amount) AS total_spent
    FROM Transactions T
    JOIN Users U ON T.client_id = U.id
    WHERE U.gender = 'female'
    GROUP BY T.merchant_state;
""")

# text() + an explicit connection is required by SQLAlchemy 2.x; the context
# manager releases the connection even if the query fails.
with engine.connect() as conn:
    female_spend_by_state_df = pd.read_sql(female_spend_by_state_query, conn)

# One row per merchant state, so the whole frame is small enough to display.
female_spend_by_state_df

In [None]:
# Find all cards flagged as "on_dark_web" that were used in transactions exceeding $1,000.
# NOTE(review): the MongoDB collection stores this flag as a string ("No" is
# the value seen there); confirm the MySQL column is numeric so that `= 1`
# is the right comparison.
dark_web_cards_query = text("""
    SELECT C.card_number, C.card_brand, T.amount, T.date
    FROM Transactions T
    JOIN Cards C ON T.card_id = C.id
    WHERE C.card_on_dark_web = 1 AND T.amount > 1000;
""")

# text() + an explicit connection is required by SQLAlchemy 2.x; the context
# manager releases the connection even if the query fails.
with engine.connect() as conn:
    dark_web_cards_df = pd.read_sql(dark_web_cards_query, conn)

# Show a preview only; the full result stays available in dark_web_cards_df.
dark_web_cards_df.head(10)

#### MongoDB

In [4]:
def drop_all_collection(db):
    """Drop every collection in ``db``, printing one line per collection.

    Destructive: each name returned by ``db.list_collection_names()`` is
    passed to ``db.drop_collection``.

    Args:
        db: Any object exposing ``list_collection_names()`` and
            ``drop_collection(name)``.

    Returns:
        None.
    """
    for name in db.list_collection_names():
        db.drop_collection(name)
        print(f"Collection {name} dropped.")

In [6]:
# Handle to the MongoDB database that shares its name with the MySQL schema.
db = client[DB_NAME]

# Names of the collections currently present in the database.
collections = db.list_collection_names()

# Collection handles used by the query cells below.
cards_collection, users_collection, transactions_collection = (
    db["cards"],
    db["users"],
    db["transactions"],
)

Simple queries

In [8]:
# Query objective: select users with a credit score greater than 750.
print('Select users with a credit score greater than 750.')
# Define the filter once so the cursor and the count can never disagree.
high_credit_filter = {"credit_score": {"$gt": 750}}
# The cursor is kept under its own name for further inspection; this cell only
# reports the number of matching documents.
high_credit_users = users_collection.find(high_credit_filter)
print('Total number of docs collected : ', users_collection.count_documents(high_credit_filter))

print()

# Query objective: select all transactions where errors occurred.
print('Select all transactions where errors occurred.')
# The filter keeps every document whose `errors` value is anything other than
# the string "No Errors".
error_filter = {"errors": {"$ne" : "No Errors"}}
error_transactions = transactions_collection.find(error_filter)
print('Total number of docs collected : ', transactions_collection.count_documents(error_filter))
# A dedicated loop variable avoids clobbering any other result held in the notebook.
for txn in error_transactions:
    print(f"Client ID: {txn['client_id']} | Used Chip: {txn['use_chip']} | Error Type: {txn['errors']}")

Select users with a credit score greater than 750.
Total number of docs collected :  254

Select all transactions where errors occurred.
Total number of docs collected :  9
Client ID: 464 | Used Chip: Swipe Transaction | Error Type: Technical Glitch
Client ID: 1797 | Used Chip: Online Transaction | Error Type: Bad Expiration
Client ID: 1797 | Used Chip: Online Transaction | Error Type: Bad Card Number
Client ID: 1424 | Used Chip: Swipe Transaction | Error Type: Insufficient Balance
Client ID: 843 | Used Chip: Swipe Transaction | Error Type: Insufficient Balance
Client ID: 1424 | Used Chip: Swipe Transaction | Error Type: Insufficient Balance
Client ID: 319 | Used Chip: Swipe Transaction | Error Type: Insufficient Balance
Client ID: 1190 | Used Chip: Online Transaction | Error Type: Insufficient Balance
Client ID: 1782 | Used Chip: Swipe Transaction | Error Type: Insufficient Balance


Complex Queries

In [9]:
# Query objective: calculate the total transaction amount per state for female users.
# Each aggregation stage is named so the pipeline reads as a list of steps.

# 1. Keep only users whose gender is "Female".
match_female_users = {"$match": {"gender": "Female"}}

# 2. Attach each user's transactions (users.id == transactions.client_id).
join_user_transactions = {
    "$lookup": {
        "from": "transactions",
        "localField": "id",
        "foreignField": "client_id",
        "as": "user_transactions",
    }
}

# 3. Emit one document per (user, transaction) pair.
flatten_transactions = {"$unwind": "$user_transactions"}

# 4. Sum the amounts per merchant state; $toDouble converts each amount to a
#    double before it is summed.
sum_amount_by_state = {
    "$group": {
        "_id": "$user_transactions.merchant_state",
        "total_amount": {"$sum": {"$toDouble": "$user_transactions.amount"}},
    }
}

# 5. Largest totals first.
sort_by_total_desc = {"$sort": {"total_amount": -1}}

female_transactions_state_pipeline = [
    match_female_users,
    join_user_transactions,
    flatten_transactions,
    sum_amount_by_state,
    sort_by_total_desc,
]

result = users_collection.aggregate(female_transactions_state_pipeline)

# One [state, rounded total] row per group returned by the pipeline.
data = [[state['_id'], round(state['total_amount'], 2)] for state in result]

# Transposed so the states run across the page instead of down it.
pd.DataFrame(data=data, columns=['State_ID', 'Total Amount']).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37
State_ID,Unknown,PA,NY,CA,OK,MI,NV,FL,AL,AR,TX,WI,IL,CO,NC,NJ,GA,MA,AZ,SC,IA,LA,VA,IN,KS,MD,NE,HI,CT,MN,NM,KY,WA,MO,TN,UT,OR,OH
Total Amount,2656.2,1317.47,1083.97,632.91,419.7,401.05,398.07,369.22,297.37,281.74,259.41,258.4,244.98,235.0,234.12,188.15,179.35,158.75,153.66,151.22,125.17,107.25,96.25,87.58,82.53,81.45,75.45,51.65,39.14,34.46,34.07,32.76,31.53,25.5,22.58,9.23,4.76,-29.92


In [32]:
# Query objective: find all cards flagged as "on_dark_web" that were used in
# transactions exceeding $1,000.
#
# NOTE: the two filter values below deliberately differ from that objective so
# the cell produces output on the current data. They are named constants so the
# deviation is visible and can be reverted in one place.

# for now using No because there are no Yes
DARK_WEB_FLAG = "No"
# TODO: the objective (and the MySQL version of this query) uses 1000.
MIN_TRANSACTION_AMOUNT = 100

dark_web_pipeline = [
    # Keep only cards carrying the selected dark-web flag.
    {
        "$match": {
            "card_on_dark_web": DARK_WEB_FLAG
        }
    },
    # Attach the transactions made with each card (cards.id == transactions.card_id).
    {
        "$lookup": {
            "from": "transactions",
            "localField": "id",
            "foreignField": "card_id",
            "as": "transactions"
        }
    },
    # Emit one document per (card, transaction) pair.
    {
        "$unwind": "$transactions"
    },
    # Convert the amount to a double so the comparison below is numeric.
    {
        "$addFields": {
            "transactions.amount": {
                "$toDouble": "$transactions.amount"
            }
        }
    },
    # Keep only transactions above the threshold.
    {
        "$match": {
            "transactions.amount": {"$gt": MIN_TRANSACTION_AMOUNT}
        }
    }
]

dark_web_matches = cards_collection.aggregate(dark_web_pipeline)

# Print a numbered, three-line summary for every matching (card, transaction) pair.
for ind, card in enumerate(dark_web_matches):
    print(f" {ind} ".center(80, '='))
    print(f"Card ID: {card['id']}, Card Brand: {card['card_brand']}, Card Type: {card['card_type']}")
    print(f"Transaction Amount: ${card['transactions']['amount']}, Date: {card['transactions']['date']}")
    print(f"Merchant State: {card['transactions']['merchant_state']}, On Dark Web: {card['card_on_dark_web']}")

Card ID: 5482, Card Brand: Mastercard, Card Type: Debit
Transaction Amount: $107.12, Date: 2010-01-01 09:04:00
Merchant State: MI, On Dark Web: No
Card ID: 5375, Card Brand: Mastercard, Card Type: Credit
Transaction Amount: $103.92, Date: 2010-01-01 07:17:00
Merchant State: SC, On Dark Web: No
Card ID: 2808, Card Brand: Mastercard, Card Type: Debit
Transaction Amount: $189.29, Date: 2010-01-01 06:10:00
Merchant State: Unknown, On Dark Web: No
Card ID: 5146, Card Brand: Mastercard, Card Type: Debit
Transaction Amount: $120.0, Date: 2010-01-01 08:43:00
Merchant State: CA, On Dark Web: No
Card ID: 5146, Card Brand: Mastercard, Card Type: Debit
Transaction Amount: $120.0, Date: 2010-01-01 09:06:00
Merchant State: CA, On Dark Web: No
Card ID: 243, Card Brand: Visa, Card Type: Debit
Transaction Amount: $262.47, Date: 2010-01-01 05:55:00
Merchant State: NY, On Dark Web: No
Card ID: 243, Card Brand: Visa, Card Type: Debit
Transaction Amount: $192.29, Date: 2010-01-01 06:46:00
Merchant State: U