### Imports

In [1]:
import json
import math
import os
from datetime import date, datetime

import cx_Oracle
import numpy as np
import pandas as pd
import pymongo

# Location of the Oracle Instant Client installation.
# Set the ORACLE_CLIENT_LIB_DIR environment variable to point at a different
# install; the default is the original hard-coded Windows path.
cx_Oracle.init_oracle_client(
    lib_dir=os.environ.get("ORACLE_CLIENT_LIB_DIR", r"C:\instantclient_21_9")
)

### Estabelecer conexão

In [2]:
# Connect to Oracle.
# Every connection setting can be overridden through an environment variable so
# that real credentials do not have to be committed with the notebook. The
# defaults are the original local-development values, so behaviour is unchanged
# when no variable is set.
oracle_host = os.environ.get("ORACLE_HOST", "localhost")
oracle_port = os.environ.get("ORACLE_PORT", "1521")
oracle_service_name = os.environ.get("ORACLE_SERVICE_NAME", "xe")

dsn_tns = cx_Oracle.makedsn(oracle_host, oracle_port, service_name=oracle_service_name)
conn = cx_Oracle.connect(
    user=os.environ.get("ORACLE_USER", "store"),
    password=os.environ.get("ORACLE_PASSWORD", "uminho2023"),
    dsn=dsn_tns,
)

### Selecionar dados das tabelas

In [3]:
# Single cursor on the Oracle connection; the table-loading cells below all run their SELECTs through it.
oracle_cursor = conn.cursor()

----
Grupo 1: USER

STORE_USERS

In [4]:
# Load the whole STORE_USERS table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM STORE_USERS")
store_users_columns = [column[0] for column in oracle_cursor.description]
results_storeUsers = oracle_cursor.fetchall()
store_users = pd.DataFrame(data=results_storeUsers, columns=store_users_columns)

ADDRESSES

In [5]:
# Load the whole ADDRESSES table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM ADDRESSES")
addresses_columns = [column[0] for column in oracle_cursor.description]
results_addresses = oracle_cursor.fetchall()
addresses = pd.DataFrame(data=results_addresses, columns=addresses_columns)

----
Grupo 2: EMPLOYEES

EMPLOYEES

In [6]:
# Load the whole EMPLOYEES table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM EMPLOYEES")
employees_columns = [column[0] for column in oracle_cursor.description]
results_employees = oracle_cursor.fetchall()
employees = pd.DataFrame(data=results_employees, columns=employees_columns)

DEPARTMENTS

In [7]:
# Load the whole DEPARTMENTS table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM DEPARTMENTS")
departments_columns = [column[0] for column in oracle_cursor.description]
results_departments = oracle_cursor.fetchall()
departments = pd.DataFrame(data=results_departments, columns=departments_columns)

EMPLOYEES_ARCHIVE

In [8]:
# Load the whole EMPLOYEES_ARCHIVE table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM EMPLOYEES_ARCHIVE")
employees_archive_columns = [column[0] for column in oracle_cursor.description]
results_employeesArchive = oracle_cursor.fetchall()
employeesArchive = pd.DataFrame(data=results_employeesArchive, columns=employees_archive_columns)

----
Grupo 3: PRODUCT  

PRODUCT

In [9]:
# Load the whole PRODUCT table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM PRODUCT")
product_columns = [column[0] for column in oracle_cursor.description]
results_product = oracle_cursor.fetchall()
product = pd.DataFrame(data=results_product, columns=product_columns)

PRODUCT_CATEGORIES

In [10]:
# Load the whole PRODUCT_CATEGORIES table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM PRODUCT_CATEGORIES")
product_categories_columns = [column[0] for column in oracle_cursor.description]
results_productCategories = oracle_cursor.fetchall()
productCategories = pd.DataFrame(data=results_productCategories, columns=product_categories_columns)

DISCOUNT

In [11]:
# Load the whole DISCOUNT table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM DISCOUNT")
discount_columns = [column[0] for column in oracle_cursor.description]
results_discount = oracle_cursor.fetchall()
discount = pd.DataFrame(data=results_discount, columns=discount_columns)

STOCK

In [12]:
# Load the whole STOCK table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM STOCK")
stock_columns = [column[0] for column in oracle_cursor.description]
results_stock = oracle_cursor.fetchall()
stock = pd.DataFrame(data=results_stock, columns=stock_columns)

----
Grupo 4: ORDER  
  
  
PAYMENT_DETAILS

In [13]:
# Load the whole PAYMENT_DETAILS table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM PAYMENT_DETAILS")
payment_details_columns = [column[0] for column in oracle_cursor.description]
results_paymentDetails = oracle_cursor.fetchall()
payment_details = pd.DataFrame(data=results_paymentDetails, columns=payment_details_columns)

ORDER_DETAILS

In [14]:
# Load the whole ORDER_DETAILS table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM ORDER_DETAILS")
order_details_columns = [column[0] for column in oracle_cursor.description]
results_orderDetails = oracle_cursor.fetchall()
order_details = pd.DataFrame(data=results_orderDetails, columns=order_details_columns)

ORDER_ITEMS

In [15]:
# Load the whole ORDER_ITEMS table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM ORDER_ITEMS")
order_items_columns = [column[0] for column in oracle_cursor.description]
results_orderItems = oracle_cursor.fetchall()
order_items = pd.DataFrame(data=results_orderItems, columns=order_items_columns)

CART_ITEM

In [16]:
# Load the whole CART_ITEM table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM CART_ITEM")
cart_item_columns = [column[0] for column in oracle_cursor.description]
results_cartItem = oracle_cursor.fetchall()
cart_item = pd.DataFrame(data=results_cartItem, columns=cart_item_columns)

SHOPPING_SESSION

In [17]:
# Load the whole SHOPPING_SESSION table; column labels come from the cursor metadata.
oracle_cursor.execute("SELECT * FROM SHOPPING_SESSION")
shopping_session_columns = [column[0] for column in oracle_cursor.description]
results_shoppingSession = oracle_cursor.fetchall()
shopping_session = pd.DataFrame(data=results_shoppingSession, columns=shopping_session_columns)

----
# Tratar os dados

----
### 2. EMPLOYEES
- Passar a ter um único documento para employees
- Adicionar um campo de "estado" (antigo/atual)
- Adicionar um campo de "department_info"


In [18]:
# Walk EMPLOYEES_ARCHIVE and split each row into up to two records: one with
# state "old" (only when the row carries an OLD_* snapshot) and one with state
# "active" built from the NEW_* columns.

# Format used for every date stored on an employee record.
EMPLOYEE_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'


def _value_or_default(value, default):
    """Return `value`, or `default` when it is missing (None/NaN) or falsy.

    NaN is truthy in Python, so a plain `value if value else default` would let
    it through; `pd.isnull` catches it explicitly.
    """
    if pd.isnull(value) or not value:
        return default
    return value


def _employee_record(row, prefix, state):
    """Build one employee dictionary from the `prefix`-ed columns of an archive row.

    Parameters
    ----------
    row : pandas.Series
        One row of `employeesArchive`.
    prefix : str
        Which snapshot to read: 'OLD' or 'NEW'.
    state : str
        Value stored under "employee_state".

    Returns
    -------
    dict
        The employee record. The optional fields (middle name, manager id) are
        defaulted from the *same* snapshot they are read from.
    """
    def column(name):
        return row[prefix + '_' + name]

    return {
        "emp_ID": column('EMPLOYEE_ID'),
        "emp_firstName": column('FIRST_NAME'),
        "emp_MiddleName": _value_or_default(column('MIDDLE_NAME'), ''),
        "emp_LastName": column('LAST_NAME'),
        "emp_dateOfBirth": column('DATE_OF_BIRTH').strftime(EMPLOYEE_DATE_FORMAT),
        "emp_departmentID": column('DEPARTMENT_ID'),
        "emp_hireDate": column('HIRE_DATE').strftime(EMPLOYEE_DATE_FORMAT),
        "emp_salary": column('SALARY'),
        "emp_phoneNumber": column('PHONE_NUMBER'),
        "emp_Email": column('EMAIL'),
        "emp_ssnNumber": column('SSN_NUMBER'),
        "emp_managerID": _value_or_default(column('MANAGER_ID'), 0),
        "employee_state": state,
    }


old_employees = []
active_employees = []

for index, row in employeesArchive.iterrows():

    # Rows without an OLD_* snapshot have a null OLD_EMPLOYEE_ID (None or NaN).
    if pd.notnull(row['OLD_EMPLOYEE_ID']):
        employee_info_old = _employee_record(row, 'OLD', "old")
        old_employees.append(employee_info_old)

    employee_info_active = _employee_record(row, 'NEW', "active")
    active_employees.append(employee_info_active)

####

## Iniciar conexão com MongoDB

In [19]:
# pymongo is already imported in the first cell; the import is repeated here so
# this cell also runs on its own.
import pymongo

# Open the connection to the local MongoDB server
client = pymongo.MongoClient("mongodb://localhost:27017/")

# Handle on the "oes" database
database = client["oes"]

# Handles on the four target collections, in the same order as the names
collection_user, collection_product, collection_order, collection_employees = (
    database[collection_name]
    for collection_name in ("user", "product", "order", "employees")
)

### EMPLOYEES - Enviar para o Mongo

In [20]:
# Collect the active employee records into the list sent to MongoDB. The list
# is new, but it holds the very same dictionaries as active_employees.
# NOTE(review): only active_employees is inserted in this cell; old_employees
# is not sent anywhere here - confirm whether that is intended.
employee_documents = list(active_employees)

# Bulk-insert the documents into the employees collection
collection_employees.insert_many(employee_documents)

<pymongo.results.InsertManyResult at 0x271a9449510>

- Adicionar um campo para o departamento

Nota: o manager ID guardado em `department_info` provém da tabela DEPARTMENTS e é diferente do manager ID fornecido em EMPLOYEES_ARCHIVE.

In [21]:
# Attach the owning department to every active employee: on the in-memory
# record and, via update_one, on the document stored in MongoDB.
for entry in active_employees:

    # The department id is removed once an entry has been enriched, so its
    # absence means a previous run of this cell already handled this entry.
    if "emp_departmentID" not in entry:
        continue

    id_employee = entry["emp_ID"]

    # Department ID of the current employee
    dept_id = entry["emp_departmentID"]

    # Look up the matching DEPARTMENTS row
    dept_matches = departments.loc[departments["DEPARTMENT_ID"] == dept_id]
    if dept_matches.empty:
        # Same exception type as the bare .iloc[0] would raise, but it names the culprit.
        raise IndexError(
            "No DEPARTMENTS row with DEPARTMENT_ID == {!r} (employee {!r})".format(dept_id, id_employee)
        )
    dept_info = dept_matches.iloc[0]

    # Embedded department sub-document
    new_field = {
        "department_id": int(dept_info["DEPARTMENT_ID"]),
        "department_name": dept_info["DEPARTMENT_NAME"],
        "manager_id": int(dept_info["MANAGER_ID"]),
        "department_description": dept_info["DEPARTMENT_DESC"],
    }

    # Replace the flat department id by the embedded sub-document
    entry["department_info"] = new_field
    del entry["emp_departmentID"]

    # Drop the "_id" key if the entry has one; pop() with a default does not
    # raise when the key is absent, unlike the previous `if entry["_id"]` test.
    entry.pop("_id", None)

    collection_employees.update_one({"emp_ID": id_employee}, {'$set': {"department_info": new_field}})

    print(entry)


{'emp_ID': 1, 'emp_firstName': 'Kenon', 'emp_MiddleName': '', 'emp_LastName': 'Andries', 'emp_dateOfBirth': '1986-07-09 00:00:00', 'emp_hireDate': '2019-12-25 00:00:00', 'emp_salary': 9300, 'emp_phoneNumber': '279 266 4806', 'emp_Email': 'gandries0@google.de', 'emp_ssnNumber': '779-22-3853', 'emp_managerID': 0, 'employee_state': 'active', 'department_info': {'department_id': 1, 'department_name': 'Management', 'manager_id': 1, 'department_description': 'Supervision of other departments'}}
{'emp_ID': 2, 'emp_firstName': 'Brittney', 'emp_MiddleName': 'Leonidas', 'emp_LastName': 'Dimitriou', 'emp_dateOfBirth': '1976-07-21 00:00:00', 'emp_hireDate': '2018-01-08 00:00:00', 'emp_salary': 11800, 'emp_phoneNumber': '604 235 4231', 'emp_Email': 'ldimitriou1@icio.us', 'emp_ssnNumber': '825-48-6752', 'emp_managerID': 0, 'employee_state': 'active', 'department_info': {'department_id': 2, 'department_name': 'Development', 'manager_id': 2, 'department_description': "Developing of the store's website

----
### 3. PRODUCT
- Passar a ter um único documento para products
- Adicionar um campo de Category
- Adicionar um campo de Discount
- Adicionar um campo de Stock

In [22]:
# Format used for the timestamps stored on a product document.
PRODUCT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
# Value written when a product has no creation / last-modified timestamp.
PRODUCT_DEFAULT_TIMESTAMP = '2022-07-01 09:57:48'


def _product_timestamp(value):
    """Format a product timestamp, falling back to the default when it is null."""
    if pd.isnull(value):
        return PRODUCT_DEFAULT_TIMESTAMP
    return value.strftime(PRODUCT_DATE_FORMAT)


# Build one flat document per PRODUCT row.
product_documents = []

for index, row in product.iterrows():
    # pd.isnull covers both None and NaN; math.isnan raises TypeError on None.
    has_discount = not pd.isnull(row['DISCOUNT_ID'])

    productObj = {
        "product_ID": row['PRODUCT_ID'],
        "product_name": row['PRODUCT_NAME'],
        "product_categoryID": row['CATEGORY_ID'],
        "product_sku": row['SKU'],
        "product_price": row['PRICE'],
        # 0 stands for "no discount"
        "product_discountId": int(row['DISCOUNT_ID']) if has_discount else 0,
        "product_createdAt": _product_timestamp(row['CREATED_AT']),
        "product_lastModified": _product_timestamp(row['LAST_MODIFIED']),
    }

    product_documents.append(productObj)

collection_product.insert_many(product_documents)

<pymongo.results.InsertManyResult at 0x271a93e75e0>

- Adicionar campos category, discount, stock

In [23]:
# Format used for the timestamps stored on a discount sub-document.
DISCOUNT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
# Value written when a discount has no creation / modification timestamp.
DISCOUNT_DEFAULT_TIMESTAMP = '2022-07-01 09:57:48'


def _discount_timestamp(value):
    """Return a discount timestamp as a string.

    Null values become the default timestamp. Date-like values are formatted
    with DISCOUNT_DATE_FORMAT so the stored field is a string in both cases
    (previously it was the raw value when present and a string when null).
    Values without a strftime method are returned unchanged.
    """
    if pd.isnull(value):
        return DISCOUNT_DEFAULT_TIMESTAMP
    if hasattr(value, "strftime"):
        return value.strftime(DISCOUNT_DATE_FORMAT)
    return value


# Enrich every stored product with its category, stock and (optional) discount.
for row in product_documents:

    productID = row['product_ID']

    # Drop the "_id" key if the document has one; pop() with a default does not
    # raise when the key is absent, unlike the previous `if row["_id"]` test.
    row.pop("_id", None)

    category_row = productCategories.loc[productCategories["CATEGORY_ID"] == row['product_categoryID']].iloc[0]
    new_category = {
        "category_ID": int(category_row['CATEGORY_ID']),
        "category_name": category_row['CATEGORY_NAME'],
    }

    stock_row = stock.loc[stock["PRODUCT_ID"] == productID].iloc[0]
    new_stock = {
        "quantity": int(stock_row['QUANTITY']),
        "max_stock_quantity": int(stock_row['MAX_STOCK_QUANTITY']),
        "unit": stock_row['UNIT']
    }

    # Category and stock are set in a single update instead of two round trips.
    collection_product.update_one(
        {"product_ID": productID},
        {'$set': {"product_category": new_category, "product_stock": new_stock}},
    )

    # product_discountId == 0 means the product has no discount.
    if row['product_discountId'] != 0:
        discount_row = discount.loc[discount["DISCOUNT_ID"] == row['product_discountId']].iloc[0]
        new_discount = {
            "discount_id": int(discount_row['DISCOUNT_ID']),
            "discount_name": discount_row['DISCOUNT_NAME'],
            "discount_desc": discount_row['DISCOUNT_DESC'],
            "discount_percent": int(discount_row['DISCOUNT_PERCENT']),
            "discount_is_active_status": discount_row['IS_ACTIVE_STATUS'],
            "discount_createdAt": _discount_timestamp(discount_row['CREATED_AT']),
            "discount_modifiedAt": _discount_timestamp(discount_row['MODIFIED_AT']),
        }
        collection_product.update_one({"product_ID": productID}, {'$set': {"product_discount": new_discount}})