In [1]:
import pandas as pd

from task2.services.converters.converter import convert_to_employees
from task2.services.db.data_inserter import insert_data
from task2.services.generators.department_generator import generate_department_records
from task2.services.transformations.base_transformations import trim_all_values
from task2.utls.logger_config import logger
from task2.models.base import Base, engine
from task2.services.schema.schema_definitions import EMPLOYEE_SCHEMA

# Issue CREATE TABLE for every model registered on Base's metadata. create_all
# probes for each table first (see the PRAGMA table_info calls in the log), so
# tables that already exist are left as they are.
Base.metadata.create_all(bind=engine)
print("Database tables created successfully!")

2025-04-13 03:10:21,856 INFO sqlalchemy.engine.Engine BEGIN (implicit)
03:10:21 INFO: BEGIN (implicit)
2025-04-13 03:10:21,856 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("departments")
03:10:21 INFO: PRAGMA main.table_info("departments")
2025-04-13 03:10:21,857 INFO sqlalchemy.engine.Engine [raw sql] ()
03:10:21 INFO: [raw sql] ()
2025-04-13 03:10:21,858 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("departments")
03:10:21 INFO: PRAGMA temp.table_info("departments")
2025-04-13 03:10:21,858 INFO sqlalchemy.engine.Engine [raw sql] ()
03:10:21 INFO: [raw sql] ()
2025-04-13 03:10:21,859 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("employees")
03:10:21 INFO: PRAGMA main.table_info("employees")
2025-04-13 03:10:21,859 INFO sqlalchemy.engine.Engine [raw sql] ()
03:10:21 INFO: [raw sql] ()
2025-04-13 03:10:21,859 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("employees")
03:10:21 INFO: PRAGMA temp.table_info("employees")
2025-04-13 03:10:21,860 INFO sqlalchemy.

In [2]:
logger.info("Starting transformation: first trim, then convert types\n")

# Load the raw employee records; the first line of the CSV is the header row.
emp_df: pd.DataFrame = pd.read_csv("./data/employees.csv", sep=",", header=0)

# Run the project's trimming step against the employee schema, then preview the
# first resulting row as a quick sanity check.
transformed_emp_df = trim_all_values(emp_df, EMPLOYEE_SCHEMA)
print(transformed_emp_df.head(n=1))

03:10:21 INFO: Starting transformation: first trim, then convert types

trim_all_values executed in 0.0009 seconds
                                     id       name date_of_birth  salary  \
0  1dd3b13a-0b76-4468-b7c0-39ef57828315  James Lee    1973-09-15  145198   

  department_id  
0             8  


In [3]:
# Build Employees ORM instances from the trimmed DataFrame produced by the
# previous cell.
# NOTE(review): the SQL log for this cell binds salary and department_id as
# strings ('145198', '8'), so no numeric conversion appears to have happened
# before the insert — confirm whether convert_to_employees should cast them.
employee_instances = convert_to_employees(transformed_emp_df)

# Persist the instances through the shared engine using the project's
# insert_data helper.
# NOTE(review): the departments table is only populated by a later cell, so
# these rows carry department_id values before any department exists — verify
# that this ordering is intended.
insert_data(engine, employee_instances)

2025-04-13 03:10:22,015 INFO sqlalchemy.engine.Engine BEGIN (implicit)
03:10:22 INFO: BEGIN (implicit)
2025-04-13 03:10:22,018 INFO sqlalchemy.engine.Engine INSERT INTO employees (name, date_of_birth, salary, department_id, created_at, updated_at) VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) RETURNING id, created_at, updated_at
03:10:22 INFO: INSERT INTO employees (name, date_of_birth, salary, department_id, created_at, updated_at) VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) RETURNING id, created_at, updated_at
2025-04-13 03:10:22,018 INFO sqlalchemy.engine.Engine [generated in 0.00022s (insertmanyvalues) 1/100 (ordered; batch not supported)] ('James Lee', '1973-09-15', '145198', '8')
03:10:22 INFO: [generated in 0.00022s (insertmanyvalues) 1/100 (ordered; batch not supported)] ('James Lee', '1973-09-15', '145198', '8')
2025-04-13 03:10:22,019 INFO sqlalchemy.engine.Engine INSERT INTO employees (name, date_of_birth, salary, department_id, created_at, updated_

In [4]:
# Number of synthetic department rows to generate for this run.
DEPARTMENT_COUNT = 10

# Generate the fake departments and persist them with the same insert helper
# used for the employee rows.
fake_departments = generate_department_records(DEPARTMENT_COUNT)
insert_data(engine, fake_departments)

2025-04-13 03:10:22,887 INFO sqlalchemy.engine.Engine BEGIN (implicit)
03:10:22 INFO: BEGIN (implicit)
2025-04-13 03:10:22,888 INFO sqlalchemy.engine.Engine INSERT INTO departments (name, id, created_at, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) RETURNING id, created_at, updated_at
03:10:22 INFO: INSERT INTO departments (name, id, created_at, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) RETURNING id, created_at, updated_at
2025-04-13 03:10:22,888 INFO sqlalchemy.engine.Engine [generated in 0.00007s (insertmanyvalues) 1/10 (ordered; batch not supported)] ('Montgomery-Walker', 1)
03:10:22 INFO: [generated in 0.00007s (insertmanyvalues) 1/10 (ordered; batch not supported)] ('Montgomery-Walker', 1)
2025-04-13 03:10:22,889 INFO sqlalchemy.engine.Engine INSERT INTO departments (name, id, created_at, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) RETURNING id, created_at, updated_at
03:10:22 INFO: INSERT INTO departments (name, id, cre