In [1]:
import pandas as pd

from task2.services.converters.converter import convert_to_employees
from task2.services.db.data_inserter import insert_data
from task2.services.generators.department_generator import generate_departments
from task2.services.transformations.base_transformations import trim_all_values
from task2.utls.logger_config import logger
from task2.models.base import Base, engine
from task2.services.schema.schema_definitions import EMPLOYEE_SCHEMA

# Emit CREATE TABLE for every model registered on Base's metadata,
# binding explicitly to the shared project engine.
Base.metadata.create_all(bind=engine)
print("Database tables created successfully!")

2025-04-13 17:49:45,840 INFO sqlalchemy.engine.Engine BEGIN (implicit)
17:49:45 INFO: BEGIN (implicit)
2025-04-13 17:49:45,841 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("departments")
17:49:45 INFO: PRAGMA main.table_info("departments")
2025-04-13 17:49:45,841 INFO sqlalchemy.engine.Engine [raw sql] ()
17:49:45 INFO: [raw sql] ()
2025-04-13 17:49:45,842 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("departments")
17:49:45 INFO: PRAGMA temp.table_info("departments")
2025-04-13 17:49:45,843 INFO sqlalchemy.engine.Engine [raw sql] ()
17:49:45 INFO: [raw sql] ()
2025-04-13 17:49:45,843 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("employees")
17:49:45 INFO: PRAGMA main.table_info("employees")
2025-04-13 17:49:45,844 INFO sqlalchemy.engine.Engine [raw sql] ()
17:49:45 INFO: [raw sql] ()
2025-04-13 17:49:45,844 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("employees")
17:49:45 INFO: PRAGMA temp.table_info("employees")
2025-04-13 17:49:45,845 INFO sqlalchemy.

In [2]:
logger.info("Starting transformation: first trim, then convert types\n")

# Read the raw employee CSV; row 0 of the file supplies the column names.
emp_df: pd.DataFrame = pd.read_csv(
    "./data/employees.csv",
    sep=",",
    header=0,
)

# Trim step: run the values through trim_all_values, driven by EMPLOYEE_SCHEMA.
transformed_emp_df = trim_all_values(emp_df, EMPLOYEE_SCHEMA)

# Preview a single row to sanity-check the result.
print(transformed_emp_df.head(1))

17:49:45 INFO: Starting transformation: first trim, then convert types

trim_all_values executed in 0.0011 seconds
                                     id       name date_of_birth  salary  \
0  1dd3b13a-0b76-4468-b7c0-39ef57828315  James Lee    1973-09-15  145198   

  department_id  
0             8  


In [3]:
# Convert DataFrame to Employees ORM instances
# NOTE(review): the logged bind parameters for the employees INSERT show
# salary and department_id as strings ('145198', '8'); confirm that
# convert_to_employees performs the intended type conversion.
employee_instances = convert_to_employees(transformed_emp_df)

# Use the inserter to insert employee data into the database
# NOTE(review): employees (which carry a department_id) are inserted here,
# before the departments are inserted in the following cell. Presumably this
# only works because foreign keys are not enforced -- TODO confirm, and
# consider inserting departments first.
insert_data(engine, employee_instances)

2025-04-13 17:49:46,110 INFO sqlalchemy.engine.Engine BEGIN (implicit)
17:49:46 INFO: BEGIN (implicit)
2025-04-13 17:49:46,113 INFO sqlalchemy.engine.Engine INSERT INTO employees (name, date_of_birth, salary, department_id) VALUES (?, ?, ?, ?) RETURNING id
17:49:46 INFO: INSERT INTO employees (name, date_of_birth, salary, department_id) VALUES (?, ?, ?, ?) RETURNING id
2025-04-13 17:49:46,113 INFO sqlalchemy.engine.Engine [generated in 0.00023s (insertmanyvalues) 1/100 (ordered; batch not supported)] ('James Lee', '1973-09-15', '145198', '8')
17:49:46 INFO: [generated in 0.00023s (insertmanyvalues) 1/100 (ordered; batch not supported)] ('James Lee', '1973-09-15', '145198', '8')
2025-04-13 17:49:46,114 INFO sqlalchemy.engine.Engine INSERT INTO employees (name, date_of_birth, salary, department_id) VALUES (?, ?, ?, ?) RETURNING id
17:49:46 INFO: INSERT INTO employees (name, date_of_birth, salary, department_id) VALUES (?, ?, ?, ?) RETURNING id
2025-04-13 17:49:46,115 INFO sqlalchemy.engi

In [4]:
# Generate department records and insert them through the shared inserter.
# The logged INSERT shows ten rows with explicit ids 1-10 for the argument 10.
# NOTE(review): this cell runs after the employee insert in the previous cell,
# although employees carry a department_id -- confirm the intended order.
# NOTE(review): ids are supplied explicitly, so re-running this cell against
# the same database presumably conflicts on id -- TODO confirm.
fake_departments = generate_departments(10)
insert_data(engine, fake_departments)

2025-04-13 17:49:46,740 INFO sqlalchemy.engine.Engine BEGIN (implicit)
17:49:46 INFO: BEGIN (implicit)
2025-04-13 17:49:46,741 INFO sqlalchemy.engine.Engine INSERT INTO departments (id, name) VALUES (?, ?)
17:49:46 INFO: INSERT INTO departments (id, name) VALUES (?, ?)
2025-04-13 17:49:46,741 INFO sqlalchemy.engine.Engine [generated in 0.00067s] [(1, 'Human Resources'), (2, 'Engineering'), (3, 'Marketing'), (4, 'Sales'), (5, 'Finance'), (6, 'Operations'), (7, 'Legal'), (8, 'Customer Support'), (9, 'Product Management'), (10, 'IT')]
17:49:46 INFO: [generated in 0.00067s] [(1, 'Human Resources'), (2, 'Engineering'), (3, 'Marketing'), (4, 'Sales'), (5, 'Finance'), (6, 'Operations'), (7, 'Legal'), (8, 'Customer Support'), (9, 'Product Management'), (10, 'IT')]
2025-04-13 17:49:46,743 INFO sqlalchemy.engine.Engine COMMIT
17:49:46 INFO: COMMIT
10 instances inserted successfully!
