diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/config.cpython-313.pyc b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/config.cpython-313.pyc
new file mode 100644
index 0000000..d87b98a
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/config.cpython-313.pyc differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/extractor.cpython-313.pyc b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/extractor.cpython-313.pyc
new file mode 100644
index 0000000..69be53a
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/extractor.cpython-313.pyc differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/loader.cpython-313.pyc b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/loader.cpython-313.pyc
new file mode 100644
index 0000000..56d8bfb
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/loader.cpython-313.pyc differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/logger_config.cpython-313.pyc b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/logger_config.cpython-313.pyc
new file mode 100644
index 0000000..febbacb
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/logger_config.cpython-313.pyc differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/transformer.cpython-313.pyc b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/transformer.cpython-313.pyc
new file mode 100644
index 0000000..8cb94dc
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/__pycache__/transformer.cpython-313.pyc differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/config.py b/03_labs/Arhum_Qaiser/Sales_python_project/config.py
new file mode 100644
index 0000000..fbe4c0b
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/config.py
@@ -0,0 +1,15 @@
+"""Connection and input-file settings for the sales ETL pipeline."""
+import os
+
+# SQL Server connection settings read by loader.connect().
+config_dba = {
+    "server": r"localhost\SQLEXPRESS",
+    "database": "SalesDB",
+    "driver": "ODBC Driver 17 for SQL Server",
+}
+
+# Resolve the workbook next to this module instead of a user-specific
+# absolute path, so the project runs from any checkout location.
+excel_file_path = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "sales_transactions.xlsx"
+)
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/extractor.py b/03_labs/Arhum_Qaiser/Sales_python_project/extractor.py
new file mode 100644
index 0000000..e144c16
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/extractor.py
@@ -0,0 +1,33 @@
+from openpyxl import load_workbook
+from logger_config import setup_logger
+from config import excel_file_path
+
+
+class extractor:
+    """Read the sales workbook into a list of row dictionaries."""
+
+    def __init__(self):
+        self.logger = setup_logger()
+
+    def read_excel(self):
+        """Return one dict per data row of the active sheet.
+
+        Keys are the header cells of row 1. Errors are logged rather than
+        raised; rows read before a failure are still returned.
+        """
+        data = []
+        try:
+            workbook = load_workbook(excel_file_path)
+            sheet = workbook.active
+            headers = [cell.value for cell in sheet[1]]
+
+            for row in sheet.iter_rows(min_row=2, values_only=True):
+                data.append(dict(zip(headers, row)))
+
+            self.logger.info(f"Successfully read {len(data)} rows from Excel.")
+        except FileNotFoundError:
+            self.logger.error("Excel file not found. Check the file path.")
+        except Exception as e:
+            self.logger.error(f"Error reading Excel file: {e}")
+
+        return data
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/loader.py b/03_labs/Arhum_Qaiser/Sales_python_project/loader.py
new file mode 100644
index 0000000..e0a3776
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/loader.py
@@ -0,0 +1,82 @@
+import pyodbc
+from logger_config import setup_logger
+from config import config_dba
+
+
+class loader:
+    """Create the Sales table in SQL Server and load records into it."""
+
+    def __init__(self):
+        self.logger = setup_logger()
+
+    def connect(self):
+        """Open a Windows-authenticated connection; return None on failure."""
+        try:
+            conn_str = (
+                f"DRIVER={{{config_dba['driver']}}};"
+                f"SERVER={config_dba['server']};"
+                f"DATABASE={config_dba['database']};"
+                "Trusted_Connection=yes;"
+            )
+            conn = pyodbc.connect(conn_str)
+            self.logger.info("Successfully connected to the database using Windows Authentication.")
+            return conn
+        except Exception as e:
+            self.logger.error(f"Database connection failed: {e}")
+            return None
+
+    def delete_table(self, conn):
+        """Drop the Sales table if it exists; failures are logged, not raised."""
+        drop_table_query = "DROP TABLE IF EXISTS Sales"
+        try:
+            cursor = conn.cursor()
+            cursor.execute(drop_table_query)
+            conn.commit()
+            self.logger.info("Sales table dropped successfully.")
+        except Exception as e:
+            self.logger.error(f"Failed to drop table: {e}")
+
+    def create_and_insert(self, conn, records):
+        """Ensure the Sales table exists, then insert records one by one.
+
+        A record that fails to insert is logged and skipped so the rest
+        still load. If the table cannot be prepared, nothing is inserted.
+        """
+        create_table_query = """
+        IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Sales')
+        CREATE TABLE Sales (
+            ReportID varchar(50),
+            SalesPersonID varchar(50),
+            ProductID varchar(50),
+            Quantity varchar(50),
+            TotalSalesValue FLOAT
+        )
+        """
+        try:
+            cursor = conn.cursor()
+            cursor.execute(create_table_query)
+            conn.commit()
+            self.logger.info("Sales table is ready.")
+        except Exception as e:
+            self.logger.error(f"Failed to create table: {e}")
+            # Stop here: without a cursor the loop below would hit an unbound
+            # name, and without the table every single insert would fail.
+            return
+
+        inserted = 0
+        for r in records:
+            try:
+                cursor.execute(
+                    """
+                    INSERT INTO Sales (ReportID, SalesPersonID, ProductID, Quantity, TotalSalesValue)
+                    VALUES (?, ?, ?, ?, ?)
+                    """,
+                    r["ReportID"], r["SalesPersonID"], r["ProductID"], r["Quantity"], r["TotalSalesValue"]
+                )
+                inserted += 1
+            except Exception as e:
+                self.logger.error(f"Failed to insert record {r}: {e}")
+
+        conn.commit()
+        cursor.close()
+        self.logger.info(f"Inserted {inserted} records into Sales table.")
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/logger_config.py b/03_labs/Arhum_Qaiser/Sales_python_project/logger_config.py
new file mode 100644
index 0000000..d277cbf
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/logger_config.py
@@ -0,0 +1,22 @@
+import logging
+
+
+def setup_logger():
+    """Return the shared 'sales_ETL' logger, configuring it on first use."""
+    logger = logging.getLogger('sales_ETL')
+    logger.setLevel(logging.INFO)
+
+    # Build handlers only when none are configured: constructing a FileHandler
+    # opens the log file, so doing it on every call leaked a file handle.
+    if not logger.hasHandlers():
+        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+        file_handler = logging.FileHandler('sales_etl.log')
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(formatter)
+        logger.addHandler(console_handler)
+
+    return logger
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/main.py b/03_labs/Arhum_Qaiser/Sales_python_project/main.py
new file mode 100644
index 0000000..82e2a49
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/main.py
@@ -0,0 +1,43 @@
+from extractor import extractor
+from transformer import transformer
+from loader import loader
+from logger_config import setup_logger
+
+
+def main():
+    """Run the extract, transform and load steps in order."""
+    print("Starting ETL process...")
+
+    extractor_obj = extractor()
+    transformer_obj = transformer()
+    loader_obj = loader()
+
+    # Step 1: Extract
+    data = extractor_obj.read_excel()
+
+    # Step 2: Transform
+    transformed_data = transformer_obj.clean_data(data)
+
+    # Step 3: Load
+    conn = loader_obj.connect()
+    if not conn:
+        # connect() has already logged the reason.
+        print("ETL Process stopped: could not connect to the database.")
+        return
+
+    try:
+        loader_obj.delete_table(conn)
+        loader_obj.logger.info("Prompting user for confirmation before data insertion.")
+        proceed = input("Do you want to continue with data insertion? (yes/no): ")
+        if proceed.lower() != 'yes':
+            print("ETL Process stopped after the drop operation.")
+            return
+        loader_obj.create_and_insert(conn, transformed_data)
+        print("ETL Process Completed Successfully!")
+    finally:
+        # Close the connection on every exit path, including errors.
+        conn.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/requirements.txt b/03_labs/Arhum_Qaiser/Sales_python_project/requirements.txt
new file mode 100644
index 0000000..9096055
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/requirements.txt
@@ -0,0 +1,24 @@
+click==8.3.0
+cloudpickle==3.1.1
+colorama==0.4.6
+dask==2025.10.0
+et_xmlfile==2.0.0
+fsspec==2025.9.0
+greenlet==3.2.4
+locket==1.0.0
+numpy==2.3.4
+openpyxl==3.1.5
+packaging==25.0
+pandas==2.3.3
+partd==1.4.2
+pyodbc==5.3.0
+python-dateutil==2.9.0.post0
+pytz==2025.2
+PyYAML==6.0.3
+setuptools==80.9.0
+six==1.17.0
+SQLAlchemy==2.0.44
+toolz==1.1.0
+typing_extensions==4.15.0
+tzdata==2025.2
+wheel==0.45.1
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.log b/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.log
new file mode 100644
index 0000000..53d7a0f
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.log
@@ -0,0 +1,59 @@
+2025-10-27 14:43:31,335 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
+2025-10-27 14:43:31,335 - sales_ETL - INFO - Successfully transformed 0 records.
+2025-10-27 14:43:31,459 - sales_ETL - ERROR - Database connection failed: ('28000', "[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456)")
+2025-10-27 14:47:55,919 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
+2025-10-27 14:47:55,920 - sales_ETL - INFO - Successfully transformed 0 records.
+2025-10-27 14:47:55,961 - sales_ETL - ERROR - Database connection failed: ('28000', "[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456)") +2025-10-27 14:57:01,904 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 14:57:01,904 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 14:57:01,959 - sales_ETL - ERROR - Database connection failed: ('28000', '[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060)') +2025-10-27 15:01:46,258 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:01:46,259 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:01:46,259 - sales_ETL - ERROR - Database connection failed: 'ODBC Driver 17 for SQL Server' +2025-10-27 15:04:36,782 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:04:36,783 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:04:36,783 - sales_ETL - ERROR - Database connection failed: 'ODBC Driver 17 for SQL Server' +2025-10-27 15:05:17,350 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:05:17,351 - sales_ETL - INFO - Successfully transformed 0 records. 
+2025-10-27 15:05:17,353 - sales_ETL - ERROR - Database connection failed: ('IM002', '[IM002] [Microsoft][ODBC Driver Manager] Data source name not found and no default driver specified (0) (SQLDriverConnect)') +2025-10-27 15:05:44,304 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:05:44,304 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:06:01,619 - sales_ETL - ERROR - Database connection failed: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: Error Locating Server/Instance Specified [xFFFFFFFF]. (-1) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (-1)') +2025-10-27 15:06:51,503 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:06:51,504 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:06:51,553 - sales_ETL - ERROR - Database connection failed: ('28000', '[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060)') +2025-10-27 15:13:39,698 - sales_ETL - INFO - Successfully read 9135 rows from Excel. 
+2025-10-27 15:13:39,699 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:13:39,779 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-27 15:13:39,797 - sales_ETL - ERROR - Failed to create table: ('42S01', "[42S01] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]There is already an object named 'Sales' in the database. (2714) (SQLExecDirectW)") +2025-10-27 15:13:39,798 - sales_ETL - INFO - Inserted 0 records into SalesData table. +2025-10-27 15:14:47,534 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:14:47,535 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:14:47,592 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-27 15:14:47,609 - sales_ETL - INFO - SalesData table is ready. +2025-10-27 15:14:47,609 - sales_ETL - INFO - Inserted 0 records into SalesData table. +2025-10-27 15:15:12,860 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:15:12,861 - sales_ETL - INFO - Successfully transformed 0 records. +2025-10-27 15:15:12,915 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-27 15:15:12,921 - sales_ETL - INFO - SalesData table is ready. +2025-10-27 15:15:12,921 - sales_ETL - INFO - Inserted 0 records into SalesData table. +2025-10-27 15:23:43,910 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-27 15:23:43,915 - sales_ETL - INFO - Successfully transformed 9135 records. +2025-10-27 15:23:43,996 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-27 15:23:44,006 - sales_ETL - INFO - SalesData table is ready. +2025-10-27 15:23:45,129 - sales_ETL - INFO - Inserted 9135 records into SalesData table. +2025-10-27 18:45:03,675 - sales_ETL - INFO - Successfully read 9135 rows from Excel. 
+2025-10-27 18:45:03,681 - sales_ETL - INFO - Successfully transformed 9135 records. +2025-10-27 18:45:03,856 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-27 18:45:03,902 - sales_ETL - INFO - SalesData table is ready. +2025-10-27 18:45:05,009 - sales_ETL - INFO - Inserted 9135 records into SalesData table. +2025-10-28 17:01:28,800 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-28 17:01:28,810 - sales_ETL - INFO - Successfully transformed 9135 records. +2025-10-28 17:01:28,967 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-28 17:01:28,987 - sales_ETL - INFO - SalesData table is ready. +2025-10-28 17:01:30,774 - sales_ETL - INFO - Inserted 9135 records into SalesData table. +2025-10-28 17:09:23,613 - sales_ETL - INFO - Successfully read 9135 rows from Excel. +2025-10-28 17:09:23,634 - sales_ETL - INFO - Successfully transformed 9135 records. +2025-10-28 17:09:23,844 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication. +2025-10-28 17:09:23,867 - sales_ETL - INFO - SalesData table is ready. +2025-10-28 17:09:27,215 - sales_ETL - INFO - Inserted 9135 records into SalesData table. diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.md b/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.md new file mode 100644 index 0000000..982d6ef --- /dev/null +++ b/03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.md @@ -0,0 +1,33 @@ +# Sales Data ETL Pipeline + +A modular **ETL (Extract, Transform, Load)** project for processing sales data from Excel files into a SQL Server database using Python. +The project includes structured logging and configurable database connectivity. + +--- + +## Overview + +This ETL pipeline automates the process of reading sales data from Excel, cleaning and transforming it, and loading it into a SQL Server table named **`Sales`**. 
+
+The process follows a simple 3-step design:
+
+1. **Extractor** — Reads raw sales data from Excel files.
+2. **Transformer** — Cleans, validates, and formats data.
+3. **Loader** — Loads transformed data into SQL Server.
+
+Comprehensive logging ensures each stage of the ETL process is traceable and auditable.
+
+---
+
+## 📁 Project Structure
+
+### Sales_python_project
+
+- **config.py** — Configuration file (database connection, file paths)
+- **extractor.py** — Extracts data from Excel
+- **transformer.py** — Transforms/cleans the extracted data
+- **loader.py** — Loads data into SQL Server
+- **logger_config.py** — Logger setup for consistent logging
+- **main.py** — Main ETL execution script
+- **sales_transactions.xlsx** — Input data file
+- **README.md** — Project documentation
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/sales_transactions.xlsx b/03_labs/Arhum_Qaiser/Sales_python_project/sales_transactions.xlsx
new file mode 100644
index 0000000..588ca36
Binary files /dev/null and b/03_labs/Arhum_Qaiser/Sales_python_project/sales_transactions.xlsx differ
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/test_conn.py b/03_labs/Arhum_Qaiser/Sales_python_project/test_conn.py
new file mode 100644
index 0000000..e677b7c
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/test_conn.py
@@ -0,0 +1,14 @@
+"""Manual smoke test: try to open a trusted connection to SQL Server."""
+import pyodbc
+
+try:
+    conn = pyodbc.connect(
+        "DRIVER={ODBC Driver 17 for SQL Server};"
+        r"SERVER=ARHUM-PC\SQLEXPRESS;"  # raw string: "\S" is not a valid escape
+        "trusted_connection=yes;"
+    )
+except Exception as e:
+    print(f"Connection failed: {e}")
+else:
+    print("Connection successful!")
+    conn.close()
diff --git a/03_labs/Arhum_Qaiser/Sales_python_project/transformer.py b/03_labs/Arhum_Qaiser/Sales_python_project/transformer.py
new file mode 100644
index 0000000..e7d006b
--- /dev/null
+++ b/03_labs/Arhum_Qaiser/Sales_python_project/transformer.py
@@ -0,0 +1,33 @@
+from logger_config import setup_logger
+
+
+class transformer:
+    """Convert raw sales rows to the field types the loader inserts."""
+
+    def __init__(self):
+        self.logger = setup_logger()
+
+    def clean_data(self, data):
+        """Return cleaned copies of the records that convert successfully.
+
+        ReportID, SalesPersonID, ProductID and Quantity are cast to str and
+        TotalSalesValue to float. A record with a missing or unconvertible
+        field is logged and skipped; the remaining records are still processed.
+        """
+        transformed_data = []
+        for record in data:
+            try:
+                transformed_data.append({
+                    "ReportID": str(record["ReportID"]),
+                    "SalesPersonID": str(record["SalesPersonID"]),
+                    "ProductID": str(record["ProductID"]),
+                    "Quantity": str(record["Quantity"]),
+                    "TotalSalesValue": float(record["TotalSalesValue"]),
+                })
+            except Exception as e:
+                # Skip only this record and keep going; stopping the loop here
+                # would silently discard every record after the first bad one.
+                self.logger.warning(f"Value is missing while transforming {record}: {e}")
+
+        self.logger.info(f"Successfully transformed {len(transformed_data)} records.")
+        return transformed_data