Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path

# Connection settings for the SQL Server instance the loader writes to.
# The server name is a raw string so the backslash before the instance
# name is taken literally.
config_dba = {
    "server": r"localhost\SQLEXPRESS",
    "database": "SalesDB",
    "driver": "ODBC Driver 17 for SQL Server",
}

# The input workbook ships in the same folder as this module, so resolve it
# relative to the module instead of hard-coding one user's absolute path.
# Kept as a plain string so existing consumers see the same type as before.
excel_file_path = str(Path(__file__).resolve().parent / "sales_transactions.xlsx")
27 changes: 27 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from openpyxl import load_workbook
from logger_config import setup_logger
from config import excel_file_path

class extractor :
    """Extract step of the ETL: reads the configured Excel workbook."""

    def __init__(self) :
        # Every pipeline stage logs through the same helper.
        self.logger = setup_logger()

    def read_excel(self):
        """Read the active worksheet into a list of row dictionaries.

        The first worksheet row supplies the dictionary keys; each later
        row becomes one dictionary of cell values.

        Returns:
            list: The rows collected so far. Failures are logged instead of
            raised, so the list is empty (or partial) when reading fails.
        """
        rows = []
        try:
            active_sheet = load_workbook(excel_file_path).active
            column_names = [header_cell.value for header_cell in active_sheet[1]]

            # Data starts on the second row; the first one holds the headers.
            for values in active_sheet.iter_rows(min_row=2, values_only=True):
                rows.append(dict(zip(column_names, values)))

            self.logger.info(f"Successfully read {len(rows)} rows from Excel.")
        except Exception as e:
            # A missing file gets its own hint; anything else is reported
            # with the underlying error text.
            if isinstance(e, FileNotFoundError):
                self.logger.error("Excel file not found. Check the file path.")
            else:
                self.logger.error(f"Error reading Excel file: {e}")

        return rows
69 changes: 69 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pyodbc
from logger_config import setup_logger
from config import config_dba

class loader:
    """Load step of the ETL: writes sales records into the SQL Server ``Sales`` table."""

    def __init__(self):
        # Every pipeline stage logs through the same helper.
        self.logger = setup_logger()

    def connect(self):
        """Open a pyodbc connection using Windows Authentication.

        The connection string is built from ``config_dba``.

        Returns:
            The open connection, or ``None`` when connecting fails (the
            error is logged rather than raised).
        """
        try:
            conn_str = (
                f"DRIVER={{{config_dba['driver']}}};"
                f"SERVER={config_dba['server']};"
                f"DATABASE={config_dba['database']};"
                "Trusted_Connection=yes;"
            )
            conn = pyodbc.connect(conn_str)
            self.logger.info("Successfully connected to the database using Windows Authentication.")
            return conn
        except Exception as e:
            self.logger.error(f"Database connection failed: {e}")
            return None

    def _close_cursor(self, cursor):
        """Close *cursor* if one was opened, logging (not raising) on failure."""
        if cursor is None:
            return
        try:
            cursor.close()
        except Exception as e:
            self.logger.warning(f"Failed to close cursor: {e}")

    def delete_table(self, conn):
        """Drop the ``Sales`` table if it exists.

        Args:
            conn: An open database connection.

        Failures are logged rather than raised.
        """
        drop_table_query = "DROP TABLE IF EXISTS Sales"
        cursor = None
        try:
            cursor = conn.cursor()
            cursor.execute(drop_table_query)
            conn.commit()
            self.logger.info("Sales table dropped successfully.")
        except Exception as e:
            self.logger.error(f"Failed to drop table: {e}")
        finally:
            self._close_cursor(cursor)

    def create_and_insert(self, conn, records):
        """Create the ``Sales`` table when absent and insert *records* into it.

        Args:
            conn: An open database connection.
            records: Iterable of mappings with the keys ``ReportID``,
                ``SalesPersonID``, ``ProductID``, ``Quantity`` and
                ``TotalSalesValue``.

        If the cursor cannot be opened or the table cannot be created, the
        error is logged and nothing is inserted. A record that fails to
        insert is logged and skipped; the remaining records are still
        attempted. An error from the final commit propagates to the caller.
        """
        create_table_query = """
            IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='Sales')
            CREATE TABLE Sales (
                ReportID varchar(50),
                SalesPersonID varchar(50),
                ProductID varchar(50),
                Quantity varchar(50),
                TotalSalesValue FLOAT
            )
        """
        insert_query = """
            INSERT INTO Sales (ReportID, SalesPersonID, ProductID, Quantity, TotalSalesValue)
            VALUES (?, ?, ?, ?, ?)
        """
        cursor = None
        try:
            try:
                cursor = conn.cursor()
                cursor.execute(create_table_query)
                conn.commit()
                self.logger.info("Sales table is ready.")
            except Exception as e:
                # Without a cursor and a table every insert would fail too,
                # so stop here instead of logging one error per record.
                self.logger.error(f"Failed to create table: {e}")
                return

            inserted = 0
            for r in records:
                try:
                    cursor.execute(
                        insert_query,
                        r["ReportID"], r["SalesPersonID"], r["ProductID"], r["Quantity"], r["TotalSalesValue"]
                    )
                    inserted += 1
                except Exception as e:
                    self.logger.error(f"Failed to insert record {r}: {e}")

            conn.commit()
            self.logger.info(f"Inserted {inserted} records into Sales table.")
        finally:
            # Release the cursor on every exit path, including the early return.
            self._close_cursor(cursor)
19 changes: 19 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/logger_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import logging

def setup_logger():
    """Return the shared ``sales_ETL`` logger, configuring it on first use.

    The logger is set to INFO. The first call attaches a file handler
    writing to ``sales_etl.log`` and a console handler, both using the same
    timestamped format; later calls return the already-configured logger.

    Returns:
        logging.Logger: The ``sales_ETL`` logger.
    """
    logger = logging.getLogger('sales_ETL')
    logger.setLevel(logging.INFO)

    # Check this logger's own handler list: hasHandlers() also reports
    # handlers on ancestor loggers, which would skip configuration whenever
    # the root logger has one. Building the handlers only inside this branch
    # also avoids opening the log file again on every call.
    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        file_handler = logging.FileHandler('sales_etl.log')
        file_handler.setFormatter(formatter)

        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        logger.addHandler(console_handler)

    return logger
34 changes: 34 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from extractor import extractor
from transformer import transformer
from loader import loader
from logger_config import setup_logger

def main():
    """Run the sales ETL: extract from Excel, transform, then load into the database.

    After connecting, the existing table is dropped and the user is asked
    to confirm before any data is inserted. Answering anything other than
    "yes" stops the run after the drop. The connection is closed on every
    exit path, and the success message is printed only after the load step
    has run.
    """
    print("Starting ETL process...")

    extractor_obj = extractor()
    transformer_obj = transformer()
    loader_obj = loader()

    # Step 1: Extract
    data = extractor_obj.read_excel()

    # Step 2: Transform
    transformed_data = transformer_obj.clean_data(data)

    # Step 3: Load
    conn = loader_obj.connect()
    if not conn:
        # Nothing was loaded, so do not report success.
        print("ETL Process aborted: could not connect to the database.")
        return

    try:
        loader_obj.delete_table(conn)
        loader_obj.logger.info("Prompting user for confirmation before data insertion.")
        proceed = input("Do you want to continue with data insertion? (yes/no): ")
        # Tolerate stray whitespace around the answer.
        if proceed.strip().lower() != 'yes':
            print("ETL Process stopped after the drop operation.")
            return
        loader_obj.create_and_insert(conn, transformed_data)
    finally:
        # Release the connection even if a step above raises or returns early.
        conn.close()

    print("ETL Process Completed Successfully!")


if __name__ == "__main__":
    main()
24 changes: 24 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
click==8.3.0
cloudpickle==3.1.1
colorama==0.4.6
dask==2025.10.0
et_xmlfile==2.0.0
fsspec==2025.9.0
greenlet==3.2.4
locket==1.0.0
numpy==2.3.4
openpyxl==3.1.5
packaging==25.0
pandas==2.3.3
partd==1.4.2
pyodbc==5.3.0
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.3
setuptools==80.9.0
six==1.17.0
SQLAlchemy==2.0.44
toolz==1.1.0
typing_extensions==4.15.0
tzdata==2025.2
wheel==0.45.1
59 changes: 59 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
2025-10-27 14:43:31,335 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 14:43:31,335 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 14:43:31,459 - sales_ETL - ERROR - Database connection failed: ('28000', "[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456)")
2025-10-27 14:47:55,919 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 14:47:55,920 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 14:47:55,961 - sales_ETL - ERROR - Database connection failed: ('28000', "[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user 'arhum'. (18456)")
2025-10-27 14:57:01,904 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 14:57:01,904 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 14:57:01,959 - sales_ETL - ERROR - Database connection failed: ('28000', '[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060)')
2025-10-27 15:01:46,258 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:01:46,259 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:01:46,259 - sales_ETL - ERROR - Database connection failed: 'ODBC Driver 17 for SQL Server'
2025-10-27 15:04:36,782 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:04:36,783 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:04:36,783 - sales_ETL - ERROR - Database connection failed: 'ODBC Driver 17 for SQL Server'
2025-10-27 15:05:17,350 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:05:17,351 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:05:17,353 - sales_ETL - ERROR - Database connection failed: ('IM002', '[IM002] [Microsoft][ODBC Driver Manager] Data source name not found and no default driver specified (0) (SQLDriverConnect)')
2025-10-27 15:05:44,304 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:05:44,304 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:06:01,619 - sales_ETL - ERROR - Database connection failed: ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: Error Locating Server/Instance Specified [xFFFFFFFF]. (-1) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (-1)')
2025-10-27 15:06:51,503 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:06:51,504 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:06:51,553 - sales_ETL - ERROR - Database connection failed: ('28000', '[28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456) (SQLDriverConnect); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Login failed for user \'ARHUM-PC\\Arhum Qaisar\'. (18456); [28000] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot open database "SalesDB" requested by the login. The login failed. (4060)')
2025-10-27 15:13:39,698 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:13:39,699 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:13:39,779 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-27 15:13:39,797 - sales_ETL - ERROR - Failed to create table: ('42S01', "[42S01] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]There is already an object named 'Sales' in the database. (2714) (SQLExecDirectW)")
2025-10-27 15:13:39,798 - sales_ETL - INFO - Inserted 0 records into SalesData table.
2025-10-27 15:14:47,534 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:14:47,535 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:14:47,592 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-27 15:14:47,609 - sales_ETL - INFO - SalesData table is ready.
2025-10-27 15:14:47,609 - sales_ETL - INFO - Inserted 0 records into SalesData table.
2025-10-27 15:15:12,860 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:15:12,861 - sales_ETL - INFO - Successfully transformed 0 records.
2025-10-27 15:15:12,915 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-27 15:15:12,921 - sales_ETL - INFO - SalesData table is ready.
2025-10-27 15:15:12,921 - sales_ETL - INFO - Inserted 0 records into SalesData table.
2025-10-27 15:23:43,910 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 15:23:43,915 - sales_ETL - INFO - Successfully transformed 9135 records.
2025-10-27 15:23:43,996 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-27 15:23:44,006 - sales_ETL - INFO - SalesData table is ready.
2025-10-27 15:23:45,129 - sales_ETL - INFO - Inserted 9135 records into SalesData table.
2025-10-27 18:45:03,675 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-27 18:45:03,681 - sales_ETL - INFO - Successfully transformed 9135 records.
2025-10-27 18:45:03,856 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-27 18:45:03,902 - sales_ETL - INFO - SalesData table is ready.
2025-10-27 18:45:05,009 - sales_ETL - INFO - Inserted 9135 records into SalesData table.
2025-10-28 17:01:28,800 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-28 17:01:28,810 - sales_ETL - INFO - Successfully transformed 9135 records.
2025-10-28 17:01:28,967 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-28 17:01:28,987 - sales_ETL - INFO - SalesData table is ready.
2025-10-28 17:01:30,774 - sales_ETL - INFO - Inserted 9135 records into SalesData table.
2025-10-28 17:09:23,613 - sales_ETL - INFO - Successfully read 9135 rows from Excel.
2025-10-28 17:09:23,634 - sales_ETL - INFO - Successfully transformed 9135 records.
2025-10-28 17:09:23,844 - sales_ETL - INFO - Successfully connected to the database using Windows Authentication.
2025-10-28 17:09:23,867 - sales_ETL - INFO - SalesData table is ready.
2025-10-28 17:09:27,215 - sales_ETL - INFO - Inserted 9135 records into SalesData table.
33 changes: 33 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/sales_etl.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Sales Data ETL Pipeline

A modular **ETL (Extract, Transform, Load)** project for processing sales data from Excel files into a SQL Server database using Python.
The project includes structured logging and configurable database connectivity.

---

## Overview

This ETL pipeline automates the process of reading sales data from Excel, cleaning and transforming it, and loading it into a SQL Server table named **`Sales`**.

The process follows a simple 3-step design:

1. **Extractor** — Reads raw sales data from Excel files.
2. **Transformer** — Cleans, validates, and formats data.
3. **Loader** — Loads transformed data into SQL Server.

Comprehensive logging ensures each stage of the ETL process is traceable and auditable.

---

## 📁 Project Structure

### Sales_python_project

- **config.py** — Configuration file (database connection, file paths)
- **extractor.py** — Extracts data from Excel
- **transformer.py** — Transforms/cleans the extracted data
- **loader.py** — Loads data into SQL Server
- **logger_config.py** — Logger setup for consistent logging
- **main.py** — Main ETL execution script
- **sales_transactions.xlsx** — Input data file
- **README.md** — Project documentation
Binary file not shown.
11 changes: 11 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/test_conn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pyodbc

# Smoke test: try to open a trusted connection to the local SQL Server
# Express instance and report the outcome on stdout.
conn = None
try:
    conn = pyodbc.connect(
        "DRIVER={ODBC Driver 17 for SQL Server};"
        # Raw string: "\S" is not a valid escape sequence in a normal literal.
        r"SERVER=ARHUM-PC\SQLEXPRESS;"
        "trusted_connection=yes;"
    )
    print("Connection successful!")
except Exception as e:
    print(f"Connection failed: {e}")
finally:
    # Do not leave the probe connection open once the check is done.
    if conn is not None:
        conn.close()
30 changes: 30 additions & 0 deletions 03_labs/Arhum_Qaiser/Sales_python_project/transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from logger_config import setup_logger

class transformer:
    """Transform step of the ETL: validates raw rows and normalises field types."""

    # Columns every record must provide with a non-empty value.
    _REQUIRED_FIELDS = (
        "ReportID",
        "SalesPersonID",
        "ProductID",
        "Quantity",
        "TotalSalesValue",
    )

    def __init__(self):
        # Every pipeline stage logs through the same helper.
        self.logger = setup_logger()

    def clean_data(self, data):
        """Convert raw records into load-ready dictionaries.

        The ID columns and ``Quantity`` are converted to ``str`` and
        ``TotalSalesValue`` to ``float``.

        Args:
            data: Iterable of mappings holding the required columns.

        Returns:
            list: One dictionary per valid record, in input order. A record
            with a missing key, an empty (``None``) value, or a total that
            cannot be converted to ``float`` is logged and skipped; the
            records after it are still processed.
        """
        transformed_data = []
        for record in data:
            try:
                # str(None) would silently store the text "None", so treat
                # empty cells as missing values instead.
                missing = [name for name in self._REQUIRED_FIELDS if record[name] is None]
                if missing:
                    raise ValueError(f"missing value for {', '.join(missing)}")

                transformed_data.append({
                    "ReportID": str(record["ReportID"]),
                    "SalesPersonID": str(record["SalesPersonID"]),
                    "ProductID": str(record["ProductID"]),
                    "Quantity": str(record["Quantity"]),
                    "TotalSalesValue": float(record["TotalSalesValue"]),
                })
            except (KeyError, TypeError, ValueError) as e:
                self.logger.warning(f"Value is missing while transforming {record}: {e}")
                # Skip only the bad record; stopping here would drop every
                # record that follows it.
                continue

        self.logger.info(f"Successfully transformed {len(transformed_data)} records.")
        return transformed_data