# ETL Load

## Modules

In [1]:
import pandas as pd
import sqlite3

## Preamble

Loading the transformed CSV files as Pandas dataframes.

In [2]:
# Loading the transformed files
df_full = pd.read_csv("transformed/transformed_full.csv")
df_incremental = pd.read_csv("transformed/transformed_incremental.csv")

Observing the data inside the transformed CSV files

In [3]:
# Displaying the full and incremental transformed dataframes
df_full.head()

Unnamed: 0,customer_name,product,quantity,unit_price,order_date,region,total_price
0,Diana,Tablet,2,500,20 January 2024,South,1000
1,Eve,Laptop,2,500,29 April 2024,North,1000
2,Charlie,Laptop,2,250,08 January 2024,South,500
3,Eve,Laptop,2,750,07 January 2024,West,1500
4,Eve,Tablet,3,500,07 March 2024,South,1500


In [4]:
df_incremental.head()

Unnamed: 0,customer_name,product,quantity,unit_price,order_date,region,total_price
0,Alice,Laptop,2,900.0,09 May 2024,Central,1800.0
1,Heidi,Laptop,1,300.0,07 May 2024,Central,300.0
2,Heidi,Laptop,1,600.0,04 May 2024,Central,600.0
3,Heidi,Tablet,2,300.0,26 May 2024,Central,600.0
4,Heidi,Tablet,2,600.0,21 May 2024,North,1200.0


## Loading to SQLite Database

### Connecting to SQLite

In [5]:
# Creating the connections
full_conn = sqlite3.connect("loaded/full_data.db")
incremental_conn = sqlite3.connect("loaded/incremental_data.db")

### Creating the Databases and Tables

In [6]:
# Creating the databases and tables
df_full.to_sql("full_data", con=full_conn, if_exists="replace", index=False)
df_incremental.to_sql("incremental_data", con=incremental_conn, if_exists="replace", index=False)

10

### Previewing Stored Data

In [7]:
# Previewing the data using SQL Queries
df_full_loaded = pd.read_sql_query("SELECT * FROM full_data LIMIT 5", full_conn)
df_full_loaded

Unnamed: 0,customer_name,product,quantity,unit_price,order_date,region,total_price
0,Diana,Tablet,2,500,20 January 2024,South,1000
1,Eve,Laptop,2,500,29 April 2024,North,1000
2,Charlie,Laptop,2,250,08 January 2024,South,500
3,Eve,Laptop,2,750,07 January 2024,West,1500
4,Eve,Tablet,3,500,07 March 2024,South,1500


In [8]:
df_incremental_loaded = pd.read_sql_query("SELECT * FROM incremental_data LIMIT 5", incremental_conn)
df_incremental_loaded

Unnamed: 0,customer_name,product,quantity,unit_price,order_date,region,total_price
0,Alice,Laptop,2,900.0,09 May 2024,Central,1800.0
1,Heidi,Laptop,1,300.0,07 May 2024,Central,300.0
2,Heidi,Laptop,1,600.0,04 May 2024,Central,600.0
3,Heidi,Tablet,2,300.0,26 May 2024,Central,600.0
4,Heidi,Tablet,2,600.0,21 May 2024,North,1200.0


### Closing the Connections

In [9]:
# Closing connections
full_conn.close()
incremental_conn.close()