The following example shows how to use pygrametl to read data from a CSV file, transform it, and load it into a PostgreSQL database:

In [None]:
from pygrametl.datasources import CSVSource
from pygrametl.tables import Dimension, FactTable
from pygrametl.transforms import Lookup, Trim, HashKey
from pygrametl.etl import ETLManager
import psycopg2

# One-shot ETL run: read rows from mydata.csv, clean them, and load them into
# the `customer` dimension and `orders` fact table of a PostgreSQL database.
#
# NOTE(review): `pygrametl.transforms`, `pygrametl.etl` (Lookup, Trim, HashKey,
# ETLManager) and the `lookups=` / `cursor=` keyword arguments used below could
# not be matched to the published pygrametl API by this reviewer -- verify them
# against the installed pygrametl version before relying on this example.
#
# NOTE: credentials are hard-coded for illustration only; load them from the
# environment or a secrets store in real deployments.

# Connect to PostgreSQL database
conn = psycopg2.connect(
    database="mydatabase",
    user="myusername",
    password="mypassword",
    host="localhost",
    port="5432",
)
cursor = conn.cursor()

try:
    # pygrametl documents CSVSource as an alias of csv.DictReader, which reads
    # from an open file object rather than a path, so the file is opened here
    # and kept open until the ETL run has consumed it.
    with open('mydata.csv', newline='') as csv_file:
        # Define the CSV source
        csv_source = CSVSource(csv_file, delimiter=',')

        # Define a dimension table
        dim_customer = Dimension(
            name='customer',
            key='customer_id',
            attributes=['customer_name', 'customer_email'],
            lookupatts=['customer_name', 'customer_email'],
            lookups=[
                Lookup('customer', 'customer_name', 'customer_name'),
                Lookup('customer', 'customer_email', 'customer_email'),
            ],
            cursor=cursor,
        )

        # Define a fact table
        fact_orders = FactTable(
            name='orders',
            keyrefs=['customer_id', 'order_date'],
            measures=['total_amount'],
            cursor=cursor,
        )

        # Define the ETL manager
        manager = ETLManager()

        # Define the ETL process
        manager.adddatasource('customers', csv_source)
        manager.addtransformation('customer_name', Trim, 'customers', 'name')
        manager.addtransformation('customer_email', Trim, 'customers', 'email')
        manager.addtransformation('customer_id', HashKey, 'customers', ['name', 'email'], keyfield='customer_id')
        manager.adddimension('customer_dim', dim_customer, ['customer_id', 'customer_name', 'customer_email'])
        manager.addfacttable('orders_fact', fact_orders, ['customer_id', 'order_date', 'total_amount'])

        # Execute the ETL process
        manager.execute()

    # Commit the changes to the database only after the whole run succeeded
    conn.commit()
except Exception:
    # Discard the partial load so a failed run leaves no half-written data,
    # then let the error surface to the caller.
    conn.rollback()
    raise
finally:
    # Close the database connection whether or not the run succeeded
    cursor.close()
    conn.close()

Automating the ETL process above with APScheduler so that it runs on a schedule:

In [None]:
from pygrametl.datasources import CSVSource
from pygrametl.tables import Dimension, FactTable
from pygrametl.transforms import Lookup, Trim, HashKey
from pygrametl.etl import ETLManager
import psycopg2
from apscheduler.schedulers.blocking import BlockingScheduler

# Scheduled ETL: the same CSV -> PostgreSQL load as the previous example, run
# once immediately and then every day at 01:00 by APScheduler.
#
# NOTE(review): `pygrametl.transforms`, `pygrametl.etl` (Lookup, Trim, HashKey,
# ETLManager) and the `lookups=` / `cursor=` keyword arguments used below could
# not be matched to the published pygrametl API by this reviewer -- verify them
# against the installed pygrametl version before relying on this example.
#
# NOTE: credentials are hard-coded for illustration only; load them from the
# environment or a secrets store in real deployments.


# Define the function to execute the ETL process
def execute_etl():
    """Run one complete ETL pass and persist its results.

    Each call opens its own database connection and CSV file, builds the
    pipeline, executes it, and commits. Building everything per run matters
    for scheduled execution: a connection closed after the first run cannot be
    reused, and a CSV source that has been read once has no rows left.

    Raises:
        Exception: any error from connecting, reading the CSV, or running the
            pipeline is re-raised after the transaction has been rolled back.
    """
    # Connect to PostgreSQL database
    conn = psycopg2.connect(
        database="mydatabase",
        user="myusername",
        password="mypassword",
        host="localhost",
        port="5432",
    )
    cursor = conn.cursor()

    try:
        # pygrametl documents CSVSource as an alias of csv.DictReader, which
        # reads from an open file object rather than a path.
        with open('mydata.csv', newline='') as csv_file:
            # Define the CSV source
            csv_source = CSVSource(csv_file, delimiter=',')

            # Define a dimension table
            dim_customer = Dimension(
                name='customer',
                key='customer_id',
                attributes=['customer_name', 'customer_email'],
                lookupatts=['customer_name', 'customer_email'],
                lookups=[
                    Lookup('customer', 'customer_name', 'customer_name'),
                    Lookup('customer', 'customer_email', 'customer_email'),
                ],
                cursor=cursor,
            )

            # Define a fact table
            fact_orders = FactTable(
                name='orders',
                keyrefs=['customer_id', 'order_date'],
                measures=['total_amount'],
                cursor=cursor,
            )

            # Define the ETL manager
            manager = ETLManager()

            # Define the ETL process
            manager.adddatasource('customers', csv_source)
            manager.addtransformation('customer_name', Trim, 'customers', 'name')
            manager.addtransformation('customer_email', Trim, 'customers', 'email')
            manager.addtransformation('customer_id', HashKey, 'customers', ['name', 'email'], keyfield='customer_id')
            manager.adddimension('customer_dim', dim_customer, ['customer_id', 'customer_name', 'customer_email'])
            manager.addfacttable('orders_fact', fact_orders, ['customer_id', 'order_date', 'total_amount'])

            # Execute the ETL process
            manager.execute()

        # Commit the changes to the database only after the whole run succeeded
        conn.commit()
    except Exception:
        # Discard the partial load, then let the error surface so the
        # scheduler can report the failed job.
        conn.rollback()
        raise
    finally:
        # Close the database connection whether or not the run succeeded
        cursor.close()
        conn.close()


# Run once right away, matching the original script's immediate first load
execute_etl()

# Create the scheduler and schedule the ETL process to run every day at 1 AM
scheduler = BlockingScheduler()
scheduler.add_job(execute_etl, 'cron', hour='1', minute='0')

# Start the scheduler (blocks until the process is stopped). No connection
# cleanup is needed afterwards because each run closes its own connection.
scheduler.start()