# ✅ Week 6 ADF Assignment Simulation in Colab

This notebook simulates the full Week 6 ADF assignment using SQLite databases, pandas, and retry logic. It covers:
- On-prem → cloud transfer
- Full and Incremental loads
- Watermarking
- FTP download simulation
- Retry logic
- Simulated scheduling (daily/monthly)

In [1]:
# 📦 Install Required Packages
!pip install pandas sqlalchemy apscheduler paramiko

Collecting apscheduler
  Downloading APScheduler-3.11.0-py3-none-any.whl.metadata (6.4 kB)
Collecting paramiko
  Downloading paramiko-3.5.1-py3-none-any.whl.metadata (4.6 kB)
Collecting bcrypt>=3.2 (from paramiko)
  Downloading bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl.metadata (10 kB)
Collecting pynacl>=1.5 (from paramiko)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading APScheduler-3.11.0-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.0/64.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading paramiko-3.5.1-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.3/227.3 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl (284 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m20.0 MB/s[0m eta [36m0:00:

In [2]:
# 📁 Step 1: Setup onprem.db (source) with sample data
import sqlite3
from datetime import datetime, timedelta

src_conn = sqlite3.connect('onprem.db')
cur = src_conn.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS source_table (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    data TEXT,
    last_modified TEXT
)''')

# Insert 3 sample rows
now = datetime.now()
rows = [
    ('First row', (now - timedelta(days=3)).strftime('%Y-%m-%d %H:%M:%S')),
    ('Second row', (now - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')),
    ('Third row', now.strftime('%Y-%m-%d %H:%M:%S'))
]
cur.executemany('INSERT INTO source_table (data, last_modified) VALUES (?, ?)', rows)
src_conn.commit()
src_conn.close()
print('✅ onprem.db setup complete.')

✅ onprem.db setup complete.


In [3]:
# ☁️ Step 2: Setup cloud.db (destination + watermark table)
tgt_conn = sqlite3.connect('cloud.db')
tgt_cur = tgt_conn.cursor()
tgt_cur.execute('''CREATE TABLE IF NOT EXISTS destination_table (
    id INTEGER PRIMARY KEY,
    data TEXT,
    last_modified TEXT
)''')
tgt_cur.execute('''CREATE TABLE IF NOT EXISTS watermark_control (
    id INTEGER PRIMARY KEY,
    last_modified TEXT
)''')
tgt_conn.commit()
tgt_conn.close()
print('✅ cloud.db setup complete.')

✅ cloud.db setup complete.


In [4]:
# 🔁 Step 3: Full Load Function
import pandas as pd

def full_load():
    src = sqlite3.connect('onprem.db')
    tgt = sqlite3.connect('cloud.db')
    df = pd.read_sql('SELECT * FROM source_table', src)
    df.to_sql('destination_table', tgt, if_exists='replace', index=False)
    src.close()
    tgt.close()
    print(f'✅ Full load done: {len(df)} rows.')

# Run full load
full_load()

✅ Full load done: 3 rows.


In [5]:
# 🕓 Step 4: Incremental Load Function with Watermark
def incremental_load():
    tgt = sqlite3.connect('cloud.db')
    cur = tgt.cursor()
    cur.execute("SELECT last_modified FROM watermark_control WHERE id=1")
    row = cur.fetchone()
    last_wm = row[0] if row else '1900-01-01 00:00:00'

    src = sqlite3.connect('onprem.db')
    df = pd.read_sql(f"""
        SELECT * FROM source_table WHERE last_modified > '{last_wm}'
    """, src)

    if not df.empty:
        df.to_sql('destination_table', tgt, if_exists='append', index=False)
        new_wm = df['last_modified'].max()
        cur.execute("REPLACE INTO watermark_control (id, last_modified) VALUES (1, ?)", (new_wm,))
        tgt.commit()
        print(f'✅ Incremental load: {len(df)} rows, watermark updated to {new_wm}')
    else:
        print('✅ No new data found.')

    src.close()
    tgt.close()

# Run incremental load
incremental_load()

✅ Incremental load: 3 rows, watermark updated to 2025-07-13 08:21:35


In [6]:
# 🔁 Retry Logic Example
import time

def retry(func, retries=3, delay=3):
    for i in range(retries):
        try:
            return func()
        except Exception as e:
            print(f'⚠️ Retry {i+1}/{retries} failed: {e}')
            time.sleep(delay)
    print('❌ All retries failed.')

# Example with retry wrapper
retry(incremental_load)

✅ No new data found.
