In [9]:
import pandas as pd
from datetime import datetime

# Input data file and the file that stores the last-extraction timestamp.
csv_path = "custom_data.csv"
timestamp_path = "last_extraction.txt"

### --- Section 1: Full Extraction ---
# Load every row of the CSV, asking pandas to parse transaction_date as dates
# so it can be compared against a datetime later on.
df = pd.read_csv(csv_path, parse_dates=["transaction_date"])

# Banner and preview go out in one call; the newline separator reproduces the
# output of two consecutive print() calls.
print("=== Full Extraction ===", df.head(), sep="\n")
print(f"Extracted {df.shape[0]} rows fully.\n")

### --- Section 2: Incremental Extraction ---
print("=== Incremental Extraction ===")

# Watermark of the previous run. A missing or empty file means nothing has
# been extracted yet, so every row dated after the epoch counts as new.
last_extraction_time = datetime(1970, 1, 1)

# Keep the try body to the file access only, so that a parsing problem is not
# confused with (or hidden behind) the "file does not exist yet" case.
try:
    with open(timestamp_path, "r", encoding="utf-8") as f:
        content = f.read().strip()
except FileNotFoundError:
    content = ""

if content:
    # pd.Timestamp accepts both the legacy "%Y-%m-%d %H:%M:%S" form and the
    # fractional-second form written in Section 3. It is a datetime subclass,
    # so the comparison below works exactly as it did with a plain datetime.
    try:
        last_extraction_time = pd.Timestamp(content)
    except ValueError as err:
        raise ValueError(
            f"Cannot parse last extraction timestamp {content!r} "
            f"read from {timestamp_path!r}"
        ) from err

# Keep only rows strictly newer than the watermark.
new_data = df[df['transaction_date'] > last_extraction_time]
print(f"Extracted {len(new_data)} rows incrementally since last check.")

### --- Section 3: Save New Timestamp ---
if not new_data.empty:
    # Persist the newest transaction time at full precision. Truncating to
    # whole seconds would leave the watermark slightly *before* the newest
    # row whenever that row has a sub-second part, and the strict ">" filter
    # would then re-extract it on every subsequent run. For whole-second
    # values isoformat(sep=" ") yields the same "YYYY-MM-DD HH:MM:SS" text
    # as before.
    new_timestamp = new_data['transaction_date'].max().isoformat(sep=" ")
    with open(timestamp_path, "w", encoding="utf-8") as f:
        f.write(new_timestamp)
    print(f"Updated last extraction timestamp to: {new_timestamp}")
else:
    print("No new data to update timestamp.")


=== Full Extraction ===
   transaction_id  customer_id  product_id product_name  quantity  \
0               1         1097         103       Tablet         4   
1               2         1097         101       Laptop         5   
2               3         1006         102   Smartphone         5   
3               4         1074         107      Printer         2   
4               5         1084         106        Mouse         4   

   price_per_unit  total_price    transaction_date  
0          275.40      1101.60 2025-06-06 16:04:38  
1          149.56       747.80 2025-06-07 16:04:38  
2          339.26      1696.30 2025-06-08 16:04:38  
3          275.67       551.34 2025-06-05 16:04:38  
4          143.74       574.96 2025-06-05 16:04:38  
Extracted 100 rows fully.

=== Incremental Extraction ===
Extracted 0 rows incrementally since last check.
No new data to update timestamp.
