In [1]:
from faker import Faker
import pandas as pd
import random

# Seed both sources of randomness so a fresh "Restart & Run All" regenerates
# the same synthetic events: Faker.seed() covers the Faker providers, and
# random.seed() covers the stdlib random.choice() call below.
# NOTE(review): timestamps are drawn from "this month", so they still depend
# on the date the notebook is run — confirm whether that is acceptable.
Faker.seed(42)
random.seed(42)

fake = Faker()

# Build 60 synthetic event records, each with a random user id, an event type
# picked from a fixed set, and an ISO-8601 timestamp string.
data = [
    {
        "user_id": fake.uuid4(),
        "event": random.choice(["login", "purchase", "logout"]),
        "timestamp": fake.date_time_this_month().isoformat(),
    }
    for _ in range(60)
]

# Persist the records as the CSV that the extraction cells read back;
# index=False keeps the pandas row index out of the file.
df = pd.DataFrame(data)
df.to_csv("custom_data.csv", index=False)


In [5]:
import pandas as pd

# Full extraction: read the entire CSV in one pass, with no filtering applied.
df = pd.read_csv("custom_data.csv")

# Report how many rows were loaded, then preview the first rows as the cell's
# displayed output.
print("Full Extraction", f"Extracted {len(df)} rows fully.", sep="\n")
df.head()


Full Extraction
Extracted 60 rows fully.


Unnamed: 0,user_id,event,timestamp
0,7c06618f-8a2f-4ad8-83e6-75594e07513b,logout,2025-06-05T04:40:56
1,75a29b73-119d-4b7c-9812-1c7443cdeda6,logout,2025-06-01T07:09:52
2,ab3abfff-199e-4d94-8e89-3e708f9c1570,purchase,2025-06-05T17:44:53
3,aec10591-d829-4ec7-9b42-840cc91fd30b,logout,2025-06-01T01:59:15
4,d4ee798c-270f-4812-b5dd-96975024f202,logout,2025-06-03T04:56:33


In [6]:
from datetime import datetime

# Load last timestamp (the value saved by the previous incremental run, if any)
try:
    with open("last_extraction.txt", "r") as f:
        saved_mark = f.read().strip()
except FileNotFoundError:
    saved_mark = ""

# A missing or empty state file both mean "no previous extraction", so fall
# back to the earliest representable datetime and extract everything.
# An empty file can occur if a previous write was interrupted after the file
# was truncated. Non-empty but malformed content still raises ValueError, so
# a corrupted saved timestamp is never silently ignored.
last_time = datetime.fromisoformat(saved_mark) if saved_mark else datetime.min

df = pd.read_csv("custom_data.csv")
# The CSV stores timestamps as ISO-8601 strings; convert them so they can be
# compared against last_time.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Filter for new records: keep only rows strictly newer than the saved time
new_data = df[df['timestamp'] > last_time]

print("Incremental Extraction")
print(f"Extracted {len(new_data)} rows incrementally since last check.")
new_data.head()


Incremental Extraction
Extracted 60 rows incrementally since last check.


Unnamed: 0,user_id,event,timestamp
0,7c06618f-8a2f-4ad8-83e6-75594e07513b,logout,2025-06-05 04:40:56
1,75a29b73-119d-4b7c-9812-1c7443cdeda6,logout,2025-06-01 07:09:52
2,ab3abfff-199e-4d94-8e89-3e708f9c1570,purchase,2025-06-05 17:44:53
3,aec10591-d829-4ec7-9b42-840cc91fd30b,logout,2025-06-01 01:59:15
4,d4ee798c-270f-4812-b5dd-96975024f202,logout,2025-06-03 04:56:33


In [7]:
# Update timestamp
# Record the newest timestamp among the rows just extracted so the next
# incremental run only picks up rows after it. When nothing new was extracted
# the state file is left untouched.
if not new_data.empty:
    latest_seen = new_data['timestamp'].max()
    new_timestamp = latest_seen.isoformat()
    with open("last_extraction.txt", "w") as f:
        f.write(new_timestamp)
