In [None]:
import pandas as pd
from tabulate import tabulate

# Full extraction: read the entire immigration CSV into a DataFrame.
file_path = r"K:\Code Projects\ETL_Extract_Justice_Chawanda_670444\Immigration_Data.csv"
data = pd.read_csv(file_path)

# Report the dataset dimensions, then render every row as a grid table.
row_count, column_count = data.shape
print(f"Rows: {row_count}, Columns: {column_count}")
grid_table = tabulate(data, headers='keys', tablefmt='grid')
print(grid_table)

# Confirm how many rows the full extraction produced.
print(f"Extracted {row_count} rows fully.")

In [None]:
import os
from datetime import datetime
import pandas as pd

# Incremental extraction: select the rows whose timestamp is newer than the
# watermark persisted in last_extraction.txt, then persist a new watermark
# (the timestamp of the last row in the dataset).
last_extraction_file = "last_extraction.txt"

# Watermark used when the state file is missing or its content is unreadable.
DEFAULT_EXTRACTION_TIME = datetime(2025, 6, 1, 0, 0, 0)
# Sentence this workflow writes in front of the last record's timestamp.
DESCRIPTION_PREFIX = "The last record was added on: "


def _parse_last_extraction(content, default=DEFAULT_EXTRACTION_TIME):
    """Return the watermark stored in *content*, or *default* if unreadable.

    The state file is written by this workflow as
    "The last record was added on: <timestamp>", so that sentence must be
    accepted here in addition to a bare timestamp. Otherwise every run would
    fall back to *default* and the incremental filter would never advance.

    Args:
        content: Raw text read from the state file.
        default: Value returned when no timestamp can be parsed.

    Returns:
        A ``datetime`` (``pandas.Timestamp`` is a ``datetime`` subclass).
    """
    text = content.strip()
    if text.startswith(DESCRIPTION_PREFIX):
        text = text[len(DESCRIPTION_PREFIX):].strip()
    if not text:
        return default
    try:
        parsed = pd.Timestamp(text)
    except (ValueError, TypeError):
        # e.g. "The dataset is empty." or any other non-timestamp content.
        return default
    # A stored "nan"/"NaT" parses to NaT, which would make every comparison False.
    return default if pd.isna(parsed) else parsed


# Read the previous watermark; a missing file simply means "first run".
try:
    with open(last_extraction_file, "r", encoding="utf-8") as f:
        stored_content = f.read()
except FileNotFoundError:
    stored_content = ""
last_extraction_time = _parse_last_extraction(stored_content)

# Filter new or updated records using mixed datetime formats
record_times = pd.to_datetime(data['timestamp'], format='mixed')
new_data = data[record_times > last_extraction_time]

# Print the number of rows extracted incrementally
print(f"Extracted {new_data.shape[0]} rows incrementally since last check.")

# Persist the new watermark: the timestamp of the last record, written in the
# descriptive format that _parse_last_extraction understands on the next run.
if not data.empty:
    last_timestamp = data['timestamp'].iloc[-1]
    description = f"{DESCRIPTION_PREFIX}{last_timestamp}"
else:
    description = "The dataset is empty."
with open(last_extraction_file, "w", encoding="utf-8") as f:
    f.write(description)

In [None]:
# Optionally append one user-entered record; only an answer of "yes"
# (case-insensitive, surrounding whitespace ignored) proceeds.
add_record = input("Do you want to add a new record? (yes/no): ").strip().lower()
if add_record != 'yes':
    print("No new record added.")
else:
    # (column name, prompt) pairs, listed in the order the questions are asked.
    field_prompts = (
        ("immigrant_id", "Enter immigrant ID: "),
        ("passport_number", "Enter passport number: "),
        ("name", "Enter name: "),
        ("country", "Enter country: "),
        ("purpose_of_visit", "Enter purpose of visit: "),
        ("contact", "Enter contact: "),
        ("visa payment_status", "Enter payment status: "),
    )
    new_record = {column: input(prompt) for column, prompt in field_prompts}
    # Stamp the record once all answers have been collected.
    new_record["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Add the record as a new final row and write the whole dataset back to the CSV.
    new_row = pd.DataFrame([new_record])
    data = pd.concat([data, new_row], ignore_index=True)
    data.to_csv(file_path, index=False)

    # Record the final row's timestamp in the last-extraction text file.
    last_timestamp = data['timestamp'].iloc[-1]
    description = f"The last record was added on: {last_timestamp}"
    with open(last_extraction_file, "w") as f:
        f.write(description)

    print("New record added successfully and timestamp updated!")

# Workflow Explanation

## Overview
This workflow is designed to manage and update a dataset of immigration records. It includes functionality to:
1. Display the timestamp of the last recorded entry.
2. Allow the user to add a new record interactively.
3. Update a text file (`last_extraction.txt`) with the timestamp of the last record.

## Steps

### 1. Load the Dataset
- The dataset is loaded from a CSV file located at `K:\Code Projects\ETL_Extract_Justice_Chawanda_670444\Immigration_Data.csv`.
- If the dataset is empty, a message is written to the `last_extraction.txt` file indicating that the dataset is empty.

### 2. Display the Last Recorded Timestamp
- If the dataset is not empty, the timestamp of the last record is retrieved and written to the `last_extraction.txt` file.
- This ensures that the file always reflects the most recent update.

### 3. Add a New Record
- The user is prompted to decide whether they want to add a new record.
- If the user chooses "yes":
  - The user is asked to input details for the new record, including:
    - Immigrant ID
    - Passport Number
    - Name
    - Country
    - Purpose of Visit
    - Contact
    - Payment Status
  - The current timestamp is automatically added to the record.
  - The new record is appended to the dataset.
  - The updated dataset is saved back to the CSV file.
  - The `last_extraction.txt` file is updated with the timestamp of the newly added record.
- If the user chooses "no":
  - A message "No new record added." is displayed.

## Files Used

### 1. `Immigration_Data.csv`
- Stores the dataset of immigration records.
- Updated whenever a new record is added.

### 2. `last_extraction.txt`
- Stores a one-line description containing the timestamp of the last recorded entry (or a note that the dataset is empty).
- Updated every time the workflow is executed, regardless of whether a new record is added.

## Key Features
- Ensures the dataset and `last_extraction.txt` file are always in sync.
- Provides an interactive way to manage records.
- Handles empty datasets gracefully.

## Future Improvements
- Add validation for user inputs to ensure data integrity.
- Implement logging to track changes to the dataset.
- Provide an option to delete or update existing records.