In [54]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import uuid
import random
import os

# Configuration
NUM_ROBOTS = 20          # Number of robots
TIME_INTERVAL = 5         # Time interval in seconds
WAREHOUSE_ID = "WH-001"   # Single warehouse
TASKS = ["PICK", "IN_TRANSIT", "RELEASE"]

# File to store the last run timestamp
LAST_RUN_FILE = "last_run.txt"


def load_last_timestamp(path, lookback_hours=4, fmt='%Y-%m-%d %H:%M:%S'):
    """Return the timestamp at which the next simulated window should start.

    Args:
        path: Location of the checkpoint file holding the previous run's
            timestamp as text.
        lookback_hours: How far before "now" to start when no usable
            checkpoint is available.
        fmt: ``strptime`` format the checkpoint is stored in.

    Returns:
        The parsed checkpoint, or ``datetime.now() - lookback_hours`` when the
        file is missing, empty, or does not match ``fmt``.
    """
    try:
        with open(path, "r") as f:
            raw = f.read().strip()
    except FileNotFoundError:
        # First run: no checkpoint has been written yet.
        return datetime.now() - timedelta(hours=lookback_hours)

    try:
        return datetime.strptime(raw, fmt)
    except ValueError:
        # An empty or truncated checkpoint should not abort the whole cell;
        # fall back to the same default as a first run, but say so.
        print(f"Warning: ignoring unreadable timestamp {raw!r} in {path}; starting fresh.")
        return datetime.now() - timedelta(hours=lookback_hours)


# Load last timestamp or start fresh
last_timestamp = load_last_timestamp(LAST_RUN_FILE)

# Build the fleet: one mutable state record per robot, keyed "robot_1" .. "robot_N".
# Every robot starts active with a random battery level (60-100) and a random
# temperature (30.0-35.0), and with no task or error assigned.
robots = {
    f"robot_{number}": dict(
        robot_id=f"RBT-{number}",
        status="active",
        battery_level=random.randint(60, 100),
        temperature=random.uniform(30.0, 35.0),
        task_id=None,
        task_type=None,
        error_code=None,
    )
    for number in range(1, NUM_ROBOTS + 1)
}

# Generate continuous time series data
data = []
current_time = last_timestamp

# Simulate 4 hours of data with 5 sec intervals.
# The end bound is inclusive, so when the loop exits current_time sits one
# interval past end_time.
end_time = current_time + timedelta(hours=4)

while current_time <= end_time:
    # Every robot reports at the same instant, so format the timestamp once per tick.
    timestamp_str = current_time.strftime('%Y-%m-%d %H:%M:%S')

    for state in robots.values():
        # Battery and temperature react to the status decided on the previous tick.
        if state["status"] == "charging":
            state["battery_level"] = min(state["battery_level"] + random.randint(10, 20), 100)
            state["temperature"] = random.uniform(40.0, 50.0)  # Higher during charging
        else:
            # Drain 1-5 points per tick, never dropping below 5.
            state["battery_level"] = max(state["battery_level"] - random.randint(1, 5), 5)
            state["temperature"] = random.uniform(30.0, 35.0)

        if state["battery_level"] < 15:
            # Low battery: send the robot to charge and clear every task/error
            # field, so a charging row never carries a task_type or error_code
            # left over from an earlier tick.
            # NOTE(review): the battery floor is 5 and a charging tick adds at
            # least 10, so a robot is back at >= 15 after one charging tick and
            # leaves this branch immediately - confirm that is intended.
            state["status"] = "charging"
            state["task_id"] = None
            state["task_type"] = None
            state["error_code"] = None
        else:
            # 80% active / 20% idle. Active robots get a task; idle robots may
            # report one of the error codes (or none).
            state["status"] = np.random.choice(["active", "idle"], p=[0.8, 0.2])
            is_active = state["status"] == "active"
            state["task_id"] = f"T{random.randint(1000, 9999)}" if is_active else None
            state["task_type"] = np.random.choice(TASKS) if is_active else None
            state["error_code"] = None if is_active else random.choice([None, "E101", "E102", "E103"])

        # Snapshot the state as one event row.
        data.append({
            "event_id": str(uuid.uuid4()),
            "timestamp": timestamp_str,
            "robot_id": state["robot_id"],
            "warehouse_id": WAREHOUSE_ID,
            "status": state["status"],
            "battery_level": state["battery_level"],
            "temperature": round(state["temperature"], 2),
            "task_id": state["task_id"],
            "task_type": state["task_type"],
            "error_code": state["error_code"]
        })

    current_time += timedelta(seconds=TIME_INTERVAL)

# Collect the generated events into a DataFrame
df = pd.DataFrame(data)

# Name the output after the end of the simulated window
end_timestamp = end_time.strftime('%Y%m%d_%H%M%S')
output_file = f"robotic_data_{end_timestamp}.parquet"

# Save the data as a Parquet file with a timestamped name
df.to_parquet(output_file, engine='pyarrow', compression='snappy')

# Save the current timestamp for the next run.
# This happens only after the Parquet file has been written: if the write
# fails, the checkpoint stays where it was and re-running the cell regenerates
# the same window instead of silently skipping it.
with open(LAST_RUN_FILE, "w") as f:
    f.write(current_time.strftime('%Y-%m-%d %H:%M:%S'))

print(f"✅ Robotic data saved in Parquet format: {output_file}")

✅ Robotic data saved in Parquet format: robotic_data_20250415_082249.parquet


In [55]:
import boto3
# Destination bucket for uploads
bucket_name = "robotic-data"

# S3 client used for uploads; no explicit credentials or region are passed,
# so boto3's default configuration applies.
s3_client = boto3.client("s3")
# Upload file to S3
def upload_to_s3(file_path, file_name):
    """Upload a local file to the configured S3 bucket and print the outcome.

    Args:
        file_path: Path of the local file to send.
        file_name: Object key to store the file under in ``bucket_name``.

    Any exception raised by the upload is caught and reported on stdout;
    nothing is re-raised and the function returns ``None`` either way.
    """
    try:
        s3_client.upload_file(file_path, bucket_name, file_name)
    except Exception as exc:
        print(f"❌ Failed to upload {file_name} to S3. Error: {exc}")
    else:
        print(f"✅ Successfully uploaded {file_name} to S3.")

# Send the generated Parquet file to S3, reusing its local name as the object key
upload_to_s3(file_path=output_file, file_name=output_file)



✅ Successfully uploaded robotic_data_20250415_082249.parquet to S3.
