In [None]:
import numpy as np
import pandas as pd 

# Number of simulated readings generated for each sensor (1 million)
NUM_READINGS = 1_000_000

# Seed NumPy's legacy global RNG so the generated data is identical on
# every run of this cell
np.random.seed(42)

# Mean and standard deviation of the normal distribution used for each sensor

# Temperature (°C)
mean_temp = 45.0
std_dev_temp = 12.0

# Humidity (% RH)
mean_humidity = 60.0
std_dev_humidity = 15.0

# Vibration (mm/s)
mean_vibration = 5.0
std_dev_vibration = 2.0

# Simulated acquisition times: one reading per second starting 2025-01-01.
# Lowercase "s" is the current pandas alias for seconds; the uppercase "S"
# spelling is deprecated and emits a FutureWarning on pandas >= 2.2.
timestamps = pd.date_range(start="2025-01-01", periods=NUM_READINGS, freq="s")

# Draw one 1-D array of NUM_READINGS normally distributed values per sensor.
# The draw order (temperature, humidity, vibration) fixes which seeded
# values each sensor receives, so keep it stable.
temperature_data = np.random.normal(loc=mean_temp, scale=std_dev_temp, size=NUM_READINGS)
humidity_data = np.random.normal(loc=mean_humidity, scale=std_dev_humidity, size=NUM_READINGS)
vibration_data = np.random.normal(loc=mean_vibration, scale=std_dev_vibration, size=NUM_READINGS)

# Inject anomalies: add an offset drawn from N(50, 10) to 100 distinct,
# randomly chosen temperature readings (replace=False prevents duplicates).
# NOTE(review): the later "sensors" dictionary cell adds another batch of
# temperature anomalies on top of these - confirm both batches are intended.
num_anomalies = 100
anomaly_indices = np.random.choice(NUM_READINGS, num_anomalies, replace=False)
temperature_data[anomaly_indices] += np.random.normal(50, 10, num_anomalies)

print(f"Data array size: {temperature_data.size} elements")
print(f"First 5 temperatures: {temperature_data[:5]}")

Data array size: 1000000 elements
First 5 temperatures: [50.96056984 43.34082839 52.77226246 63.27635828 42.1901595 ]


  timestamps = pd.date_range(start="2025-01-01", periods=NUM_READINGS, freq="T")


In [25]:
# Place the three 1-D sensor arrays side by side as the columns of a single
# 2-D array (one row per reading) and display it as the cell output.
np.stack([temperature_data, humidity_data, vibration_data], axis=1)

array([[50.96056984, 47.53757782, 20.97083613],
       [43.34082839, 43.17742266, -2.45069764],
       [52.77226246, 62.3493791 ,  6.93890294],
       ...,
       [43.64424304, 48.5551387 ,  5.17552444],
       [62.62970845, 43.11265749, 12.50714189],
       [50.71716306, 71.72622818,  4.47245513]], shape=(1000000, 3))

In [None]:
# Gather the sensor arrays under their names. The dictionary stores
# references to the existing arrays, not copies.
sensors = dict(
    temperature=temperature_data,
    humidity=humidity_data,
    vibration=vibration_data,
)

num_anomalies = 100

# (mean, standard deviation) of the normally distributed offset that is
# added to the anomalous readings of each sensor
anomaly_offset_params = {
    "temperature": (50, 10),
    "humidity": (30, 5),
    "vibration": (10, 3),
}

# NOTE(review): the offsets are added in place, so temperature_data,
# humidity_data and vibration_data themselves are modified, and re-running
# this cell stacks a further batch of anomalies on them. temperature_data
# also already received one batch in the data-generation cell.
for sensor_name, readings in sensors.items():
    # Choose distinct positions for this sensor's anomalies
    anomaly_indices = np.random.choice(NUM_READINGS, num_anomalies, replace=False)
    offset_mean, offset_std = anomaly_offset_params[sensor_name]
    readings[anomaly_indices] += np.random.normal(offset_mean, offset_std, num_anomalies)

# Quick check: first five readings of every sensor
for sensor_name in ("temperature", "humidity", "vibration"):
    print(sensors[sensor_name][:5])
