# Normal dataset

In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Seed NumPy's global RNG so the synthetic readings are identical on every run
np.random.seed(42)

# Simulation settings
num_machines = 5
records_per_machine = 300
start_time = datetime(2025, 1, 1, 0, 0, 0)
time_interval = timedelta(minutes=10)

# Output schema: machine id, reading time, six gas-temperature sensors, pressure
columns = [
    "machineid", "datetimestamp",
    "enginegastemp1", "enginegastemp2", "enginegastemp3",
    "enginegastemp4", "enginegastemp5", "enginegastemp6",
    "pressure",
]

# Build one row per (machine, time step). Each row draws the six temperatures
# first and the pressure second, so the RNG is always consumed in that order.
data = []
for machine_id in range(1, num_machines + 1):
    for step in range(records_per_machine):
        # Every machine starts at start_time and advances by one interval per step
        reading_time = start_time + step * time_interval
        temps = np.random.normal(loc=300, scale=5, size=6)
        pressure = np.random.normal(loc=100, scale=2)

        row = [machine_id, reading_time.strftime('%Y-%m-%d %H:%M:%S')]
        row.extend(temps)
        row.append(pressure)
        data.append(row)

# Assemble the generated rows into a DataFrame
df = pd.DataFrame(data, columns=columns)

# Preview the first five rows
print(df.head())


   machineid        datetimestamp  enginegastemp1  enginegastemp2  \
0          1  2025-01-01 00:00:00      302.483571      299.308678   
1          1  2025-01-01 00:10:00      303.837174      297.652628   
2          1  2025-01-01 00:20:00      291.375411      297.188562   
3          1  2025-01-01 00:30:00      298.871118      300.337641   
4          1  2025-01-01 00:40:00      296.996807      298.541531   

   enginegastemp3  enginegastemp4  enginegastemp5  enginegastemp6    pressure  
0      303.238443      307.615149      298.829233      298.829315  103.158426  
1      302.712800      297.682912      297.671351      301.209811   96.173440  
2      294.935844      301.571237      295.459880      292.938481  102.931298  
3      292.876259      297.278086      300.554613      294.245032  100.751396  
4      296.991467      309.261391      299.932514      294.711445  101.645090  


In [6]:
# Preview the first 100 rows (positional slice, equivalent to head(100))
df.iloc[:100]

Unnamed: 0,machineid,datetimestamp,enginegastemp1,enginegastemp2,enginegastemp3,enginegastemp4,enginegastemp5,enginegastemp6,pressure
0,1,2025-01-01 00:00:00,302.483571,299.308678,303.238443,307.615149,298.829233,298.829315,103.158426
1,1,2025-01-01 00:10:00,303.837174,297.652628,302.712800,297.682912,297.671351,301.209811,96.173440
2,1,2025-01-01 00:20:00,291.375411,297.188562,294.935844,301.571237,295.459880,292.938481,102.931298
3,1,2025-01-01 00:30:00,298.871118,300.337641,292.876259,297.278086,300.554613,294.245032,100.751396
4,1,2025-01-01 00:40:00,296.996807,298.541531,296.991467,309.261391,299.932514,294.711445,101.645090
...,...,...,...,...,...,...,...,...,...
95,1,2025-01-01 15:50:00,300.532151,298.725114,307.519965,286.745151,305.457534,306.230426,95.853220
96,1,2025-01-01 16:00:00,298.286562,298.142796,292.962442,296.110917,294.447121,308.761352,101.871357
97,1,2025-01-01 16:10:00,306.357775,303.608360,294.354741,297.377399,302.446873,293.889361,101.425997
98,1,2025-01-01 16:20:00,298.798373,298.125896,303.554800,302.221317,298.195169,305.796649,97.837873


In [7]:
# Write the current DataFrame to CSV; index=False leaves the row index out of the file
df.to_csv(path_or_buf='pred_maint_timeseries.csv', index=False)

# Dataset with abnormalities

In [8]:
# Re-seed the global RNG so this cell generates the same rows on every run
np.random.seed(42)

# Simulation settings
num_machines = 5
records_per_machine = 100
start_time = datetime(2025, 1, 1, 0, 0, 0)
time_interval = timedelta(minutes=10)
failure_prob = 0.08  # probability that a record receives an abnormal reading

# Output schema: same sensors as the readings plus a 0/1 failure label
columns = [
    "machineid", "datetimestamp",
    "enginegastemperature1", "enginegastemperature2", "enginegastemperature3",
    "enginegastemperature4", "enginegastemperature5", "enginegastemperature6",
    "pressure",
    "failure_warning",
]

# Build one row per (machine, time step)
data = []
for machine_id in range(1, num_machines + 1):
    for step in range(records_per_machine):
        reading_time = start_time + step * time_interval

        # Baseline readings: six temperatures, then one pressure value
        temps = np.random.normal(loc=300, scale=5, size=6)
        pressure = np.random.normal(loc=100, scale=2)

        # A single uniform draw decides whether this record is abnormal
        is_failure_warning = np.random.rand() < failure_prob
        if is_failure_warning:
            # Overheat one randomly chosen sensor by 50-100 degrees
            fault_sensor = np.random.choice(range(6))
            temps[fault_sensor] += np.random.uniform(50, 100)

        row = [machine_id, reading_time.strftime('%Y-%m-%d %H:%M:%S')]
        row.extend(temps)
        row.append(pressure)
        row.append(int(is_failure_warning))  # store the label as 0/1
        data.append(row)




   machineid        datetimestamp  enginegastemperature1  \
0          1  2025-01-01 00:00:00             302.483571   
1          1  2025-01-01 00:10:00             303.837174   
2          1  2025-01-01 00:20:00             299.877724   
3          1  2025-01-01 00:30:00             303.323272   
4          1  2025-01-01 00:40:00             297.766293   

   enginegastemperature2  enginegastemperature3  enginegastemperature4  \
0             385.408617             303.238443             307.615149   
1             300.111109             297.861035             297.340913   
2             301.777757             302.085056             304.162309   
3             303.895963             294.494511             305.651141   
4             307.621208             301.614999             293.032915   

0             298.829233             298.829315  103.158426                1  
1             299.412622             301.110395   98.464047                0  
2             298.533004            

In [None]:
# Turn the generated rows into a DataFrame using the declared column order
df = pd.DataFrame(data=data, columns=columns)

# Write the dataset to disk without the row index
df.to_csv(path_or_buf='pred_maint_timeseries_with_failures.csv', index=False)

# Preview the first five rows
print(df.head(n=5))

# Extended dataset: one year of readings per machine

In [9]:

# Seed the global RNG so the year-long dataset is reproducible
np.random.seed(42)

# Parameters
num_machines = 5
records_per_day = 24 * 6  # 10-minute intervals = 6 per hour
days = 365
records_per_machine = records_per_day * days
start_time = datetime(2025, 1, 1, 0, 0, 0)
time_interval = timedelta(minutes=10)
failure_prob = 0.08  # probability that a record receives an abnormal reading

# Every machine uses the same reading schedule, so the timestamp strings are
# formatted once here instead of once per machine inside the generation loop.
timestamps = [
    (start_time + step * time_interval).strftime('%Y-%m-%d %H:%M:%S')
    for step in range(records_per_machine)
]

# Data generation: one row per (machine, time step). The RNG is consumed in the
# same order as before (temperatures, pressure, failure draw, optional spike),
# so the generated values are unchanged.
data = []

for machine_id in range(1, num_machines + 1):
    for reading_time in timestamps:
        # Normal temperatures and pressure
        temps = np.random.normal(loc=300, scale=5, size=6)
        pressure = np.random.normal(loc=100, scale=2)

        # Inject an abnormal temperature at random
        if np.random.rand() < failure_prob:
            fault_sensor = np.random.choice(range(6))  # Choose random sensor
            temps[fault_sensor] += np.random.uniform(50, 100)  # Spike

        # Note: unlike the labelled dataset, no failure flag is stored per row
        data.append([
            machine_id,
            reading_time,
            *temps,
            pressure
        ])

# Column names for the DataFrame built from `data`
columns = [
    "machineid",
    "datetimestamp",
    "enginegastemperature1",
    "enginegastemperature2",
    "enginegastemperature3",
    "enginegastemperature4",
    "enginegastemperature5",
    "enginegastemperature6",
    "pressure"
]



In [10]:
# Turn the generated rows into a DataFrame using the declared column order
df = pd.DataFrame(data=data, columns=columns)

# Write the year-long dataset to disk without the row index
df.to_csv(path_or_buf="pred_maint_yearly_timeseries.csv", index=False)

# Preview the first five rows
print(df.head(n=5))


   machineid        datetimestamp  enginegastemperature1  \
0          1  2025-01-01 00:00:00             302.483571   
1          1  2025-01-01 00:10:00             303.837174   
2          1  2025-01-01 00:20:00             299.877724   
3          1  2025-01-01 00:30:00             303.323272   
4          1  2025-01-01 00:40:00             297.766293   

   enginegastemperature2  enginegastemperature3  enginegastemperature4  \
0             385.408617             303.238443             307.615149   
1             300.111109             297.861035             297.340913   
2             301.777757             302.085056             304.162309   
3             303.895963             294.494511             305.651141   
4             307.621208             301.614999             293.032915   

   enginegastemperature5  enginegastemperature6    pressure  
0             298.829233             298.829315  103.158426  
1             299.412622             301.110395   98.464047  
2       