In [1]:
# Import the libraries used throughout this notebook
from pathlib import Path

import numpy as np
import pandas as pd

# Set seed for reproducibility
np.random.seed(42)

# Configuration
n_samples = 5000




In [2]:
# Define sensors and normal operating ranges
sensors = {
    "condenser_temp": (30, 40),     # °C
    "evaporator_temp": (5, 10),     # °C
    "compressor_current": (12, 20), # Amps
    "evaporator_pressure": (300, 400), # kPa
    "condenser_pressure": (800, 1000), # kPa
    "refrigerant_flow": (4, 8),     # L/min
}


In [3]:
# Fault types
fault_classes = {
    0: "Normal",
    1: "Condenser Fouling",
    2: "Evaporator Fouling",
    3: "Refrigerant Leak",
    4: "Excess Oil",
    5: "Defective Pilot Valve",
}




In [4]:
# Function to generate normal data
def generate_normal_data(n):
    data = []
    for _ in range(n):
        row = [np.random.uniform(low, high) for low, high in sensors.values()]
        data.append(row)
    return np.array(data)



In [5]:
# Fault simulation logic
def inject_fault(sample, fault_type):
    sample = sample.copy()

    if fault_type == 1:  # Condenser Fouling
        sample[0] += np.random.uniform(5, 10)

    elif fault_type == 2:  # Evaporator Fouling
        sample[1] += np.random.uniform(3, 6)
        sample[5] -= np.random.uniform(1, 2)

    elif fault_type == 3:  # Refrigerant Leak
        sample[3] -= np.random.uniform(50, 100)
        sample[4] -= np.random.uniform(100, 200)

    elif fault_type == 4:  # Excess Oil
        sample[2] += np.random.uniform(5, 10)

    elif fault_type == 5:  # Defective Pilot Valve
        sample[5] -= np.random.uniform(2, 3)
        sample[0] += np.random.uniform(2, 4)
        sample[1] -= np.random.uniform(2, 3)

    return sample



In [6]:
# Build full dataset
data = []
labels = []



In [7]:
# Normal samples
normal_data = generate_normal_data(int(n_samples * 0.4))
data.extend(normal_data)
labels.extend([0] * len(normal_data))

# Faulty samples
for fault_id in range(1, 6):
    base_data = generate_normal_data(int(n_samples * 0.12))
    faulty_data = np.array([inject_fault(row, fault_id) for row in base_data])
    data.extend(faulty_data)
    labels.extend([fault_id] * len(faulty_data))



In [13]:
# Create dataframe
df = pd.DataFrame(data, columns=sensors.keys())
df["fault_type"] = labels
df["fault_name"] = df["fault_type"].map(fault_classes)

# Save to CSV
csv_path = "/content/sample_data/chiller_fault_dataset.csv"
df.to_csv(csv_path, index=False)
print("your csv file saved in sample_data")

#import os

#downloads_path = os.path.join(os.path.expanduser("~"), "Downloads", "chiller_fault_dataset.csv")
#df.to_csv(downloads_path, index=False)


csv_path

your csv file saved in sample_data


'/content/sample_data/chiller_fault_dataset.csv'