In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Seed NumPy's global RNG so the np.random.* draws made by the generator
# functions in this notebook are repeatable on a fresh "Restart & Run All".
np.random.seed(0)

In [2]:
def generate_normal_data(n_samples=1000):
    """Draw synthetic sensor readings for the 'normal' class.

    The MQ2 channel is an exponential background (scale 0.3) on top of which
    roughly 5% of samples receive an additive spike drawn uniformly from
    [2, 5); the sum is clipped to [0, 5]. Temperature and humidity are
    Gaussian draws clipped to [15, 30] and [30, 70] respectively.

    All draws come from NumPy's global RNG, in the order: exponential,
    choice, uniform, normal (temperature), normal (humidity).

    Args:
        n_samples: Number of readings to generate per channel.

    Returns:
        Tuple ``(mq2, temp, humidity)`` of arrays, each of length ``n_samples``.
    """
    background = np.random.exponential(scale=0.3, size=n_samples)
    # 0/1 indicator: 1 marks the ~5% of samples that carry a spike.
    spike_flags = np.random.choice([0, 1], size=n_samples, p=[0.95, 0.05])
    spike_heights = np.random.uniform(2, 5, n_samples)
    gas_level = np.clip(background + spike_flags * spike_heights, 0, 5)

    temperature = np.clip(
        np.random.normal(loc=22, scale=2, size=n_samples), 15, 30
    )
    rel_humidity = np.clip(
        np.random.normal(loc=45, scale=8, size=n_samples), 30, 70
    )

    return gas_level, temperature, rel_humidity

In [3]:
def generate_gas_leak_data(n_samples=300):
    """Draw synthetic sensor readings for the 'gas_leak' class.

    The MQ2 channel blends a uniform draw on [5, 50) (weight 0.7) with a
    cluster centre picked from {10, 25, 45} (weight 0.3, pick probabilities
    0.3/0.4/0.3), adds Gaussian noise (sigma 2), and clips to [5, 55].
    Temperature and humidity are Gaussian draws clipped to [18, 28] and
    [25, 65] respectively.

    All draws come from NumPy's global RNG, in the order: uniform, choice,
    normal (noise), normal (temperature), normal (humidity).

    Args:
        n_samples: Number of readings to generate per channel.

    Returns:
        Tuple ``(mq2, temp, humidity)`` of arrays, each of length ``n_samples``.
    """
    uniform_part = np.random.uniform(5, 50, n_samples)
    cluster_centres = np.random.choice(
        [10, 25, 45], size=n_samples, p=[0.3, 0.4, 0.3]
    )
    noise = np.random.normal(0, 2, n_samples)
    # Keep the (blend) + noise evaluation order so results match bit-for-bit.
    blended = 0.7 * uniform_part + 0.3 * cluster_centres
    gas_level = np.clip(blended + noise, 5, 55)

    temperature = np.clip(
        np.random.normal(loc=23, scale=2, size=n_samples), 18, 28
    )
    rel_humidity = np.clip(
        np.random.normal(loc=40, scale=7, size=n_samples), 25, 65
    )

    return gas_level, temperature, rel_humidity

In [4]:
def generate_fire_data(n_samples=200):
    """Draw synthetic sensor readings for the 'fire' class.

    MQ2 is uniform on [5, 60). Temperature is Gaussian (mean 35, sigma 5)
    clipped to [28, 50]; humidity is Gaussian (mean 25, sigma 5) clipped to
    [15, 40].

    All draws come from NumPy's global RNG, in the order: uniform,
    normal (temperature), normal (humidity).

    Args:
        n_samples: Number of readings to generate per channel.

    Returns:
        Tuple ``(mq2, temp, humidity)`` of arrays, each of length ``n_samples``.
    """
    gas_level = np.random.uniform(5, 60, n_samples)

    raw_temperature = np.random.normal(loc=35, scale=5, size=n_samples)
    raw_humidity = np.random.normal(loc=25, scale=5, size=n_samples)

    return (
        gas_level,
        np.clip(raw_temperature, 28, 50),
        np.clip(raw_humidity, 15, 40),
    )

In [5]:
def generate_high_humidity_data(n_samples=150):
    """Draw synthetic sensor readings for the 'high_humidity' class.

    MQ2 is exponential (scale 0.5) clipped to [0, 3]. Temperature is Gaussian
    (mean 26, sigma 3) clipped to [20, 35]; humidity is Gaussian (mean 75,
    sigma 8) clipped to [65, 95].

    All draws come from NumPy's global RNG, in the order: exponential,
    normal (temperature), normal (humidity).

    Args:
        n_samples: Number of readings to generate per channel.

    Returns:
        Tuple ``(mq2, temp, humidity)`` of arrays, each of length ``n_samples``.
    """
    gas_level = np.clip(
        np.random.exponential(scale=0.5, size=n_samples), 0, 3
    )
    temperature = np.clip(
        np.random.normal(loc=26, scale=3, size=n_samples), 20, 35
    )
    rel_humidity = np.clip(
        np.random.normal(loc=75, scale=8, size=n_samples), 65, 95
    )

    return gas_level, temperature, rel_humidity

In [6]:
def create_dataset(n_normal=1000, n_gas_leak=300, n_fire=200, n_high_humidity=150):
    """Build the labelled, shuffled sensor dataset.

    Generates each class with its dedicated generator, stacks the classes
    into one frame, shuffles the rows, and rounds numeric columns to two
    decimals.

    The generators are invoked in a fixed order (normal, gas_leak, fire,
    high_humidity) and the shuffle is done with ``DataFrame.sample`` without
    an explicit ``random_state``, so the result is only reproducible if
    NumPy's global RNG has been seeded beforehand.

    Args:
        n_normal: Number of 'normal' rows (label_id 0).
        n_gas_leak: Number of 'gas_leak' rows (label_id 1).
        n_fire: Number of 'fire' rows (label_id 2).
        n_high_humidity: Number of 'high_humidity' rows (label_id 3).

    Returns:
        DataFrame with columns ``mq2``, ``temperature``, ``humidity``,
        ``label`` and ``label_id`` and a fresh 0..N-1 index.
    """
    # (label, label_id, generator, row count). The order matters: it fixes
    # the sequence in which the global RNG is consumed.
    class_specs = [
        ('normal', 0, generate_normal_data, n_normal),
        ('gas_leak', 1, generate_gas_leak_data, n_gas_leak),
        ('fire', 2, generate_fire_data, n_fire),
        ('high_humidity', 3, generate_high_humidity_data, n_high_humidity),
    ]

    frames = []
    for label, label_id, generator, count in class_specs:
        mq2, temp, hum = generator(count)
        # Build each class column-wise; the scalar label/label_id broadcast
        # across all rows, so the count is stated in exactly one place.
        frames.append(pd.DataFrame({
            'mq2': mq2,
            'temperature': temp,
            'humidity': hum,
            'label': label,
            'label_id': label_id,
        }))

    df = pd.concat(frames, ignore_index=True)
    # frac=1 returns every row in random order, i.e. a full shuffle.
    df = df.sample(frac=1).reset_index(drop=True).round(2)

    return df

In [7]:
# Build the full labelled dataset and persist it next to the notebook.
df = create_dataset()
# index=False: the row index is just 0..N-1 after the shuffle, so omit it.
df.to_csv('sensor_dataset.csv', index=False)

In [8]:
# Preview the first rows of the shuffled dataset.
df.head()

Unnamed: 0,mq2,temperature,humidity,label,label_id
0,3.44,21.01,41.29,normal,0
1,35.27,36.8,22.85,fire,2
2,0.47,19.34,64.97,normal,0
3,0.46,21.89,54.4,normal,0
4,0.3,21.65,43.57,normal,0


In [9]:
# Class balance check: number of rows per label.
print(df['label'].value_counts())

label
normal           1000
gas_leak          300
fire              200
high_humidity     150
Name: count, dtype: int64
