In [None]:
# Predictive Analytics of Air Quality for IoT-Enabled Industrial Environments
# Air Particle data annotation code

In [None]:
import pandas as pd

# Severity labels ordered from best to worst air quality; a label's position
# in this tuple is its rank when the worst pollutant is selected.
# (Presumably Good / Moderate / Unhealthy for Sensitive groups / Unhealthy /
# Very Unhealthy / Hazardous -- confirm against the project's label legend.)
_AQI_LEVELS = ('G', 'M', 'US', 'UH', 'VU', 'HZ')

# Inclusive upper bounds of the first five levels for every required column.
# A reading above the last bound is graded 'HZ'.
_AQI_UPPER_BOUNDS = {
    'mc_1p0': (12, 35.4, 55.4, 150.4, 250.4),   # PM1 (same bounds as PM2.5)
    'mc_2p5': (12, 35.4, 55.4, 150.4, 250.4),   # PM2.5
    'mc_10p0': (54, 154, 254, 354, 424),        # PM10
    'mc_4p0': (30, 75, 150, 200, 300),          # PM4
    'nox_index': (50, 100, 150, 200, 300),      # NOx index
    'voc_index': (50, 100, 150, 200, 300),      # VOC index
    'voc': (220, 660, 2200, 5500, 13000),       # TVOC (total VOC)
    'co2': (400, 1000, 2000, 5000, 10000),      # CO2 (example values, adjust as required)
}


def _rank_reading(value, upper_bounds):
    """Return the rank (0 = best) of the first bound that ``value`` does not exceed."""
    for rank, bound in enumerate(upper_bounds):
        if value <= bound:
            return rank
    # Larger than every bound: worst level.
    return len(upper_bounds)


# Categorize a row based on all pollutant variables
def extended_categorize_aqi(row):
    """Label one row of sensor readings with its worst air-quality category.

    Every required column is graded against its own breakpoints
    (``_AQI_UPPER_BOUNDS``) and the most severe grade found is returned.

    Args:
        row: A record exposing ``.index`` and item access by column name,
            e.g. the ``pandas.Series`` that ``DataFrame.apply(..., axis=1)``
            passes in. It must contain the columns ``mc_1p0``, ``mc_2p5``,
            ``mc_10p0``, ``mc_4p0``, ``nox_index``, ``voc_index``, ``voc``
            and ``co2``.

    Returns:
        One of ``'G'``, ``'M'``, ``'US'``, ``'UH'``, ``'VU'``, ``'HZ'``
        (best to worst), or ``None`` when every required reading is missing.

    Raises:
        ValueError: If any required column is absent from ``row``.
    """
    missing = [col for col in _AQI_UPPER_BOUNDS if col not in row.index]
    if missing:
        raise ValueError(
            "Missing required columns in the DataFrame: " + ", ".join(missing)
        )

    worst_rank = None
    for column, upper_bounds in _AQI_UPPER_BOUNDS.items():
        reading = row[column]
        # A missing reading (NaN/None/NA) compares False against every bound
        # and would otherwise be graded 'HZ'; ignore it instead so a sensor
        # gap cannot masquerade as the worst category.
        if pd.isna(reading):
            continue
        rank = _rank_reading(reading, upper_bounds)
        if worst_rank is None or rank > worst_rank:
            worst_rank = rank

    if worst_rank is None:
        # Nothing measurable in this row: leave it unlabelled.
        return None
    return _AQI_LEVELS[worst_rank]

# Load the sensor readings first (placeholder path -- point it at the real file):
# df = pd.read_csv('your_data_file.csv')

# Label every row: extended_categorize_aqi is called once per row and its
# result is stored in the 'AQI_Category' column.
df['AQI_Category'] = df.apply(extended_categorize_aqi, axis='columns')