Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
import m2cgen as m2c
import os
import joblib


Functions

In [2]:
def load_data(file_path):
    """Load the dataset from a CSV file."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    data = pd.read_csv(file_path, delimiter=';')
    return data

def check_columns(data, required_columns):
    """Check if required columns are in the dataframe."""
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        raise KeyError(f"Missing columns in dataset: {missing_columns}")

def generate_log_columns(data):
    """Generate log10(si) columns."""
    for i in range(10):
        col_name = f's{i}'
        if col_name in data.columns:
            data[f'log_{col_name}'] = np.log10(data[col_name] + 1)
        else:
            raise KeyError(f"Column '{col_name}' not found in the dataset")
    return data

def generate_difference_columns(data):
    """Generate D0, D1, ..., D8 columns."""
    for i in range(9):
        data[f'D{i}'] = data[f'log_s{i+1}'] - data[f'log_s{i}']
    return data

def encode_labels(data, column):
    """Encode categorical labels into numerical values."""
    label_encoder = LabelEncoder()
    data[f'{column}_encoded'] = label_encoder.fit_transform(data[column])
    return data


Load and Preprocess Data

In [3]:
# File path
file_path = '/home/max/infer/sensordata/merged_dataset_sensors_yellowfire.csv'  # Modify with the correct file path

# Load dataset
try:
    data = load_data(file_path)
except Exception as e:
    print(f"Error loading data: {e}")

# Check for required columns
required_columns = [f's{i}' for i in range(10)] + ['value']
try:
    check_columns(data, required_columns)
except KeyError as e:
    print(e)

# Generate log columns
try:
    data = generate_log_columns(data)
except KeyError as e:
    print(e)

# Generate difference columns
data = generate_difference_columns(data)

# Encode labels
try:
    data = encode_labels(data, 'value')
except KeyError as e:
    print(e)

print("Nomi delle colonne del file CSV:", data.columns)
print("Valori unici nella colonna 'value_encoded':", data['value_encoded'].unique())


Nomi delle colonne del file CSV: Index(['_time', 'vcc', 'vpanel', 'tmp', 'hum', 's0', 's1', 's2', 's3', 's4',
       's5', 's6', 's7', 's8', 's9', 'value', 'sensor_name', 'log_s0',
       'log_s1', 'log_s2', 'log_s3', 'log_s4', 'log_s5', 'log_s6', 'log_s7',
       'log_s8', 'log_s9', 'D0', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7',
       'D8', 'value_encoded'],
      dtype='object')
Valori unici nella colonna 'value_encoded': [0 1]


Feature Selection and Train-Test Split

In [4]:
# Create feature matrix and target vector
features = [f'D{i}' for i in range(9)]
X = data[features]
y = data['value_encoded']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Train and Evaluate Random Forest Model

In [5]:
# Train and evaluate Random Forest model
rf_model = RandomForestClassifier(n_estimators=7, max_depth=12, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

# Save Random Forest model
rf_model_path = '/home/max/infer/sensordata/rf_edge_model.joblib'
joblib.dump(rf_model, rf_model_path)
print(f"Random Forest model saved to '{rf_model_path}'")


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       646
           1       0.98      0.98      0.98       565

    accuracy                           0.98      1211
   macro avg       0.98      0.98      0.98      1211
weighted avg       0.98      0.98      0.98      1211

Random Forest Accuracy: 0.9793559042113955
Random Forest model saved to '/home/max/infer/sensordata/rf_edge_model.joblib'


Train and Evaluate XGBoost Model

In [6]:
# Train and evaluate XGBoost model
xgb_model = XGBClassifier(max_depth=12, gamma=1, random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
print("XGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb))
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))

# Save XGBoost model
xgb_model_path = '/home/max/infer/sensordata/xgb_edge_model.joblib'
joblib.dump(xgb_model, xgb_model_path)
print(f"XGBoost model saved to '{xgb_model_path}'")


XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       646
           1       0.97      0.98      0.98       565

    accuracy                           0.98      1211
   macro avg       0.98      0.98      0.98      1211
weighted avg       0.98      0.98      0.98      1211

XGBoost Accuracy: 0.9793559042113955
XGBoost model saved to '/home/max/infer/sensordata/xgb_edge_model.joblib'


Save Processed Dataset

In [7]:
# Save the new dataset to a CSV file
output_file = '/home/max/infer/sensordata/edge_dataset.csv'
data.to_csv(output_file, index=False)
print(f"File salvato come '{output_file}'")


File salvato come '/home/max/infer/sensordata/edge_dataset.csv'


Export Models to Python

In [8]:
# Export XGBoost model to Python code
python_code = m2c.export_to_python(xgb_model)
python_code_path = 'xgb_edge_model.py'
with open(python_code_path, 'w') as file:
    file.write(python_code)
print(f"XGBoost model exported to Python code and saved to '{python_code_path}'")


TypeError: unsupported operand type(s) for /: 'float' and 'NoneType'

Export Models to C

In [9]:
# Convert XGBoost model to C code
c_code = m2c.export_to_c(xgb_model)
c_code_path = 'xgb_edge_model.c'
with open(c_code_path, 'w') as file:
    file.write(c_code)
print(f"XGBoost model exported to C and saved to '{c_code_path}'")


TypeError: unsupported operand type(s) for /: 'float' and 'NoneType'