### 1. MLP model applied (Aggregated Dataset).

In [1]:
# 1. Importing allowed libraries
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler, LabelEncoder

# 2. Load saved model
mlp_model = load_model("../models/mlp_model_aggregated_dataset.keras")

# 3. Load full aggregated dataset to get columns
aggregated_df = pd.read_csv("../data/processed/aggregated_dataset.csv")

# 4. Randomly select a sample from the aggregated dataset
sample_data = aggregated_df.sample(n=1, random_state=42)
print("🔍 Amostra escolhida aleatoriamente:")
display(sample_data)

# 5. Separate features (X) and class (y)
X_sample = sample_data.drop(columns=["classe", "tag"]).copy()
y_sample = sample_data["classe"].copy()

# 6. Data processing same as original training

# Standardization (StandardScaler) - pre-fit in training
scaler = StandardScaler()
X_train = aggregated_df.drop(columns=["classe", "tag"])
scaler.fit(X_train)
X_sample_scaled = scaler.transform(X_sample)

# Label Encoder to interpret results
label_encoder = LabelEncoder()
label_encoder.fit(aggregated_df["classe"])

# 7. Application of MLP model for prediction
prediction_probabilities = mlp_model.predict(X_sample_scaled)
predicted_class = label_encoder.inverse_transform(np.argmax(prediction_probabilities, axis=1))

# 8. Prediction display
print("\n✅ Resultado da Predição:")
print(f"Classe Real: {y_sample.values[0]}")
print(f"Classe Predita pelo modelo: {predicted_class[0]}")


🔍 Amostra escolhida aleatoriamente:


Unnamed: 0,0_mean,0_std,0_max,0_min,Direção do vento,Velocidade do vento (m/s),Intervalos de Emissão,Altura,classe,tag
24,200.274445,115.526028,400.0,0.564501,315,2.5,0.2,10,E1,17-A1



✅ Resultado da Predição:
Classe Real: E1
Classe Predita pelo modelo: E2


### 2. MLP model applied (Complete Dataset)

In [2]:
# 1. Importing the necessary libraries
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler, LabelEncoder

# 2. Load saved model
mlp_model_complete = load_model("../models/mlp_model_complete_dataset.keras")

# 3. Load full dataset to get original columns
complete_df = pd.read_csv("../data/processed/complete_dataset.csv")

#4. Choose a random sample from a single scenario (Subfolder)
random_subfolder = complete_df['Subfolder'].sample(n=1, random_state=42).iloc[0]
sample_data = complete_df[complete_df['Subfolder'] == random_subfolder]
print(f"🔍 Cenário escolhido aleatoriamente: {random_subfolder}")
display(sample_data.head())

# 5. Preparação das features (X) e seleção da classe (y)
X_sample = sample_data.drop(columns=["Emission_Point", "Subfolder"]).copy()
y_sample = sample_data["Emission_Point"].iloc[0]  # Todos iguais no mesmo cenário

# 6. Data processing (same as original training)
# Normalization using StandardScaler
scaler = StandardScaler()
X_train = complete_df.drop(columns=["Emission_Point", "Subfolder"])
scaler.fit(X_train)
X_sample_scaled = scaler.transform(X_sample)

# Adjusting the format for the MLP model (single sample - time average)
X_sample_mean = X_sample_scaled.mean(axis=0).reshape(1, -1)

# LabelEncoder to interpret results
label_encoder = LabelEncoder()
label_encoder.fit(complete_df["Emission_Point"])

# 7. Application of MLP model for prediction
prediction_probabilities = mlp_model_complete.predict(X_sample_mean)
predicted_class = label_encoder.inverse_transform(np.argmax(prediction_probabilities, axis=1))

# 8. Prediction display
print("\n✅ Resultado da Predição:")
print(f"Classe Real: {y_sample}")
print(f"Classe Predita pelo modelo: {predicted_class[0]}")


🔍 Cenário escolhido aleatoriamente: 3-A3


Unnamed: 0,Time,PartMatter01,PartMatter02,PartMatter03,PartMatter04,PartMatter05,PartMatter06,PartMatter07,PartMatter08,PartMatter09,...,PM1-Density-L53z2,PM1-Density-L54z2,PM1-Density-L55z2,mass,Emission_Point,Subfolder,Wind_Direction,Wind_Speed,Emission_Interval,Height
56109,,,,,,,,,,,...,,,,,E1,3-A3,225,6.0,0.1,20
56110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,E1,3-A3,225,6.0,0.1,20
56111,0.413913,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.075,E1,3-A3,225,6.0,0.1,20
56112,0.827825,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.195,E1,3-A3,225,6.0,0.1,20
56113,1.241738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.315,E1,3-A3,225,6.0,0.1,20



✅ Resultado da Predição:
Classe Real: E1
Classe Predita pelo modelo: E1


### 3. CNN1D model applied (Complete Dataset)

In [4]:
# 1. Importing the necessary libraries
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler, LabelEncoder

# 2. Load the saved CNN1D model
cnn1d_model = load_model("../models/cnn1d_model_complete_dataset.h5")

# 3. Load original full dataset
complete_df = pd.read_csv("../data/processed/complete_dataset.csv")

#4. Randomly choose a simulation (full scenario)
random_subfolder = complete_df['Subfolder'].sample(n=1, random_state=42).iloc[0]
sample_data = complete_df[complete_df['Subfolder'] == random_subfolder]
print(f"🔍 Cenário escolhido aleatoriamente: {random_subfolder}")
display(sample_data.head())

# 5. Separate features (X) and class (y)
X_sample = sample_data.drop(columns=["Emission_Point", "Subfolder"]).copy()
y_sample = sample_data["Emission_Point"].iloc[0]

# 6. Normalization (StandardScaler applied per scenario)
scaler = StandardScaler()
X_sample_scaled = scaler.fit_transform(X_sample)

# 7. Correct data fit for CNN1D (337-step window with 1 feature at a time)
timesteps = 337  
X_sample_flat = X_sample_scaled.flatten()

# Check if the data length is sufficient
total_length = len(X_sample_flat)
if total_length < timesteps:
    raise ValueError(f"Dados insuficientes: encontrado {total_length}, necessário pelo menos {timesteps}")

# Get only the first 337 values for a single window
X_cnn_input = X_sample_flat[:timesteps].reshape((1, timesteps, 1))

# 8. LabelEncoder to interpret results
label_encoder = LabelEncoder()
label_encoder.fit(complete_df["Emission_Point"])

# 9. Application of CNN1D model for prediction
prediction_probabilities = cnn1d_model.predict(X_cnn_input)
predicted_class = label_encoder.inverse_transform(np.argmax(prediction_probabilities, axis=1))

# 10. Prediction display
print("\n✅ Resultado da Predição CNN1D:")
print(f"Classe Real: {y_sample}")
print(f"Classe Predita pelo modelo: {predicted_class[0]}")

🔍 Cenário escolhido aleatoriamente: 3-A3


Unnamed: 0,Time,PartMatter01,PartMatter02,PartMatter03,PartMatter04,PartMatter05,PartMatter06,PartMatter07,PartMatter08,PartMatter09,...,PM1-Density-L53z2,PM1-Density-L54z2,PM1-Density-L55z2,mass,Emission_Point,Subfolder,Wind_Direction,Wind_Speed,Emission_Interval,Height
56109,,,,,,,,,,,...,,,,,E1,3-A3,225,6.0,0.1,20
56110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,E1,3-A3,225,6.0,0.1,20
56111,0.413913,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.075,E1,3-A3,225,6.0,0.1,20
56112,0.827825,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.195,E1,3-A3,225,6.0,0.1,20
56113,1.241738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.315,E1,3-A3,225,6.0,0.1,20



✅ Resultado da Predição CNN1D:
Classe Real: E1
Classe Predita pelo modelo: E1


### 4. CNN1D model applied (Timeseries Dataset)

In [5]:
# 1. Importing the necessary libraries
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# 2. Load the saved CNN1D model
cnn1d_timeseries_model = load_model("../models/cnn1d_model_timeseries_dataset.keras")

# 3. Load timeseries dataset to check original format
timeseries_df = pd.read_csv("../data/processed/timeseries_dataset.csv")

# 4. Randomly choose a sample (time window)
sample_row = timeseries_df.sample(n=1, random_state=42)
print("🔍 Janela temporal escolhida aleatoriamente:")
display(sample_row)

# 5. Separate features (X) and class (y)
X_sample_flat = sample_row.drop(columns=["classe"]).values
y_sample = sample_row["classe"].iloc[0]

# 6. Adjusting the format for CNN1D (30 timesteps, correct number of features)
num_features = int(X_sample_flat.shape[1] / 30)  
X_sample_reshaped = X_sample_flat.reshape((1, 30, num_features))

# 7. LabelEncoder to interpret results
label_encoder = LabelEncoder()
label_encoder.fit(timeseries_df["classe"])

# 8. Application of CNN1D model for prediction
prediction_probabilities = cnn1d_timeseries_model.predict(X_sample_reshaped)
predicted_class = label_encoder.inverse_transform(np.argmax(prediction_probabilities, axis=1))

# 9. Prediction display
print("\n✅ Resultado da Predição CNN1D (Timeseries):")
print(f"Classe Real: {y_sample}")
print(f"Classe Predita pelo modelo: {predicted_class[0]}")

🔍 Janela temporal escolhida aleatoriamente:


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f9891,f9892,f9893,f9894,f9895,f9896,f9897,f9898,f9899,classe
19957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.106796,0.0,0.0,0.0,E3



✅ Resultado da Predição CNN1D (Timeseries):
Classe Real: E3
Classe Predita pelo modelo: E3


### 5. LSTM model applied 

In [6]:
# 1. Importing the necessary libraries
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# 2. Load the saved LSTM model
lstm_model = load_model("../models/lstm_model_timeseries_dataset.keras")

# 3. Load timeseries dataset
timeseries_df = pd.read_csv("../data/processed/timeseries_dataset.csv")

#4. Randomly choose a time window
sample_row = timeseries_df.sample(n=1, random_state=42)
print("🔍 Janela temporal escolhida aleatoriamente:")
display(sample_row)

# 5. Separate features (X) and class (y)
X_sample_flat = sample_row.drop(columns=["classe"]).values
y_sample = sample_row["classe"].iloc[0]

# 6. Adjust format for LSTM: (samples, timesteps, features)
timesteps = 30
num_features = int(X_sample_flat.shape[1] / timesteps)
X_sample_reshaped = X_sample_flat.reshape((1, timesteps, num_features))

# 7. LabelEncoder to interpret results
label_encoder = LabelEncoder()
label_encoder.fit(timeseries_df["classe"])

# 8. Applying LSTM model for prediction
prediction_probabilities = lstm_model.predict(X_sample_reshaped)
predicted_class = label_encoder.inverse_transform(np.argmax(prediction_probabilities, axis=1))

# 9. Prediction display
print("\n✅ Resultado da Predição LSTM (Timeseries):")
print(f"Classe Real: {y_sample}")
print(f"Classe Predita pelo modelo: {predicted_class[0]}")

🔍 Janela temporal escolhida aleatoriamente:


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f9891,f9892,f9893,f9894,f9895,f9896,f9897,f9898,f9899,classe
19957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.106796,0.0,0.0,0.0,E3



✅ Resultado da Predição LSTM (Timeseries):
Classe Real: E3
Classe Predita pelo modelo: E3
