## Data fusion

Useful dataset: [Google Drive link](https://drive.google.com/file/d/1-PzvzJ0zCmFqqU01UONYgMLSe7tNUxGi/view?usp=sharing)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression

In [None]:
# NOTE(review): `_` is a placeholder here (in IPython it holds the previous
# cell's output), so this cell does not survive Restart & Run All as written.
# Assign the loaded dataset explicitly before relying on this notebook.
data = _

# Feature matrix (one column per sensor) and the target column.
feature_columns = ["temperature", "humidity", "noise_db"]
X = data[feature_columns]
y = data["risk"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# -------------------------------
# EARLY FUSION
# -------------------------------
# All sensor columns are fed together into a single classifier.
rf_early = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the fused model on the held-out rows.
y_pred_early = rf_early.predict(X_test)
acc_early = accuracy_score(y_true=y_test, y_pred=y_pred_early)
print(f"Early Fusion Accuracy: {acc_early:.3f}")

In [None]:
# -------------------------------
# INTERMEDIATE FUSION
# -------------------------------
# Each source produces intermediate features (mean, standard deviation) that
# are merged with the raw readings before a single model is trained.


def _summary_features(source_frame, mean_name, std_name):
    """Return a one-row frame holding the mean and std of a single-column frame."""
    return pd.DataFrame({
        mean_name: [source_frame.mean().values[0]],
        std_name: [source_frame.std().values[0]],
    })


def _with_repeated_features(samples, summary):
    """Append the one-row ``summary`` to every row of ``samples`` (index reset)."""
    repeated = pd.DataFrame(
        np.repeat(summary.values, len(samples), axis=0),
        columns=summary.columns,
    )
    return pd.concat([samples.reset_index(drop=True), repeated], axis=1)


# One single-column frame per source.
X_train_A = X_train[["temperature"]]
X_train_B = X_train[["humidity"]]
X_train_C = X_train[["noise_db"]]

# Intermediate representations (here, simple training-set statistics).
feat_A = _summary_features(X_train_A, "mean_temp", "std_temp")
feat_B = _summary_features(X_train_B, "mean_hum", "std_hum")
feat_C = _summary_features(X_train_C, "mean_noise", "std_noise")

# Fuse the per-source features side by side.
features = pd.concat([feat_A, feat_B, feat_C], axis=1)

# Expanded datasets: the same training statistics are repeated for every
# example, so the added columns are constant across rows in both train and test.
X_train_inter = _with_repeated_features(X_train, features)
X_test_inter = _with_repeated_features(X_test, features)

rf_inter = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_inter, y_train)
y_pred_inter = rf_inter.predict(X_test_inter)
acc_inter = accuracy_score(y_true=y_test, y_pred=y_pred_inter)
print(f"Intermediate Fusion Accuracy: {acc_inter:.3f}")

In [None]:
# ===============================
# LATE FUSION
# ===============================
# Train one independent model per sensor.
model_A = LogisticRegression().fit(X_train[["temperature"]], y_train)
model_B = LogisticRegression().fit(X_train[["humidity"]], y_train)
model_C = LogisticRegression().fit(X_train[["noise_db"]], y_train)

# Full per-class probability matrices, one per sensor model. Columns follow
# each model's `classes_`; the three models are fit on the same y_train, so
# their column order agrees.
proba_A = model_A.predict_proba(X_test[["temperature"]])
proba_B = model_B.predict_proba(X_test[["humidity"]])
proba_C = model_C.predict_proba(X_test[["noise_db"]])

# Individual predictions: probability of the second class (classes_[1]).
pred_A = proba_A[:, 1]
pred_B = proba_B[:, 1]
pred_C = proba_C[:, 1]

# Late fusion: average the probabilities across the three sensors.
proba_late = (proba_A + proba_B + proba_C) / 3
pred_late = proba_late[:, 1]

# Choose the class with the highest averaged probability and map it back to
# the original label values. With 0/1 labels this is the same decision as
# thresholding `pred_late` at 0.5 (up to floating-point rounding exactly at the
# boundary), but it no longer assumes the labels are the integers 0 and 1.
y_pred_late = model_A.classes_[proba_late.argmax(axis=1)]

acc_late = accuracy_score(y_test, y_pred_late)
print(f"🌇 Late Fusion Accuracy: {acc_late:.3f}")

# ===============================
# RESULTS COMPARISON
# ===============================
# `acc_early` and `acc_inter` come from the early / intermediate fusion cells.
print("\nCOMPARACIÓN FINAL")
print(f"Early Fusion       → {acc_early:.3f}")
print(f"Intermediate Fusion → {acc_inter:.3f}")
print(f"Late Fusion         → {acc_late:.3f}")