In [None]:
# Environment settings for importing from 'src/' 
import sys
import pandas as pd
from pathlib import Path


# Locate the project root. Per the original note, the notebook is expected to
# run with notebooks/reporting/ as the working directory, so the root is taken
# to be two levels above the current directory.
project_root = Path.cwd().parents[1]
src_path = project_root.joinpath("src")

# Put src/ at the front of the module search path, but only once, so that
# re-running this cell does not stack duplicate entries.
src_dir = str(src_path)
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# Each project module is imported in its own guarded block and its status is
# printed, so one broken module does not hide the state of the others.
# A failure is only reported, not re-raised: the names from a failed import
# stay undefined and the cells that use them will raise NameError.

try:
    from preprocessing.preprocess import process_data
except Exception as exc:
    print("X ----- Error en preprocessing.preprocess:", exc)
else:
    print("O ----- preprocessing.preprocess importado correctamente.")

try:
    from analysis.features import FeatureExtractor
except Exception as exc:
    print("X ----- Error en analysis.features:", exc)
else:
    print("O ----- analysis.features importado correctamente.")

try:
    from analysis.statistics import (
        separability_measure,
        compute_covariance_matrix,
        compute_eigen_decomposition,
        anova_test,
        compute_fisher_ratio,
    )
except Exception as exc:
    print("X ----- Error en analysis.statistics:", exc)
else:
    print("O ----- analysis.statistics importado correctamente.")

try:
    from analysis.model import train_and_evaluate_logistic_regression
except Exception as exc:
    print("X ----- Error en analysis.model:", exc)
else:
    print("O ----- analysis.model importado correctamente.")

try:
    import visualization.plots as plots
except Exception as exc:
    print("X ----- Error en visualization.plots:", exc)
else:
    print("O ----- visualization.plots importado correctamente.")



O ----- preprocessing.preprocess importado correctamente.
O ----- analysis.features importado correctamente.
O ----- analysis.statistics importado correctamente.
O ----- analysis.model importado correctamente.
O ----- visualization.plots importado correctamente.


In [16]:
#  1. Preprocessing

print("--- Preprocessing ---")

# Input and output locations, both relative to the project root.
raw_csv_path = project_root / "data/raw/data.csv"
cleaned_csv_path = project_root / "data/processed/cleaned_data.csv"

# process_data reports (see the cell output) that it saves the cleaned and
# scaled data to output_path; the returned frame is kept for the later cells.
data = process_data(input_path=raw_csv_path, output_path=cleaned_csv_path)

# Last expression of the cell, so the notebook renders the frame.
data

--- Preprocessing ---
Cleaned and scaled data saved to: c:\Users\juane\OneDrive\Documentos\DOCUMENTOS MAESTRIA\programacion cientifica\data\processed\cleaned_data.csv


Unnamed: 0,Hora_PC,Temperatura,Humedad,Presion,Altitud,Lluvia
0,2025-03-24 15:27:35,2.078398,-2.243462,-2.360755,2.337728,1
1,2025-03-24 15:28:33,2.141883,-2.290703,-2.287881,2.305963,1
2,2025-03-24 15:29:33,2.139123,-2.255460,-2.287881,2.317728,1
3,2025-03-24 15:30:33,2.059077,-2.282455,-2.360755,2.370670,1
4,2025-03-24 15:31:33,2.012153,-2.306451,-2.287881,2.316552,1
...,...,...,...,...,...,...
9263,2025-03-31 13:25:59,-0.049738,-0.155085,-0.611784,0.611820,0
9264,2025-03-31 13:26:59,-0.088381,0.118616,-0.611784,0.578879,0
9265,2025-03-31 13:27:59,-0.049738,0.180105,-0.611784,0.601232,0
9266,2025-03-31 13:29:00,0.010987,0.150110,-0.611784,0.580055,0


In [20]:
#  2. Feature extraction

print("--- Feature Extraction ---")

features = FeatureExtractor(data)

# Positional selection: every column except the first and the last one.
# In the frame displayed above these are Hora_PC and Lluvia respectively.
columns = data.columns[1:-1]

# Compute the three feature tables first, then print them in the same order.
time_features = features.extract_time_features(columns)
freq_features = features.extract_frequency_features(columns)
deriv_features = features.extract_derivative_features(columns)

for feature_table in (time_features, freq_features, deriv_features):
    print(feature_table)

--- Feature Extraction ---
                     mean  variance       rms
Temperatura -1.778657e-16  0.999892  0.999946
Humedad     -5.335970e-16  0.999892  0.999946
Presion      2.789425e-14  0.999892  0.999946
Altitud     -1.806625e-13  0.999892  0.999946
             peak_freq  spectral_centroid  spectral_bandwidth
Temperatura   0.000647           0.001937            0.011739
Humedad       0.000647           0.001592            0.011725
Presion       0.001403           0.001623            0.009392
Altitud       0.001403           0.001527            0.007546
             mean_abs_change  std_derivative
Temperatura         0.034399        0.061365
Humedad             0.033608        0.056440
Presion             0.019015        0.045597
Altitud             0.019477        0.036159


In [None]:
#  3. Statistics and separability

print("--- Separability Analysis ---")

# The label is taken to be the last column of `data`; later cells reuse
# `label_col`, so this cell has to run before them.
label_col = data.columns[-1]

feature_names = columns.tolist()
separability = separability_measure(data, feature_names, label_col)
print(separability)

--- Separability Analysis ---
       feature  fisher_ratio
0      Humedad      0.891560
1  Temperatura      0.563495
2      Altitud      0.000025
3      Presion      0.000017


In [None]:
#  4. Visualizations

print("--- Visualizations ---")

# Despite its name, `data_dir` is the figures output directory. The name is
# kept because the PCA cell reuses it. Create the directory on first run.
data_dir = project_root / "results/figures"
data_dir.mkdir(parents=True, exist_ok=True)

plots.plot_features_boxplot(
    data,
    output_path=data_dir / "boxplot.png",
)
plots.plot_pairplot(
    data,
    hue_col=label_col,
    output_path=data_dir / "pairplot.png",
)
plots.plot_feature_density(
    data,
    features=columns.tolist(),
    hue_col=label_col,
    output_path=data_dir / "density.png",
)

--- Visualizations ---
Box plot guardado en: c:\Users\juane\OneDrive\Documentos\DOCUMENTOS MAESTRIA\programacion cientifica\results\figures\boxplot.png


  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)
  func(x=x, y=y, **kwargs)


In [None]:
#  5. PCA

print("--- PCA Analysis ---")

# Make sure results/tables/ exists before the covariance CSV is written there.
# This cell runs before the export step, which is the only other place in the
# notebook that creates this directory, so a fresh checkout could otherwise
# fail here. mkdir with exist_ok=True is a no-op when it already exists.
tables_dir = project_root / "results/tables"
tables_dir.mkdir(parents=True, exist_ok=True)

# Covariance of the selected feature columns, then its eigen-decomposition.
cov_matrix = compute_covariance_matrix(
    data,
    columns.tolist(),
    output_path=tables_dir / "cov_matrix.csv",
)
eig_vals, eig_vecs = compute_eigen_decomposition(cov_matrix)

# `data_dir` (figures directory) is defined in the visualization cell.
plots.plot_eigenvectors_2d(
    data[columns].values,
    eig_vals,
    eig_vecs,
    columns.tolist(),
    output_path=data_dir / "eigenvectors.png",
)

# NOTE(review): the full `data` frame is passed here rather than only the
# feature columns, and the dtype listing printed in this cell's output includes
# Lluvia. Confirm that apply_pca_transformation drops the label column before
# fitting.
X_pca = FeatureExtractor.apply_pca_transformation(data, n_components=2)
plots.plot_pca_dimensionality_reduction(
    X_pca,
    data[label_col],
    output_path=data_dir / "pca_reduction.png",
)

--- PCA Analysis ---
Temperatura    float64
Humedad        float64
Presion        float64
Altitud        float64
Lluvia           int64
dtype: object
Temperatura    float64
Humedad        float64
Presion        float64
Altitud        float64
Lluvia           int64
dtype: object


In [None]:
#  6. Model ML

print("--- Model Training ---")

# The training helper prints the performance report itself (see the cell
# output); its return value is kept in `metrics` for the export cell.
metrics = train_and_evaluate_logistic_regression(
    data,
    features=columns.tolist(),
    target=label_col,
)

--- Model Training ---
Métricas de rendimiento de Regresión Logística:
Accuracy:    0.8511
Precision:   0.8720
Recall:      0.9185
F1-score:    0.8947
ROC-AUC:     0.9202


In [21]:
#  7. Export summary

# Destination of the metrics table; create its parent directory if needed.
summary_path = project_root / "results/tables/summary_metrics.csv"
summary_path.parent.mkdir(parents=True, exist_ok=True)

# `metrics` is wrapped in a one-element list before building the frame
# (presumably a dict of metric name -> value, giving a single row; confirm
# against train_and_evaluate_logistic_regression).
summary_frame = pd.DataFrame([metrics])
summary_frame.to_csv(summary_path, index=False)
