|
2 | 2 | # IMPORT LIBRARIES |
3 | 3 | # ================================================================================== |
4 | 4 | # Objects |
5 | | -from matplotlib.ticker import FuncFormatter |
6 | | -from matplotlib import pyplot as plt |
7 | | -from pathlib import Path |
8 | | -from typing import Union, Tuple |
9 | | -from numpy.typing import NDArray |
10 | | -from scipy.signal import savgol_filter # Para suavizado |
11 | | -from pyseestko.errors import PlottingError |
12 | | -from pyseestko.utilities import pwl |
13 | | -from typing import List |
| 5 | +from matplotlib.ticker import FuncFormatter |
| 6 | +from matplotlib import pyplot as plt |
| 7 | +from pathlib import Path |
| 8 | +from typing import Union, Tuple |
| 9 | +from numpy.typing import NDArray |
| 10 | +from scipy.signal import savgol_filter # Para suavizado |
| 11 | +from pyseestko.errors import PlottingError |
| 12 | +from pyseestko.utilities import pwl |
| 13 | +from typing import List |
| 14 | +from sklearn.preprocessing import PowerTransformer |
| 15 | +from scipy.stats import shapiro, fligner |
| 16 | + |
14 | 17 | # Packages |
15 | | -import pandas as pd |
16 | | -import numpy as np |
| 18 | +import statsmodels.api as sm |
| 19 | +import scipy.stats as stats |
| 20 | +import seaborn as sns |
| 21 | +import pandas as pd |
| 22 | +import numpy as np |
17 | 23 | # ================================================================================== |
18 | 24 | # MAIN FUNCTIONS CLASS |
19 | 25 | # ================================================================================== |
@@ -89,55 +95,129 @@ def plotStatisticByReplicas(df:pd.DataFrame, statistic:str, title:str, ylabel:st |
89 | 95 | plt.show() |
90 | 96 | return fig, ax |
91 | 97 |
|
#NOTE:DEPRECATED
def plotValidation(drifts_df_lst, spectra_df_lst, base_shear_df_lst, output_dir=None):
    """Plot and save mean/std "number of replicas" curves for drifts, spectra and base shear.

    For every quantity the cumulative statistic frames are obtained from the
    corresponding ``get...CummStatistic...DFs`` helper, plotted with
    ``plotStatisticByReplicas`` and written as ``<title>.png`` in ``output_dir``.

    Parameters
    ----------
    drifts_df_lst
        Passed unchanged to ``getReplicaCummStatisticDriftDFs``.
    spectra_df_lst
        Passed unchanged to ``getCummStatisticSpectraDFs``.
    base_shear_df_lst
        Passed unchanged to ``getReplicaCummStatisticBaseShearDFs``.
    output_dir : str or Path, optional
        Directory that receives the PNG files. Defaults to the directory that
        was previously hard-coded in this function.

    Returns
    -------
    None
    """
    if output_dir is None:
        # Historical default kept so existing callers write to the same place.
        output_dir = Path(
            'C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/'
            'Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output'
        )
    else:
        output_dir = Path(output_dir)

    drift_label = 'Drift'
    spectra_label = 'Acceleration Spectra[m/s/s]'
    shear_label = 'Base Shear [kN]'

    def _plot_and_save(frame, statistic, ylabel, title):
        # The title doubles as the file name, as in the original code.
        fig, _ = plotStatisticByReplicas(df=frame, statistic=statistic, ylabel=ylabel, title=title)
        fig.savefig(output_dir / f'{title}.png')

    # DRIFTS
    mean_drift_x_df, mean_drift_y_df, _ = getReplicaCummStatisticDriftDFs(drifts_df_lst, statistic='mean')
    std_drift_x_df, std_drift_y_df, _ = getReplicaCummStatisticDriftDFs(drifts_df_lst, statistic='std')
    _plot_and_save(mean_drift_x_df, 'mean', drift_label, 'Number of replicas vs Mean Drift X')
    _plot_and_save(mean_drift_y_df, 'mean', drift_label, 'Number of replicas vs Mean Drift Y')
    # Fixed: std-drift plots used the spectra y-label.
    _plot_and_save(std_drift_x_df, 'std', drift_label, 'Number of replicas vs Std Drift X')
    _plot_and_save(std_drift_y_df, 'std', drift_label, 'Number of replicas vs Std Drift Y')

    # SPECTRA
    mean_spectra_x_df, mean_spectra_y_df, _ = getCummStatisticSpectraDFs(spectra_df_lst, statistic='mean')
    std_spectra_x_df, std_spectra_y_df, _ = getCummStatisticSpectraDFs(spectra_df_lst, statistic='std')
    # Fixed: mean-spectra plots used the drift y-label.
    _plot_and_save(mean_spectra_x_df, 'mean', spectra_label, 'Number of replicas vs Mean Spectra X')
    _plot_and_save(mean_spectra_y_df, 'mean', spectra_label, 'Number of replicas vs Mean Spectra Y')
    _plot_and_save(std_spectra_x_df, 'std', spectra_label, 'Number of replicas vs Std Spectra X')
    _plot_and_save(std_spectra_y_df, 'std', spectra_label, 'Number of replicas vs Std Spectra Y')

    # BASE SHEAR
    mean_shear_x_df, mean_shear_y_df, _ = getReplicaCummStatisticBaseShearDFs(base_shear_df_lst, statistic='mean')
    # Fixed: the "std" plots previously reused the frames computed with
    # statistic='mean'; compute std frames the same way the drift and spectra
    # sections do.
    # NOTE(review): assumes getReplicaCummStatisticBaseShearDFs accepts
    # statistic='std' like its drift counterpart - confirm.
    std_shear_x_df, std_shear_y_df, _ = getReplicaCummStatisticBaseShearDFs(base_shear_df_lst, statistic='std')
    _plot_and_save(mean_shear_x_df, 'mean', shear_label, 'Number of replicas vs Mean Base Shear X')
    _plot_and_save(mean_shear_y_df, 'mean', shear_label, 'Number of replicas vs Mean Base Shear Y')
    _plot_and_save(std_shear_x_df, 'std', shear_label, 'Number of replicas vs Std Base Shear X')
    _plot_and_save(std_shear_y_df, 'std', shear_label, 'Number of replicas vs Std Base Shear Y')
140 | 98 |
|
def analyze_manova_assumptions(df, dependent_vars, group_vars, project_path, drift=True, xdir=True,
                               station_labels=None):
    """Check MANOVA assumptions on Box-Cox transformed data and plot normality diagnostics.

    The dependent variables are transformed with a standardized Box-Cox
    ``PowerTransformer``. Each transformed variable is tested for normality
    (Shapiro-Wilk) and for homogeneity of variances across the combinations of
    ``group_vars`` (Fligner-Killeen). A figure with one histogram and one Q-Q
    plot per variable is saved as
    ``manova_supps_<drift|spectra>_<x|y>.pdf`` inside ``project_path``.

    Parameters
    ----------
    df : pd.DataFrame
        Data containing every column named in ``dependent_vars`` and ``group_vars``.
    dependent_vars : list
        Columns to transform and test. Box-Cox requires strictly positive values.
    group_vars : list
        Factor columns; their string values are joined to form the groups.
    project_path : str or Path
        Output directory; created if it does not exist.
    drift : bool, default True
        Selects ``'drift'`` (True) or ``'spectra'`` (False) in the file name.
    xdir : bool, default True
        Selects ``'x'`` (True) or ``'y'`` (False) in the file name.
    station_labels : list of str, optional
        One subplot title per dependent variable. When omitted, five
        variables get the original 'Estación 1/5/10/15/20' labels and any
        other count falls back to the variable names.

    Returns
    -------
    pd.DataFrame
        Columns 'Variable', 'Normalidad (p-valor)' and 'Homogeneidad (p-valor)'.

    Raises
    ------
    ValueError
        If ``dependent_vars`` or ``group_vars`` is empty, or if
        ``station_labels`` does not match ``dependent_vars`` in length.
    KeyError
        If a requested column is missing from ``df``.
    """
    dependent_vars = list(dependent_vars)
    group_vars = list(group_vars)

    # Validate before touching global matplotlib state or doing any work.
    if not dependent_vars or not group_vars:
        raise ValueError("dependent_vars and group_vars must each contain at least one column")
    missing = [col for col in dependent_vars + group_vars if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in df: {missing}")

    n_vars = len(dependent_vars)
    if station_labels is None:
        if n_vars == 5:
            station_labels = [f'Estación {i}' for i in (1, 5, 10, 15, 20)]
        else:
            # NOTE: with text.usetex enabled, names containing characters
            # special to LaTeX (e.g. '_') may need escaping by the caller.
            station_labels = [str(var) for var in dependent_vars]
    else:
        station_labels = list(station_labels)
        if len(station_labels) != n_vars:
            raise ValueError("station_labels must have one entry per dependent variable")

    plt.rcParams.update({
        "text.usetex": True,
        "font.size": 13,
        "font.family": "serif",
        "text.latex.preamble": r'\usepackage{amsmath}'
    })

    # Keep only the columns of interest.
    df = df[dependent_vars + group_vars]

    # Box-Cox transformation (standardized output).
    pt = PowerTransformer(method='box-cox', standardize=True)
    X_transformed = pt.fit_transform(df[dependent_vars])
    df_transformed = pd.DataFrame(X_transformed, columns=dependent_vars)
    for group_var in group_vars:
        # .values: align by position, since df_transformed has a fresh index.
        df_transformed[group_var] = df[group_var].values

    # Per-variable normality check (Shapiro-Wilk).
    normality_pvalues = [shapiro(X_transformed[:, i]).pvalue for i in range(n_vars)]

    # Homogeneity of variances across factor combinations (Fligner-Killeen).
    df_transformed['combined_factors'] = df_transformed[group_vars].astype(str).agg('-'.join, axis=1)
    homogeneity_pvalues = []
    for var in dependent_vars:
        samples = [grp[var].values for _, grp in df_transformed.groupby('combined_factors')]
        _, p_value = fligner(*samples)
        homogeneity_pvalues.append(p_value)

    # One column per variable: histogram on the first row, Q-Q plot on the second.
    # squeeze=False keeps `axes` two-dimensional even for a single variable.
    fig, axes = plt.subplots(2, n_vars, figsize=(max(1.8 * n_vars, 4.0), 5), squeeze=False)
    title_col = n_vars // 2  # the middle column carries the row-level title

    for i, (var, label) in enumerate(zip(dependent_vars, station_labels)):
        hist_ax, qq_ax = axes[0, i], axes[1, i]

        sns.histplot(df_transformed[var], kde=True, ax=hist_ax)
        hist_ax.set_title(f'Histograma de normalidad\n{label}' if i == title_col else label)
        hist_ax.set_xlabel('')
        hist_ax.set_ylabel('Frecuencia' if i == 0 else '')

        sm.qqplot(df_transformed[var], line='s', ax=qq_ax)
        qq_ax.set_title(f'Q-Q Plot de normalidad\n{label}' if i == title_col else label)
        qq_ax.set_ylabel('Cuantiles de la muestra' if i == 0 else '')
        qq_ax.set_xlabel('Cuantiles teóricos' if i == title_col else '')

    plt.tight_layout()

    out_dir = Path(project_path)
    out_dir.mkdir(parents=True, exist_ok=True)
    file_name = 'drift' if drift else 'spectra'
    direction = 'x' if xdir else 'y'
    # Save before showing so the file does not depend on the state of the
    # figure after the interactive window has been dismissed.
    fig.savefig(out_dir / f'manova_supps_{file_name}_{direction}.pdf', dpi=100)
    plt.show()
    plt.close(fig)  # the figure is not returned, so release it

    return pd.DataFrame({
        'Variable': dependent_vars,
        'Normalidad (p-valor)': normality_pvalues,
        'Homogeneidad (p-valor)': homogeneity_pvalues,
    })
| 162 | + |
| 163 | + |
def analyze_anova_assumptions(df, analysis_columns, project_path):
    """Check ANOVA assumptions for each requested column and save diagnostic plots.

    For every column the data are averaged per ('Sim_Type', 'Nsubs',
    'Station') group and Box-Cox transformed. The transformed values are
    tested for normality (Shapiro-Wilk) and for homoscedasticity across
    'Sim_Type' (Levene). A histogram and a Q-Q plot of the transformed values
    are saved to ``<project_path>/<column>.pdf`` and the test results are
    printed to the console.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'Sim_Type', 'Nsubs', 'Station' and every
        column listed in ``analysis_columns``.
    analysis_columns : list
        Columns to analyse. Box-Cox requires the group means to be strictly
        positive.
    project_path : str or Path
        Directory that receives the PDF figures; created if missing.

    Returns
    -------
    pd.DataFrame
        One row per analysed column with the fitted Box-Cox lambda and the
        p-values of both tests (empty when ``analysis_columns`` is empty).

    Raises
    ------
    KeyError
        If a grouping or analysis column is missing from ``df``.
    """
    analysis_columns = list(analysis_columns)
    group_keys = ['Sim_Type', 'Nsubs', 'Station']

    # Validate before touching global matplotlib state.
    missing = [col for col in group_keys + analysis_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Columns not found in df: {missing}")

    plt.rcParams.update({
        "text.usetex": True,
        "font.size": 13,
        "font.family": "serif",
        "text.latex.preamble": r'\usepackage{amsmath}'
    })

    out_dir = Path(project_path)
    records = []

    for col in analysis_columns:
        boxcox_col = f'{col}_boxcox'

        # Mean of the column per (Sim_Type, Nsubs, Station) group.
        df_grouped = df.groupby(group_keys).agg({col: 'mean'}).reset_index()

        # Box-Cox transformation; lambda is estimated from the data.
        df_grouped[boxcox_col], fitted_lambda = stats.boxcox(df_grouped[col])

        # Normality (Shapiro-Wilk) on the transformed values.
        shapiro_test = stats.shapiro(df_grouped[boxcox_col])

        # Homoscedasticity (Levene) across simulation types.
        grouped_data = [
            df_grouped.loc[df_grouped['Sim_Type'] == sim_type, boxcox_col].values
            for sim_type in df_grouped['Sim_Type'].unique()
        ]
        levene_test = stats.levene(*grouped_data)

        # Histogram (left) and Q-Q plot (right) of the transformed values.
        fig, ax = plt.subplots(1, 2, figsize=(8, 5))
        sns.histplot(df_grouped[boxcox_col], kde=True, ax=ax[0])
        ax[0].set_title(f'Histograma de {col}')
        sm.qqplot(df_grouped[boxcox_col], line='s', ax=ax[1])
        ax[1].set_title(f'Q-Q Plot de {col}')
        plt.tight_layout()

        save_path = out_dir / f'{col}.pdf'
        save_path.parent.mkdir(parents=True, exist_ok=True)  # create the directory if needed
        fig.savefig(save_path, dpi=100)
        plt.close(fig)

        # Console report (unchanged output format).
        print(f'fitted_lambda: {fitted_lambda:.3f}')
        print(f"Resultados de la Prueba de Shapiro-Wilk para {col} (Normalidad)")
        print(f"Estadístico: {shapiro_test.statistic:.3f}, Valor p: {shapiro_test.pvalue:.3f}\n")

        print(f"Resultados de la Prueba de Levene para {col} (Homocedasticidad)")
        print(f"Estadístico: {levene_test.statistic:.3f}, Valor p: {levene_test.pvalue:.3f}\n")

        records.append({
            'Variable': col,
            'Lambda Box-Cox': fitted_lambda,
            'Normalidad (p-valor)': shapiro_test.pvalue,
            'Homogeneidad (p-valor)': levene_test.pvalue,
        })

    return pd.DataFrame(
        records,
        columns=['Variable', 'Lambda Box-Cox', 'Normalidad (p-valor)', 'Homogeneidad (p-valor)'],
    )
141 | 221 |
|
142 | 222 |
|
143 | 223 | # ================================================================================== |
|
0 commit comments