Skip to content

Commit cc2aecd

Browse files
authored
Merge pull request #14 from ooyarce/omar_pipeline_new
results 13
2 parents adbb88b + ac416b0 commit cc2aecd

File tree

3 files changed

+296
-216
lines changed

3 files changed

+296
-216
lines changed

DataBase-Outputs/MaxShearX.pdf

99.4 KB
Binary file not shown.

Python Scripts/PySeesTKO/pyseestko/plotting.py

Lines changed: 139 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,24 @@
22
# IMPORT LIBRARIES
33
# ==================================================================================
44
# Objects
5-
from matplotlib.ticker import FuncFormatter
6-
from matplotlib import pyplot as plt
7-
from pathlib import Path
8-
from typing import Union, Tuple
9-
from numpy.typing import NDArray
10-
from scipy.signal import savgol_filter # Para suavizado
11-
from pyseestko.errors import PlottingError
12-
from pyseestko.utilities import pwl
13-
from typing import List
5+
from matplotlib.ticker import FuncFormatter
6+
from matplotlib import pyplot as plt
7+
from pathlib import Path
8+
from typing import Union, Tuple
9+
from numpy.typing import NDArray
10+
from scipy.signal import savgol_filter # Para suavizado
11+
from pyseestko.errors import PlottingError
12+
from pyseestko.utilities import pwl
13+
from typing import List
14+
from sklearn.preprocessing import PowerTransformer
15+
from scipy.stats import shapiro, fligner
16+
1417
# Packages
15-
import pandas as pd
16-
import numpy as np
18+
import statsmodels.api as sm
19+
import scipy.stats as stats
20+
import seaborn as sns
21+
import pandas as pd
22+
import numpy as np
1723
# ==================================================================================
1824
# MAIN FUNCTIONS CLASS
1925
# ==================================================================================
@@ -89,55 +95,129 @@ def plotStatisticByReplicas(df:pd.DataFrame, statistic:str, title:str, ylabel:st
8995
plt.show()
9096
return fig, ax
9197

92-
#NOTE:DEPRECATED
93-
def plotValidation(drifts_df_lst, spectra_df_lst, base_shear_df_lst):
# Deprecated helper (removed by this commit) that builds "number of replicas vs
# statistic" figures for drifts, spectra and base shear and saves each one.
#
# Parameters:
#   drifts_df_lst:     passed unchanged to getReplicaCummStatisticDriftDFs.
#   spectra_df_lst:    passed unchanged to getCummStatisticSpectraDFs.
#   base_shear_df_lst: passed unchanged to getReplicaCummStatisticBaseShearDFs.
#
# No return statement is visible in this block. Every figure is written to a
# hard-coded absolute Windows path, so the function only works on the original
# author's machine. The third value returned by each helper (the combined
# DataFrame) is unpacked but never used here.
94-
# DRIFTS
95-
mean_drift_x_df, mean_drift_y_df, mean_drift_df = getReplicaCummStatisticDriftDFs(drifts_df_lst, statistic='mean')
96-
std_drift_x_df, std_drift_y_df, std_drift_df = getReplicaCummStatisticDriftDFs(drifts_df_lst, statistic='std')
97-
98-
# Plot the mean drifts
99-
fig1, ax = plotStatisticByReplicas(df=mean_drift_x_df, statistic='mean', ylabel='Drift', title='Number of replicas vs Mean Drift X')
100-
fig2, ax = plotStatisticByReplicas(df=mean_drift_y_df, statistic='mean', ylabel='Drift', title='Number of replicas vs Mean Drift Y')
101-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Drift X.png')
102-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Drift Y.png')
103-
104-
# Plot the std drifts
# NOTE(review): these drift plots use the ylabel 'Acceleration Spectra[m/s/s]',
# while the mean-spectra plots below use 'Drift' -- the labels look swapped; confirm.
105-
fig1, ax = plotStatisticByReplicas(df=std_drift_x_df, statistic='std', ylabel='Acceleration Spectra[m/s/s]', title='Number of replicas vs Std Drift X')
106-
fig2, ax = plotStatisticByReplicas(df=std_drift_y_df, statistic='std', ylabel='Acceleration Spectra[m/s/s]', title='Number of replicas vs Std Drift Y')
107-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Drift X.png')
108-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Drift Y.png')
109-
110-
# SPECTRA
111-
mean_spectra_x_df, mean_spectra_y_df, mean_spectra_df = getCummStatisticSpectraDFs(spectra_df_lst, statistic='mean')
112-
std_spectra_x_df, std_spectra_y_df, std_spectra_df = getCummStatisticSpectraDFs(spectra_df_lst, statistic='std')
113-
114-
# Plot the mean spectra
# NOTE(review): ylabel is 'Drift' for a spectra plot -- confirm the intended label.
115-
fig1, ax = plotStatisticByReplicas(df=mean_spectra_x_df, statistic='mean', ylabel='Drift', title='Number of replicas vs Mean Spectra X')
116-
fig2, ax = plotStatisticByReplicas(df=mean_spectra_y_df, statistic='mean', ylabel='Drift', title='Number of replicas vs Mean Spectra Y')
117-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Spectra X.png')
118-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Spectra Y.png')
119-
120-
# Plot the std spectra
121-
fig1, ax = plotStatisticByReplicas(df=std_spectra_x_df, statistic='std', ylabel='Acceleration Spectra[m/s/s]',title='Number of replicas vs Std Spectra X')
122-
fig2, ax = plotStatisticByReplicas(df=std_spectra_y_df, statistic='std', ylabel='Acceleration Spectra[m/s/s]',title='Number of replicas vs Std Spectra Y')
123-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Spectra X.png')
124-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Spectra Y.png')
125-
126-
# SHEAR BASE
# Only one set of base-shear DataFrames is computed, with statistic='mean'.
127-
max_shear_x_df, max_shear_y_df, max_shear_df = getReplicaCummStatisticBaseShearDFs(base_shear_df_lst, statistic='mean')
128-
129-
# Plot the mean base shear
# Unlike the drift and spectra outputs, the base-shear save paths carry no file extension.
130-
fig1, ax = plotStatisticByReplicas(df=max_shear_x_df, statistic='mean', ylabel='Base Shear [kN]', title='Number of replicas vs Mean Base Shear X')
131-
fig2, ax = plotStatisticByReplicas(df=max_shear_y_df, statistic='mean', ylabel='Base Shear [kN]', title='Number of replicas vs Mean Base Shear Y')
132-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Base Shear X')
133-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Mean Base Shear Y')
134-
135-
# Plot the std base shear
# NOTE(review): the "std" plots reuse the DataFrames built with statistic='mean'
# above; no std base-shear DataFrame is ever computed -- confirm this is intended.
136-
fig1, ax = plotStatisticByReplicas(df=max_shear_x_df, statistic='std', ylabel='Base Shear [kN]', title='Number of replicas vs Std Base Shear X')
137-
fig2, ax = plotStatisticByReplicas(df=max_shear_y_df, statistic='std', ylabel='Base Shear [kN]', title='Number of replicas vs Std Base Shear Y')
138-
fig1.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Base Shear X')
139-
fig2.savefig('C:/Users/oioya/OneDrive - miuandes.cl/Escritorio/Git-Updated/Thesis-Project-Simulation-Data-Analysis/DataBase-Outputs/Analysis Output/Number of replicas vs Std Base Shear Y')
14098

99+
# Función para verificar supuestos y generar gráficos de normalidad
100+
def analyze_manova_assumptions(df, dependent_vars, group_vars, project_path, drift=True, xdir=True):
    """
    Check MANOVA assumptions on Box-Cox transformed data and plot normality diagnostics.

    The dependent variables are Box-Cox transformed (and standardized) with
    ``PowerTransformer``; each transformed variable is then tested for
    normality (Shapiro-Wilk) and for homogeneity of variances across the
    combinations of the grouping factors (Fligner-Killeen). A figure with one
    histogram and one Q-Q plot per dependent variable is saved as
    ``manova_supps_<drift|spectra>_<x|y>.pdf`` inside ``project_path`` and
    then shown.

    Parameters:
        df (pd.DataFrame): Data containing every column named in
            ``dependent_vars`` and ``group_vars``. Box-Cox requires strictly
            positive values in the dependent columns.
        dependent_vars (list): Column names of the dependent variables; one
            plot column is drawn per entry.
        group_vars (list): Column names of the grouping factors.
        project_path (str | Path): Output directory; created if missing.
        drift (bool): Selects ``'drift'`` (True) or ``'spectra'`` (False) in
            the output file name.
        xdir (bool): Selects ``'x'`` (True) or ``'y'`` (False) in the output
            file name.

    Returns:
        pd.DataFrame: One row per dependent variable with the columns
        ``'Variable'``, ``'Normalidad (p-valor)'`` and
        ``'Homogeneidad (p-valor)'``.

    Raises:
        ValueError: If ``dependent_vars`` is empty.
    """
    # NOTE: this updates the global matplotlib rcParams and enables LaTeX
    # text rendering for every figure drawn afterwards, as the original did.
    plt.rcParams.update({
        "text.usetex": True,
        "font.size": 13,
        "font.family": "serif",
        "text.latex.preamble": r'\usepackage{amsmath}'
    })

    dependent_vars = list(dependent_vars)
    group_vars = list(group_vars)
    if not dependent_vars:
        raise ValueError('dependent_vars must contain at least one column name')

    # Keep only the columns of interest.
    df = df[dependent_vars + group_vars]

    # Box-Cox transformation of the dependent variables.
    pt = PowerTransformer(method='box-cox', standardize=True)
    X_transformed = pt.fit_transform(df[dependent_vars])
    df_transformed = pd.DataFrame(X_transformed, columns=dependent_vars)
    for group_var in group_vars:
        # .values drops the original index so rows align positionally.
        df_transformed[group_var] = df[group_var].values

    # Per-variable normality check (Shapiro-Wilk).
    normality_results = {
        var: shapiro(X_transformed[:, i]).pvalue
        for i, var in enumerate(dependent_vars)
    }

    # Homogeneity of variances (Fligner-Killeen) across factor combinations.
    df_transformed['combined_factors'] = df_transformed[group_vars].astype(str).agg('-'.join, axis=1)
    homogeneity_results = {}
    for var in dependent_vars:
        grouped_data = [group[var].values for _, group in df_transformed.groupby('combined_factors')]
        _, p_value = fligner(*grouped_data)
        homogeneity_results[var] = p_value

    # Two rows (histogram, Q-Q plot) and one column per dependent variable.
    # squeeze=False keeps `axes` two-dimensional even for a single variable.
    n_vars = len(dependent_vars)
    fig, axes = plt.subplots(2, n_vars, figsize=(max(9.0, 1.8 * n_vars), 5), squeeze=False)

    # The first five columns keep the original station labels; any further
    # column gets a generic label instead of raising IndexError.
    default_stations = (1, 5, 10, 15, 20)
    station_labels = [
        f'Estación {default_stations[i]}' if i < len(default_stations) else f'Variable {i + 1}'
        for i in range(n_vars)
    ]
    # Row-level titles/labels go on the middle column (index 2 for five columns).
    middle = n_vars // 2

    for i, var in enumerate(dependent_vars):
        hist_ax, qq_ax = axes[0, i], axes[1, i]
        label = station_labels[i]

        # Histogram in the first row.
        sns.histplot(df_transformed[var], kde=True, ax=hist_ax)
        hist_ax.set_title(f'Histograma de normalidad\n{label}' if i == middle else label)
        hist_ax.set_xlabel('')
        hist_ax.set_ylabel('Frecuencia' if i == 0 else '')

        # Q-Q plot in the second row.
        sm.qqplot(df_transformed[var], line='s', ax=qq_ax)
        qq_ax.set_title(f'Q-Q Plot de normalidad\n{label}' if i == middle else label)
        qq_ax.set_ylabel('Cuantiles de la muestra' if i == 0 else '')
        qq_ax.set_xlabel('Cuantiles teóricos' if i == middle else '')

    plt.tight_layout()

    file_name = 'drift' if drift else 'spectra'
    direction = 'x' if xdir else 'y'
    save_path = Path(project_path) / f'manova_supps_{file_name}_{direction}.pdf'
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Save BEFORE showing: on interactive backends the figure may be torn down
    # when the window opened by show() is closed, which yields an empty file.
    fig.savefig(save_path, dpi=100)
    plt.show()
    plt.close(fig)

    # Collect the p-values, one row per dependent variable.
    return pd.DataFrame({
        'Variable': dependent_vars,
        'Normalidad (p-valor)': [normality_results[var] for var in dependent_vars],
        'Homogeneidad (p-valor)': [homogeneity_results[var] for var in dependent_vars]
    })
162+
163+
164+
def analyze_anova_assumptions(df, analysis_columns, project_path):
    """
    Check ANOVA assumptions for each requested column and save diagnostic plots.

    For every column, the data are averaged per ('Sim_Type', 'Nsubs',
    'Station') group, Box-Cox transformed, tested for normality
    (Shapiro-Wilk) and for equal variances across 'Sim_Type' (Levene). A
    histogram and a Q-Q plot of the transformed values are saved as
    ``<column>.pdf`` inside ``project_path``, and the test results are
    printed to the console.

    Parameters:
        df (pd.DataFrame): Data with the columns 'Sim_Type', 'Nsubs',
            'Station' and every column listed in ``analysis_columns``.
            Box-Cox requires the per-group means to be strictly positive.
        analysis_columns (list): Names of the columns to analyse.
        project_path (str | Path): Output directory for the PDF figures;
            created if missing.

    Returns:
        None. Results are printed and figures are written to disk.
    """
    # NOTE: this updates the global matplotlib rcParams and enables LaTeX
    # text rendering for every figure drawn afterwards, as the original did.
    plt.rcParams.update({
        "text.usetex": True,
        "font.size": 13,
        "font.family": "serif",
        "text.latex.preamble": r'\usepackage{amsmath}'
    })

    # Accept plain strings as well as Path objects.
    out_dir = Path(project_path)

    for col in analysis_columns:
        boxcox_col = f'{col}_boxcox'

        # Mean of the column per (Sim_Type, Nsubs, Station) group.
        df_grouped = df.groupby(['Sim_Type', 'Nsubs', 'Station']).agg({col: 'mean'}).reset_index()

        # Box-Cox transformation; lambda is fitted by scipy.
        df_grouped[boxcox_col], fitted_lambda = stats.boxcox(df_grouped[col])

        # Normality: Shapiro-Wilk on the transformed values.
        shapiro_test = stats.shapiro(df_grouped[boxcox_col])

        # Homoscedasticity: Levene across simulation types. sort=False keeps
        # the groups in order of appearance, matching a unique()-based filter.
        grouped_data = [
            group[boxcox_col].values
            for _, group in df_grouped.groupby('Sim_Type', sort=False)
        ]
        levene_test = stats.levene(*grouped_data)

        # Histogram (left) and Q-Q plot (right) of the transformed data.
        # NOTE(review): with text.usetex enabled, a column name containing
        # LaTeX special characters such as '_' may fail to render in the
        # titles -- confirm the column names used by callers.
        fig, ax = plt.subplots(1, 2, figsize=(8, 5))
        sns.histplot(df_grouped[boxcox_col], kde=True, ax=ax[0])
        ax[0].set_title(f'Histograma de {col}')
        sm.qqplot(df_grouped[boxcox_col], line='s', ax=ax[1])
        ax[1].set_title(f'Q-Q Plot de {col}')
        plt.tight_layout()

        # Save the figure, creating the target directory if needed, then
        # close it so repeated iterations do not accumulate open figures.
        save_path = out_dir / f'{col}.pdf'
        save_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(save_path, dpi=100)
        plt.close(fig)

        # Report the test results on the console.
        print(f'fitted_lambda: {fitted_lambda:.3f}')
        print(f"Resultados de la Prueba de Shapiro-Wilk para {col} (Normalidad)")
        print(f"Estadístico: {shapiro_test.statistic:.3f}, Valor p: {shapiro_test.pvalue:.3f}\n")

        print(f"Resultados de la Prueba de Levene para {col} (Homocedasticidad)")
        print(f"Estadístico: {levene_test.statistic:.3f}, Valor p: {levene_test.pvalue:.3f}\n")
141221

142222

143223
# ==================================================================================

0 commit comments

Comments
 (0)