In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score
from autogluon.tabular import TabularPredictor, TabularDataset
from sklearn.metrics import mean_squared_error, make_scorer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Restore the previously trained AutoGluon predictor from its saved run directory.
model_dir = r"C:\Users\ldani\Documents\Patronus\Project\AutogluonModels\ag-20240612_190315"
predictor = TabularPredictor.load(model_dir)

In [11]:
# Read the processed recording for one subject into a DataFrame.
file_path = r'C:\Users\ldani\Documents\Patronus\Project\simulated-obstructive-disease-respiratory-pressure-and-flow-1.0.0\PQ_ProcessedData\ProcessedCOPD_Subject09_0cmH2O_0mL.csv'  # Adjust the path to your environment
df = pd.read_csv(file_path)

# Preview the first rows and the column labels to understand the file layout.
print(df.head(), df.columns, sep='\n')

   Time [s]  Pressure [cmH2O]  Flow [L/s]  V_tidal [L]  Inspiratory Indicies  \
0      0.00          2.649814   -0.681264     0.000000                   130   
1      0.01          2.617630   -0.626598    -0.006539                   501   
2      0.02          2.681998   -0.626598    -0.012805                   897   
3      0.03          2.563990   -0.626598    -0.019071                  1270   
4      0.04          2.649814   -0.626598    -0.025337                  1627   

   Weight [kg]  
0           95  
1            0  
2            0  
3            0  
4            0  
Index(['Time [s]', 'Pressure [cmH2O]', 'Flow [L/s]', 'V_tidal [L]',
       'Inspiratory Indicies', 'Weight [kg]'],
      dtype='object')


In [12]:
# Trim stray leading/trailing whitespace from every column label.
df.columns = df.columns.map(str.strip)

# Show the labels again to confirm the result of the clean-up.
print(df.columns)

Index(['Time [s]', 'Pressure [cmH2O]', 'Flow [L/s]', 'V_tidal [L]',
       'Inspiratory Indicies', 'Weight [kg]'],
      dtype='object')


In [13]:
# Subject-level values; each one is broadcast to every row of the recording.
peep = 0
age = 23
gender = 'Female'
height_cm = 175
weight_kg = 60
history_of_smoking = 'No'
smoking_frequency = 'N/A'
history_of_vaping = 'Yes'
frequency_of_vaping = 'More than once a day'
asthma = 'Yes - Moderate'
# A heart-condition field ('Heart condition (yes/no)') was disabled in this
# notebook and is intentionally not added.

# Column label -> constant value, in the order the columns are written.
# 'Weight [kg]' already exists in the CSV, so it is overwritten in place
# rather than appended.
subject_columns = {
    'PEEP': peep,
    'Weight [kg]': weight_kg,
    'Age': age,
    'Gender': gender,
    'Height [cm]': height_cm,
    'History of Smoking (yes/no)': history_of_smoking,
    'Smoking Frequency': smoking_frequency,
    'History of vaping (yes/no)': history_of_vaping,
    'Frequency of vaping': frequency_of_vaping,
    'Asthma (yes/no and severity)': asthma,
}

for column_label, constant_value in subject_columns.items():
    df[column_label] = constant_value


In [14]:
# Columns removed before inference (described in the original notebook as the
# columns with missing values).
columns_to_drop = ['Inspiratory Indicies', 'Smoking Frequency', 'Frequency of vaping']
df = df.drop(columns=columns_to_drop)

# Preview the first rows to check the remaining columns.
print(df.head())

   Time [s]  Pressure [cmH2O]  Flow [L/s]  V_tidal [L]  Weight [kg]  PEEP  \
0      0.00          2.649814   -0.681264     0.000000           60     0   
1      0.01          2.617630   -0.626598    -0.006539           60     0   
2      0.02          2.681998   -0.626598    -0.012805           60     0   
3      0.03          2.563990   -0.626598    -0.019071           60     0   
4      0.04          2.649814   -0.626598    -0.025337           60     0   

   Age  Gender  Height [cm] History of Smoking (yes/no)  \
0   23  Female          175                          No   
1   23  Female          175                          No   
2   23  Female          175                          No   
3   23  Female          175                          No   
4   23  Female          175                          No   

  History of vaping (yes/no) Asthma (yes/no and severity)  
0                        Yes               Yes - Moderate  
1                        Yes               Yes - Moderate  
2 

In [15]:
# Feature names the loaded predictor expects as input.
expected_columns = predictor.feature_metadata_in.get_features()

# Start from an all-zero frame that has exactly those features, row-aligned with df.
complete_df = pd.DataFrame(0, index=df.index, columns=expected_columns)

# Copy over every expected feature that df actually provides.
# NOTE(review): expected features that are absent from df silently stay at 0 —
# confirm this placeholder is acceptable for the model.
for feature_name in expected_columns:
    if feature_name in df.columns:
        complete_df[feature_name] = df[feature_name]

In [16]:
# Wrap the aligned feature frame for AutoGluon and run inference on every row.
test_data = TabularDataset(complete_df)
predictions = predictor.predict(test_data)

# Store the predictions next to the source rows so they can be read in context.
df['Predicted_COPD'] = predictions

# Display the prediction column as the cell output.
df['Predicted_COPD']

0       239.628433
1       239.628433
2       239.628433
3       239.628433
4       239.628433
           ...    
4357    239.628433
4358    239.628433
4359    239.628433
4360    239.628433
4361    239.628433
Name: Predicted_COPD, Length: 4362, dtype: float32

# Código anterior (celdas no ejecutadas)

In [None]:
# Legacy preprocessing step: standardise the numeric columns and one-hot encode
# the categorical columns of df, then rebuild a labelled DataFrame.
#
# The feature lists and the ColumnTransformer are defined BEFORE they are used,
# so this cell runs on a fresh kernel (previously the transform was placed
# ahead of the definitions and failed with a NameError).

# Columns to one-hot encode and columns to standardise.
categorical_features = ['Gender', 'History of Smoking (yes/no)', 'History of vaping (yes/no)', 'Asthma (yes/no and severity)']
numerical_features = ['Age', 'Height [cm]', 'Weight [kg]', 'PEEP', 'Time [s]', 'Pressure [cmH2O]', 'Flow [L/s]', 'V_tidal [L]']

# Column transformer: scaler for the numeric block, encoder for the categorical block.
# sparse_threshold=0 makes the transformer always return a dense array, which is
# what the pd.DataFrame(...) construction below requires; OneHotEncoder emits
# sparse output by default.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    sparse_threshold=0,
)

# Fit on df and transform it; columns not listed above are dropped by the
# transformer's default remainder handling.
transformed_values = preprocessor.fit_transform(df)

# Output column order follows the transformer order: numeric features first,
# then the one-hot columns generated by the fitted encoder.
encoded_feature_names = list(preprocessor.named_transformers_['cat'].get_feature_names_out())
df_transformed = pd.DataFrame(transformed_values, columns=numerical_features + encoded_feature_names)