In [18]:
import os

import pandas as pd

In [19]:
# Load the model-evaluation results. As the preview below shows, the
# 'Parameters' column packs each row's hyper-parameters into a single
# comma-separated "key=value" string.
df = pd.read_csv('results.csv')
# Preview the first rows to confirm the file parsed as expected.
print(df.head())

     Model                         Parameters  Accuracy  Precision    Recall  \
0  XGBoost  n_estimators=50, lr=0.01, depth=3  0.727466   0.529207  0.727466   
1  XGBoost  n_estimators=50, lr=0.01, depth=5  0.752307   0.778130  0.752307   
2  XGBoost  n_estimators=50, lr=0.01, depth=7  0.753016   0.779310  0.753016   
3  XGBoost  n_estimators=50, lr=0.05, depth=3  0.782115   0.767693  0.782115   
4  XGBoost  n_estimators=50, lr=0.05, depth=5  0.792761   0.782358  0.792761   

   F1-Score   AUC-ROC  
0  0.612698  0.500000  
1  0.673886  0.551273  
2  0.675280  0.552575  
3  0.767506  0.675174  
4  0.784360  0.705291  


In [20]:
def split_parameters(params_str):
    """Parse a comma-separated ``key=value`` string into a pandas Series.

    Example: ``'n_estimators=50, lr=0.01, depth=3'`` becomes a Series indexed
    by ``['n_estimators', 'lr', 'depth']`` with values ``['50', '0.01', '3']``.

    Parameters
    ----------
    params_str : str
        Parameter description such as ``'a=1, b=2'``. Anything that is not a
        string (e.g. NaN from a missing CSV cell) is treated as "no parameters".

    Returns
    -------
    pandas.Series
        One entry per parameter; both keys and values are whitespace-stripped
        strings. Empty when the input holds no parameters.

    Raises
    ------
    ValueError
        If a non-empty item contains no ``=`` separator.
    """
    # Missing cells arrive as float NaN, which has no .split().
    if not isinstance(params_str, str):
        return pd.Series(dtype=object)

    params = {}
    for item in params_str.split(','):
        item = item.strip()
        if not item:
            # Tolerate empty input and stray/trailing commas.
            continue
        # partition splits on the FIRST '=' only, so values may contain '='.
        key, sep, value = item.partition('=')
        if not sep:
            raise ValueError(f"Expected 'key=value' but got {item!r}")
        # Strip so that 'a=1, b=2' yields the key 'b' rather than ' b'.
        params[key.strip()] = value.strip()

    return pd.Series(params, dtype=object)

# Expand the packed 'Parameters' string into one column per hyper-parameter.
# The guard keeps this cell re-runnable: once `df` has been transformed the
# 'Parameters' column is gone, and an unguarded second run would raise KeyError.
if 'Parameters' in df.columns:
    parameters_df = df['Parameters'].apply(split_parameters)
    df = pd.concat([df.drop(columns='Parameters'), parameters_df], axis=1)

# Rows that lack a given hyper-parameter get NaN in its column; label those
# 'N/A' for clarity in the Streamlit app. The result is assigned back rather
# than using inplace=True so the rebinding of `df` is explicit.
df = df.fillna('N/A')

# Check the transformation
print(df.head())

# The transformed DataFrame is saved to CSV in the next cell.


     Model  Accuracy  Precision    Recall  F1-Score   AUC-ROC n_estimators  \
0  XGBoost  0.727466   0.529207  0.727466  0.612698  0.500000           50   
1  XGBoost  0.752307   0.778130  0.752307  0.673886  0.551273           50   
2  XGBoost  0.753016   0.779310  0.753016  0.675280  0.552575           50   
3  XGBoost  0.782115   0.767693  0.782115  0.767506  0.675174           50   
4  XGBoost  0.792761   0.782358  0.792761  0.784360  0.705291           50   

     lr  depth n_d_n_a  epochs  max_depth  min_samples_split  
0  0.01      3     N/A     N/A        N/A                N/A  
1  0.01      5     N/A     N/A        N/A                N/A  
2  0.01      7     N/A     N/A        N/A                N/A  
3  0.05      3     N/A     N/A        N/A                N/A  
4  0.05      5     N/A     N/A        N/A                N/A  


In [21]:
# Write the current `df` to disk; index=False leaves the row index out of the
# file. This CSV is the input passed to clean_data() further down.
df.to_csv('transformed_results.csv', index=False)

In [23]:
def clean_data(file_path):
    """Clean a results CSV and save the result beside it with a ``_cleaned`` suffix.

    Cleaning steps:
      1. Strip surrounding whitespace from every column name.
      2. If a ``max_depth`` column exists, convert it to numeric; entries that
         cannot be parsed as numbers (e.g. ``'None'``) become NaN.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path of the CSV file to clean.

    Returns
    -------
    str
        Path of the written file: the input path with ``_cleaned`` inserted
        before the extension (``results.csv`` -> ``results_cleaned.csv``).
    """
    # Accept both plain strings and path-like objects.
    source_path = os.fspath(file_path)

    # Load the data
    data = pd.read_csv(source_path)

    # Strip extra spaces from column names
    data.columns = data.columns.str.strip()

    # Convert 'max_depth' to numeric, turning non-numeric entries such as
    # 'None' into NaN. Skipped when the file has no such column.
    if 'max_depth' in data.columns:
        data['max_depth'] = pd.to_numeric(data['max_depth'], errors='coerce')

    # Remaining NaNs are intentionally left as-is; fill them here per column
    # if a default is needed, e.g.
    # data['some_column'] = data['some_column'].fillna(default_value)

    # Build the output name from the real extension only. A plain
    # str.replace('.csv', ...) would rewrite every '.csv' occurring in the
    # path and, for a name without a lowercase '.csv', would leave the path
    # unchanged so that the input file gets overwritten.
    root, ext = os.path.splitext(source_path)
    cleaned_file_path = f"{root}_cleaned{ext}"
    data.to_csv(cleaned_file_path, index=False)

    return cleaned_file_path

# Run the cleaning step on the CSV written by the earlier to_csv cell and
# report where the cleaned copy was saved.
file_path = 'transformed_results.csv'  # Change this to your actual file path
cleaned_data_path = clean_data(file_path)
print(f"Cleaned data saved to: {cleaned_data_path}")

Cleaned data saved to: transformed_results_cleaned.csv
