# Feature Switching 

### Switching between deep (Mol2Vec) and handcrafted features from baseline_mcccv.csv

![switch](switch.jpg)


In [None]:
import pandas as pd

def switch_and_save(input_csv1, input_csv2, columns_to_select, exclude_columns=False, include_last_column=True, output_csv1=None, output_csv2=None):
    """Filter two CSV files by column name and optionally save the results.

    Both files are read with pandas. Their column names are stripped of
    surrounding whitespace and lower-cased, and ``columns_to_select`` is
    lower-cased too, so matching is case-insensitive. The written files
    therefore carry the normalised (lower-case) column names.

    Args:
        input_csv1: Path of the first CSV file to read.
        input_csv2: Path of the second CSV file to read.
        columns_to_select: Column names to keep, or to drop when
            ``exclude_columns`` is true.
        exclude_columns: If true, drop the listed columns (names that are
            not present are ignored). If false, keep only the listed
            columns, in the order given.
        include_last_column: If true, the last column of each input file is
            added to the result as a column named ``'label'``. In exclude
            mode the last column also stays under its own name unless it
            was listed in ``columns_to_select``.
        output_csv1: Destination for the filtered first file, or ``None``
            to skip writing it.
        output_csv2: Destination for the filtered second file, or ``None``
            to skip writing it.

    Raises:
        KeyError: In select mode, when a requested column is missing from
            one of the files. Nothing is written in that case.
    """
    sources = (input_csv1, input_csv2)

    # Step 1: Read both CSV files up front so a bad path fails before any work.
    frames = [pd.read_csv(path) for path in sources]

    # Debug: show the raw headers to help spot naming mismatches.
    for path, frame in zip(sources, frames):
        print(f"Columns in {path}: {frame.columns}")

    # Normalise headers and requested names so matching ignores case and
    # stray whitespace around the CSV header cells.
    for frame in frames:
        frame.columns = frame.columns.str.strip().str.lower()
    wanted = [col.lower() for col in columns_to_select]

    # Steps 2 and 3: build both filtered tables before writing anything, so a
    # failure on the second file never leaves a lone output from the first.
    filtered = []
    for path, frame in zip(sources, frames):
        if exclude_columns:
            subset = frame.drop(columns=wanted, errors='ignore')
        else:
            missing = [col for col in wanted if col not in frame.columns]
            if missing:
                raise KeyError(f"Columns not found in {path}: {missing}")
            # Explicit copy: adding 'label' below must not be treated as a
            # write into a slice of the original frame.
            subset = frame[wanted].copy()

        if include_last_column:
            subset['label'] = frame.iloc[:, -1]
        filtered.append(subset)

    # Step 4: Save the results (without the index) where a path was given.
    for subset, destination in zip(filtered, (output_csv1, output_csv2)):
        if destination is not None:
            subset.to_csv(destination, index=False)

# Usage: the two input tables that switch_and_save() reads and filters.
input_csv1 = 'feat_train_all.csv'  # Path to the first input CSV (training set, going by the file name)
input_csv2 = 'feat_test_all.csv'  # Path to the second input CSV (test set, going by the file name)

# Columns to select (or exclude), taken from the list of best baselines by MCCCV.
# Entries appear to follow a '<model>-<feature set>' pattern -- TODO confirm against
# baseline_mcccv.csv. switch_and_save() lower-cases these names before matching.
columns_to_select = ['SVMRBF-R6D300', 'SVMRBF-CKD', 'SVMRBF-R8D300', 'XGB-CKD', 'SVMRBF-R9D300', 'XGB-FP4C', 'SVMRBF-R7D300', 'SVMRBF-CKDExt', 'SVMRBF-R4D300', 'XGB-PubChem', 'SVMRBF-R5D300', 'ET-CKDExt', 'SVMRBF-R2D300', 'XGB-CKDExt', 'SVMRBF-R3D300', 'ET-FP4C', 'SVMRBF-R1D300', 'RF-CKDExt', 'MLP-R6D300', 'LGBM-PubChem', 'MLP-R9D300', 'ET-CKD', 'MLP-R4D300', 'LGBM-CKDExt', 'MLP-R7D300', 'LGBM-CKD', 'MLP-R5D300', 'MLP-CKD', 'MLP-R8D300', 'LGBM-FP4C', 'MLP-R1D300', 'LGBM-KRC', 'SVMRBF-R7D100', 'SVMRBF-RDKit', 'SVMRBF-R9D100', 'RF-FP4C', 'SVMRBF-R8D100', 'MLP-FP4C', 'SVMRBF-R4D100', 'MLP-CKDExt', 'SVMRBF-R5D100', 'XGB-KRC', 'SVMRBF-R6D100', 'SVMRBF-Circle', 'MLP-R2D300', 'ET-PubChem', 'MLP-R3D300', 'LGBM-RDKit', 'SVMRBF-R3D100', 'SVMRBF-FP4C', 'MLP-R8D100', 'MLP-RDKit', 'MLP-R5D100', 'ET-Circle', 'SVMRBF-R2D100', 'RF-CKD', 'MLP-R9D100', 'ET-RDKit', 'MLP-R6D100', 'SVMRBF-PubChem', 'MLP-R4D100', 'SVMRBF-Hybrid', 'SVMRBF-R1D100', 'ET-Hybrid', 'MLP-R7D100', 'RF-PubChem', 'LGBM-R7D300', 'XGB-KR', 'MLP-R2D100', 'LGBM-Hybrid', 'XGB-R7D300', 'RF-RDKit', 'MLP-R3D100', 'LGBM-KR', 'XGB-R8D300', 'RF-Hybrid', 'LGBM-R5D300', 'XGB-Circle', 'MLP-R1D100', 'LGBM-Circle', 'XGB-R9D300', 'XGB-RDKit', 'XGB-R5D300', 'XGB-Hybrid', 'XGB-R4D300', 'RF-Circle', 'XGB-R6D300', 'MLP-PubChem', 'XGB-R1D300', 'MLP-Circle', 'LGBM-R6D300', 'XGB-MACCS', 'LGBM-R8D300', 'LR-RDKit', 'LGBM-R9D300', 'ET-KRC', 'LGBM-R4D300', 'MLP-Hybrid', 'XGB-R7D100', 'MLP-KRC', 'LGBM-R7D100', 'RF-MACCS', 'LR-R6D300', 'SVMRBF-MACCS', 'XGB-R2D300', 'ET-MACCS', 'LR-R7D300', 'RF-KRC', 'LGBM-R2D300', 'MLP-KR', 'LGBM-R1D300', 'ET-KR', 'LGBM-R3D300', 'LGBM-MACCS', 'XGB-R6D100', 'PLS-RDKit', 'XGB-R3D100', 'RF-KR', 'XGB-R9D100', 'SVMRBF-KRC', 'LR-R8D300', 'MLP-MACCS', 'XGB-R3D300', 'ADA-KRC', 'LGBM-R5D100', 'LR-PubChem', 'XGB-R5D100', 'LDA-CKD', 'LR-R9D300', 'LDA-KRC', 'LR-R5D300', 'ADA-FP4C', 'LGBM-R8D100', 'ET-FP4', 'LR-R4D300', 'XGB-CKDGraph', 'XGB-R4D100', 'XGB-FP4', 'XGB-R8D100', 'RF-FP4', 'ET-R7D300', 'LDA-PubChem', 
'ET-R5D300', 'RF-CKDGraph', 'ET-R9D300', 'LDA-RDKit', 'LGBM-R4D100', 'LDA-CKDExt', 'LGBM-R6D100', 'LR-CKD', 'ET-R6D300', 'ADA-KR', 'LGBM-R9D100', 'LR-Circle', 'LR-R2D300', 'LDA-FP4C', 'LR-R1D300', 'SVMRBF-KR', 'XGB-R2D100', 'LR-KRC', 'ET-R8D300', 'LGBM-CKDGraph', 'LGBM-R3D100', 'MLP-FP4', 'ET-R6D100', 'KNN-CKDExt', 'LDA-R8D300', 'LR-KR', 'ET-R5D100', 'ET-CKDGraph', 'LDA-R9D300', 'LDA-Circle', 'XGB-R1D100', 'SVMRBF-FP4', 'LGBM-R2D100', 'LGBM-FP4', 'LDA-R7D300', 'LDA-KR', 'ET-R4D300', 'PLS-Circle', 'LDA-R6D300', 'SVMRBF-CKDGraph', 'ET-R8D100', 'KNN-CKD', 'RF-R6D300', 'LR-CKDExt', 'RF-R8D300', 'LR-Hybrid', 'RF-R7D300', 'KNN-FP4C', 'RF-R5D300', 'MLP-CKDGraph', 'ET-R7D100', 'KNN-MACCS', 'LGBM-R1D100', 'LR-FP4C', 'ET-R9D100', 'ADA-CKD', 'RF-R9D300', 'ADA-PubChem', 'KNN-R7D300', 'LDA-Hybrid', 'KNN-R8D300', 'ADA-RDKit', 'KNN-R6D300', 'ADA-CKDExt', 'LR-R3D300', 'KNN-PubChem', 'LDA-R5D300', 'DT-FP4C', 'LDA-R1D300', 'PLS-KR', 'KNN-R9D300', 'KNN-Hybrid', 'LDA-R2D300', 'PLS-FP4C', 'LDA-R3D300', 'ADA-Circle', 'KNN-R5D300', 'KNN-RDKit', 'ET-R4D100', 'KNN-FP4', 'LDA-R4D300', 'DT-PubChem', 'RF-R6D100', 'KNN-Circle', 'ET-R3D300', 'PLS-KRC', 'RF-R7D100', 'ADA-Hybrid', 'RF-R4D300', 'NB-RDKit', 'KNN-R6D100', 'XGB-AP2D', 'RF-R4D100', 'LDA-MACCS', 'RF-R5D100', 'XGB-Estate', 'SVMRBF-R0D100', 'RF-AP2D', 'ET-R2D100', 'SVMRBF-Estate', 'ET-R1D300', 'LR-MACCS', 'ET-R2D300', 'ADA-MACCS', 'SVMRBF-R0D300', 'LGBM-Estate', 'KNN-R8D100', 'DT-CKDExt', 'RF-R8D100', 'ET-AP2D', 'KNN-R7D100', 'LGBM-AP2D', 'RF-R9D100', 'LR-CKDGraph', 'KNN-R4D300', 'RF-Estate', 'KNN-R9D100', 'ET-Estate', 'KNN-R1D300', 'NB-Circle', 'KNN-R3D300', 'LDA-CKDGraph', 'ET-R3D100', 'DT-CKD', 'KNN-R5D100', 'PLS-CKDExt', 'KNN-R2D300', 'DT-RDKit', 'ET-R1D100', 'KNN-CKDGraph', 'MLP-R0D100', 'PLS-CKD', 'RF-R1D300', 'DT-KR', 'RF-R3D300', 'DT-KRC', 'RF-R2D300', 'SVMRBF-AP2D', 'KNN-R4D100', 'DT-MACCS', 'LGBM-R0D300', 'DT-FP4', 'RF-R3D100', 'MLP-Estate', 'LDA-R9D100', 'KNN-KR', 'KNN-R1D100', 'MLP-AP2D', 'RF-R2D100', 'ADA-CKDGraph', 
'MLP-R0D300', 'KNN-KRC', 'LDA-R8D100', 'LDA-FP4', 'KNN-R2D100', 'DT-CKDGraph', 'XGB-R0D300', 'KNN-Estate', 'LDA-R6D100', 'ADA-FP4', 'LDA-R7D100', 'DT-Hybrid', 'KNN-R3D100', 'LR-FP4', 'RF-R1D100', 'PLS-Hybrid', 'LDA-R5D100', 'DT-Estate', 'ADA-R5D300', 'PLS-FP4', 'LR-R7D100', 'PLS-PubChem', 'ADA-R6D300', 'NB-KR', 'LDA-R4D100', 'NB-KRC', 'ET-R0D300', 'DT-Circle', 'LDA-R3D100', 'NB-Hybrid', 'ADA-R7D300', 'NB-CKDExt', 'ADA-R9D300', 'LDA-Estate', 'LDA-R2D100', 'ADA-Estate', 'XGB-R0D100', 'PLS-MACCS', 'ADA-R8D100', 'PLS-Estate', 'ADA-R8D300', 'LR-Estate', 'LDA-R1D100', 'KNN-AP2D', 'ET-R0D100', 'NB-CKD', 'ADA-R7D100', 'DT-AP2D', 'LGBM-R0D100', 'PLS-CKDGraph', 'ADA-R4D300', 'LDA-AP2D', 'ADA-R9D100', 'NB-PubChem', 'ADA-R6D100', 'LR-AP2D', 'RF-R0D300', 'ADA-AP2D', 'ADA-R3D300', 'NB-FP4C', 'LR-R6D100', 'NB-FP4', 'ADA-R1D300', 'NB-MACCS', 'ADA-R2D300', 'PLS-AP2D', 'ADA-R5D100', 'NB-CKDGraph', 'RF-R0D100', 'NB-Estate', 'KNN-R0D300', 'NB-AP2D', 'ADA-R4D100', 'LR-R8D100', 'LR-R9D100', 'KNN-R0D100', 'ADA-R1D100', 'ADA-R2D100', 'ADA-R3D100', 'LR-R5D100', 'LR-R0D100', 'LDA-R0D100', 'LDA-R0D300', 'LR-R0D300', 'LR-R4D100', 'ADA-R0D300', 'LR-R1D100', 'ADA-R0D100', 'PLS-R6D300', 'LR-R3D100', 'PLS-R7D100', 'PLS-R5D300', 'PLS-R6D100', 'LR-R2D100', 'PLS-R8D100', 'PLS-R9D100', 'DT-R7D300', 'PLS-R7D300', 'PLS-R5D100', 'PLS-R9D300', 'PLS-R8D300', 'DT-R9D300', 'PLS-R4D300', 'DT-R5D300', 'DT-R6D100', 'NB-R7D300', 'DT-R8D300', 'NB-R8D100', 'NB-R9D100', 'NB-R7D100', 'DT-R6D300', 'NB-R6D300', 'DT-R7D100', 'PLS-R4D100', 'NB-R8D300', 'DT-R9D100', 'PLS-R3D300', 'DT-R8D100', 'NB-R9D300', 'PLS-R1D300', 'PLS-R2D300', 'DT-R5D100', 'NB-R6D100', 'PLS-R3D100', 'PLS-R1D100', 'NB-R5D300', 'DT-R4D100', 'DT-R1D300', 'PLS-R2D100', 'DT-R1D100', 'DT-R4D300', 'DT-R2D100', 'DT-R2D300', 'NB-R5D100', 'DT-R3D300', 'NB-R4D300', 'PLS-R0D300', 'DT-R3D100', 'DT-R0D300', 'NB-R4D100', 'DT-R0D100', 'NB-R1D100', 'NB-R3D100', 'PLS-R0D100', 'NB-R1D300', 'NB-R2D100', 'NB-R3D300', 'NB-R2D300', 'NB-R0D100', 'NB-R0D300']

# Where the filtered tables are written.
output_csv1 = 'train_switch2.csv'
output_csv2 = 'test_switch2.csv'

# Behaviour flags for the run.
include_last_column = True  # True: append each file's last column as 'label'
exclude_columns = False  # True: drop the listed columns instead of keeping them

# Filter both input files and write the two result files.
switch_and_save(
    input_csv1,
    input_csv2,
    columns_to_select,
    exclude_columns=exclude_columns,
    include_last_column=include_last_column,
    output_csv1=output_csv1,
    output_csv2=output_csv2,
)
