In [None]:
def calculate_descriptors(data):
    """
    Compute, curate and min-max scale RDKit descriptors for the molecules in ``data``.

    Every descriptor listed in ``Descriptors._descList`` is calculated for each
    molecule. Descriptor columns whose mean or standard deviation is exactly
    zero are discarded. Then, for every pair of remaining descriptors whose
    absolute Pearson correlation is at least 0.95, the descriptor that comes
    first in the column order is discarded as well. The surviving descriptors
    are recalculated and scaled to the range [0, 1] with ``MinMaxScaler``.

    Note that ``data`` is modified in place: the new columns are added to the
    DataFrame that was passed in.

    Args:
        data (DataFrame): DataFrame containing a 'SMILES' column.

    Returns:
        tuple: ``(data, rdkitmd_df)`` where

            * ``data`` is the input DataFrame with the added columns 'Mol'
              (molecule object built from the SMILES string), 'Descriptors'
              (np.ndarray holding all descriptor values) and 'Descriptors2'
              (tuple holding only the curated descriptor values);
            * ``rdkitmd_df`` is a DataFrame of the min-max scaled curated
              descriptors, one row per molecule in input order, with default
              integer row and column labels.

    Raises:
        ValueError: If ``data`` has no rows, or if at least one SMILES string
            cannot be converted into a molecule.
    """
    # Function-scope import, as in the original cell, so the block does not
    # rely on scikit-learn having been imported elsewhere.
    from sklearn.preprocessing import MinMaxScaler

    des_list = [x[0] for x in Descriptors._descList]
    # Build the calculator once; it does not depend on the molecule.
    calc = MoleculeDescriptors.MolecularDescriptorCalculator(des_list)

    # Parse first and validate before touching ``data`` so a failed call does
    # not leave half-populated columns behind.
    mols = data['SMILES'].apply(Chem.MolFromSmiles)
    if mols.empty:
        raise ValueError("data contains no rows; cannot calculate descriptors")
    # Chem.MolFromSmiles yields None for a SMILES string it cannot parse.
    unparsed = mols.isna().to_numpy()
    if unparsed.any():
        bad_smiles = data['SMILES'][unparsed].tolist()
        raise ValueError(
            f"Could not parse {len(bad_smiles)} SMILES string(s), "
            f"e.g. {bad_smiles[:10]}"
        )

    data['Mol'] = mols
    data['Descriptors'] = data['Mol'].apply(
        lambda mol: np.asarray(calc.CalcDescriptors(mol))
    )

    # One row per molecule, one column per descriptor.
    des = pd.DataFrame(np.vstack(data['Descriptors'].tolist()), columns=des_list)

    # Drop descriptors whose mean or standard deviation is exactly zero.
    summary = des.describe()
    degenerate = (summary.loc['mean'] == 0) | (summary.loc['std'] == 0)
    des1 = des.drop(columns=degenerate[degenerate].index)

    # Drop highly correlated descriptors. Only entries strictly below the
    # diagonal are inspected, so each pair is looked at once and the
    # earlier column of a correlated pair is the one removed. NaN
    # correlations compare as False and therefore never trigger a drop.
    cormatrix = des1.corr(method='pearson')
    strongly_correlated = (cormatrix.abs() >= 0.95).to_numpy()
    below_diagonal = np.tril(strongly_correlated, k=-1)
    redundant = cormatrix.columns[below_diagonal.any(axis=0)]
    des2 = des1.drop(columns=redundant)

    # Recalculate only the curated descriptors for every molecule.
    cura_des_list = list(des2.columns)
    calc1 = MoleculeDescriptors.MolecularDescriptorCalculator(cura_des_list)
    data['Descriptors2'] = data['Mol'].apply(calc1.CalcDescriptors)
    rdkitmd = np.array(list(data['Descriptors2']))

    # Min-max scale every descriptor column to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    normalized_descriptor = scaler.fit_transform(rdkitmd)
    rdkitmd_df = pd.DataFrame(normalized_descriptor)
    return data, rdkitmd_df

if __name__ == "__main__":
    # Demo run: load the molecules from 'demo.xlsx', compute their
    # descriptors, then show the annotated table followed by the scaled
    # descriptor table.
    data = pd.read_excel('demo.xlsx')
    data_with_descriptors, standardized_descriptors_df = calculate_descriptors(data)
    print(data_with_descriptors, standardized_descriptors_df, sep="\n")