### **Uses ML to detect the ingredients of medicines and warn against ingredients that should not be mixed.**

In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import KNNImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

def convert_to_nan(data, placeholder='Ofloxacin Otic Solution (Floxin Otic Singles)'):
    """
    Returns a copy of the input dataframe in which every cell equal to
    `placeholder` is replaced with a missing value (pd.NA).

    Parameters:
        data: pandas DataFrame to process. It is not modified.
        placeholder: cell value to treat as missing. Only cells whose whole
            value equals it are replaced; substrings are left alone. Defaults
            to the one string this notebook treats as missing.

    Returns:
        A new pandas DataFrame with the matching cells set to pd.NA.

    Raises:
        TypeError: if `data` is not a pandas DataFrame.
    """
    # Fail early with a clear message instead of an AttributeError on .replace
    if not isinstance(data, pd.DataFrame):
        raise TypeError("Input must be a pandas dataframe")
    # DataFrame.replace returns a new frame, so the caller's frame is untouched
    return data.replace(placeholder, pd.NA)

def clean_numeric_columns(df):
    """
    Converts text columns of the input pandas dataframe to numeric data types.

    For every object/string column, each string cell has its commas removed
    and surrounding whitespace stripped; a cell that is empty after that is
    treated as '0'. The column is then passed through pd.to_numeric with
    errors='coerce', so text that is not a number becomes NaN. Columns that
    end up entirely NaN are dropped.

    Non-string cells inside an object column (numbers, booleans, missing
    values) are handed to pd.to_numeric unchanged instead of being discarded.

    Parameters:
        df: pandas DataFrame to clean. It is not modified.

    Returns:
        A new pandas DataFrame with the converted columns and without the
        columns that contain no numeric value at all.
    """
    def _normalise_cell(value):
        # Only strings can carry thousands separators or padding; leave
        # everything else for pd.to_numeric to interpret.
        if not isinstance(value, str):
            return value
        stripped = value.replace(',', '').strip()
        # Blank and whitespace-only cells both count as zero
        return stripped if stripped else '0'

    # Make a copy of the input dataframe
    df_copy = df.copy()
    for column in df_copy.columns:
        series = df_copy[column]
        # Accept both the classic object dtype and dedicated string dtypes
        if pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series):
            df_copy[column] = pd.to_numeric(series.map(_normalise_cell), errors='coerce')
    # Columns with no parsable value carry no information for later steps
    return df_copy.dropna(how='all', axis=1)

def check_constraints(row, interaction_constraints):
    """
    Tells whether a row respects every interaction constraint.

    `interaction_constraints` maps a column name to the column names it must
    not be combined with. A constraint is violated when the row holds 1 under
    the key and also 1 under at least one of the listed names.

    Returns False on the first violation found, True when there is none.
    """
    for ingredient, conflicting in interaction_constraints.items():
        # A constraint only applies when its own ingredient is present
        if row[ingredient] != 1:
            continue
        if any(row[other] == 1 for other in conflicting):
            return False
    return True


def detect_ingredient_interactions(data_file):
    """
    Trains and evaluates a random forest on a rule-derived `is_safe` label.

    Steps:
        1. Read the CSV, drop duplicate rows, blank out the placeholder value
           (convert_to_nan) and convert text columns to numbers
           (clean_numeric_columns).
        2. Fill the remaining gaps with a 5-neighbour KNNImputer.
        3. Label each row with check_constraints: every column whose name
           starts with 'Excipients' is constrained against all other columns.
        4. Split 80/20 (random_state=42), fit the preprocessing + random
           forest pipeline on the training part and print the classification
           report for the test part.

    Parameters:
        data_file: path or buffer accepted by pd.read_csv.

    Returns:
        None. The classification report is printed. Any exception raised
        along the way is caught and printed as "Error:  <message>".
    """
    try:
        # Load the dataset
        data = pd.read_csv(data_file)

        # Remove duplicates
        data = data.drop_duplicates()

        # Convert certain values to NaN
        data = convert_to_nan(data)

        # Clean up numeric columns
        data = clean_numeric_columns(data)

        # Validate before imputing: KNNImputer rejects an empty frame with its
        # own message, which used to pre-empt this check.
        if len(data) == 0:
            raise ValueError("There are no samples in the dataset")
        if data.shape[1] == 0:
            raise ValueError("There are no numeric columns in the dataset")

        # Apply KNNImputer
        # NOTE(review): the imputer is fitted on all rows before the split, so
        # test rows influence the imputed training values; the imputer inside
        # the pipeline below then has nothing left to fill.
        imputer = KNNImputer(n_neighbors=5)
        data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

        # Define the target feature
        target_feature = 'is_safe'

        # Define the rules or constraints for ingredient interactions:
        # each 'Excipients*' column conflicts with every other column.
        interaction_constraints = {
            col: [other for other in data.columns if other != col]
            for col in data.columns
            if col.startswith('Excipients')
        }

        # Add a column to the dataset indicating whether a combination of ingredients is safe or not
        # NOTE(review): the label is a deterministic function of the feature
        # columns, so the reported scores measure how well the forest
        # reproduces this rule, not an independent ground truth.
        data[target_feature] = data.apply(
            lambda row: check_constraints(row, interaction_constraints), axis=1)

        # Split dataset into training and testing sets
        train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

        # Preprocessing for numeric features
        numeric_transformer = Pipeline(steps=[
            ('imputer', KNNImputer(n_neighbors=5)),
            ('scaler', StandardScaler())
        ])

        # Preprocessing for categorical features
        categorical_transformer = Pipeline(steps=[
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])

        # Pick the columns from the feature frame only, so the target can never
        # be listed as an input column whatever dtype it ends up with.
        feature_frame = data.drop(columns=[target_feature])
        numeric_columns = feature_frame.select_dtypes(include=['float64', 'int64']).columns
        categorical_columns = feature_frame.select_dtypes(include=['object']).columns

        # Define the column transformer to apply the preprocessing steps to different features
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_columns),
                ('cat', categorical_transformer, categorical_columns)
            ])

        # Define the classifier
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)

        # Define the pipeline
        pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('classifier', classifier)])

        # Fit the pipeline on the training data
        pipeline.fit(train_data.drop(columns=[target_feature]), train_data[target_feature])

        # Predict on the test data
        y_pred = pipeline.predict(test_data.drop(columns=[target_feature]))

        # Print the classification report
        print(metrics.classification_report(test_data[target_feature], y_pred))

    except Exception as e:
        # Failures are reported as a one-line message instead of being raised,
        # so the calling cell finishes; the traceback is not shown.
        print("Error: ", str(e))


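**Replace 'data.csv' with the path to your own dataset to check the accuracy of this model. The function retrains on 80% of the file it is given and reports accuracy on the remaining 20%.**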

In [42]:
# Train and evaluate on the CSV at this path; the classification report is printed as the cell output.
detect_ingredient_interactions("data.csv")

              precision    recall  f1-score   support

        True       1.00      1.00      1.00      1337

    accuracy                           1.00      1337
   macro avg       1.00      1.00      1.00      1337
weighted avg       1.00      1.00      1.00      1337

