Unit Testing

In [2]:


import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.ensemble import RandomForestClassifier



def test_random_forest():
    """Train a random forest on ``heart4.csv`` and require > 0.9 held-out accuracy.

    The CSV is read from the current working directory. Every column except
    ``TenYearCHD`` is used as a feature and ``TenYearCHD`` is the label. The
    rows are split 80/20 with a fixed seed, a 100-tree forest is fitted on the
    training part, and accuracy is measured on the held-out part.

    Raises:
        AssertionError: if the held-out accuracy is not strictly above 0.9.
    """
    # Load dataset
    test_data = pd.read_csv("heart4.csv")
    X = test_data.drop('TenYearCHD', axis=1)
    y = test_data['TenYearCHD']

    # Split data into training and test sets (seeded, so the split is stable)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Seed the forest as well as the split: without random_state the bootstrap
    # samples and feature subsets differ between runs, so the measured accuracy
    # (and therefore the outcome of this test) could change from run to run.
    model = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train model on training data
    model.fit(X_train, y_train)

    # Make predictions on test data
    y_pred = model.predict(X_test)

    # Calculate accuracy of predictions
    accuracy = accuracy_score(y_test, y_pred)

    # Report the measured value so a failure is diagnosable from the traceback
    assert accuracy > 0.9, f"accuracy {accuracy:.3f} is not above 0.9"


# Run the check; a failed assertion raises here and stops the cell.
test_random_forest()

# Only reached when the call above returned without raising.
print("Test passed")


Test passed


Integration Testing

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


def test_model_performance():
    """Check that a seeded random forest reaches at least 0.75 accuracy.

    Reads ``heart4.csv``, uses ``TenYearCHD`` as the label and all remaining
    columns as features, holds out 20% of the rows with a fixed seed, fits a
    ``RandomForestClassifier`` on the rest and scores it on the held-out rows.

    Raises:
        AssertionError: if the held-out accuracy is below 0.75.
    """
    frame = pd.read_csv('heart4.csv')

    # Features are everything except the label column.
    features = frame.drop('TenYearCHD', axis=1)
    target = frame['TenYearCHD']

    # 80/20 split with a fixed seed.
    parts = train_test_split(features, target, test_size=0.2, random_state=42)
    features_train, features_test, target_train, target_test = parts

    # fit() returns the estimator itself, so construction and training chain.
    forest = RandomForestClassifier(random_state=42).fit(
        features_train, target_train)

    # Score the held-out predictions against the true labels.
    score = accuracy_score(target_test, forest.predict(features_test))

    assert score >= 0.75


# Run the check; a failed assertion raises here and stops the cell.
test_model_performance()

# Only reached when the call above returned without raising.
print("Test passed")


Test passed


System Testing

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


def test_classification_pipeline():
    """Check the impute -> scale -> random-forest pipeline end to end.

    Reads ``heart4.csv``, separates the ``TenYearCHD`` label from the feature
    columns, holds out 20% of the rows with a fixed seed, fits the three-step
    pipeline on the remainder and scores it on the held-out rows.

    Raises:
        AssertionError: if the held-out accuracy is below 0.75.
    """
    dataset = pd.read_csv('heart4.csv')

    # Label column versus everything else.
    features = dataset.drop('TenYearCHD', axis=1)
    labels = dataset['TenYearCHD']

    # 80/20 split with a fixed seed.
    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    features_train, features_test, labels_train, labels_test = split

    # Steps run in list order: mean imputation, standardisation, then the
    # seeded classifier.
    steps = [
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(features_train, labels_train)

    # Predict the held-out rows and compare with the true labels.
    predictions = pipeline.predict(features_test)
    score = accuracy_score(labels_test, predictions)

    assert score >= 0.75


# Run the check; a failed assertion raises here and stops the cell.
test_classification_pipeline()

# Only reached when the call above returned without raising.
print("Test passed")


Test passed


In [None]:
import unittest


class TestHeartDiseasePrediction(unittest.TestCase):
    """unittest checks for the preprocessing step and a logistic-regression baseline."""

    def test_preprocess_data(self):
        """Check that ``preprocess_data`` keeps the shape, leaves no NaNs and yields float64 columns."""
        # NOTE(review): preprocess_data is not defined in the visible cells --
        # confirm it is defined or imported before this cell runs.

        # Load test data
        test_data = pd.read_csv('heart4.csv')

        # Only the input features are preprocessed; the target is not needed here
        X = test_data.drop('TenYearCHD', axis=1)

        # Call function to preprocess test data
        preprocessed_data = preprocess_data(X)

        # Check that preprocessed_data has the same number of rows and columns as X
        self.assertEqual(preprocessed_data.shape, X.shape)

        # Check that there are no missing values in preprocessed_data
        self.assertEqual(preprocessed_data.isnull().sum().sum(), 0)

        # Check that every column is float64. Compare as a plain list: passing
        # the ndarray returned by unique() straight to assertEqual raises
        # ValueError (ambiguous truth value) instead of a normal test failure
        # whenever more than one dtype is present.
        self.assertEqual(list(preprocessed_data.dtypes.unique()),
                         [np.dtype('float64')])

    def test_logistic_regression(self):
        """Check that logistic regression exceeds 0.9 accuracy on a held-out iris split."""
        # Imported here because no visible cell imports these two names;
        # without them this test fails with NameError.
        from sklearn.datasets import load_iris
        from sklearn.linear_model import LogisticRegression

        # Load iris dataset
        iris = load_iris()
        X = iris.data
        y = iris.target

        # Split data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Create logistic regression model
        model = LogisticRegression()

        # Train model on training data
        model.fit(X_train, y_train)

        # Make predictions on test data
        y_pred = model.predict(X_test)

        # Calculate accuracy of predictions
        accuracy = accuracy_score(y_test, y_pred)

        # Assert that accuracy is greater than 0.9
        self.assertGreater(accuracy, 0.9)


# Run the unittest test cases when this code executes as the __main__ module.
if __name__ == '__main__':
    # A placeholder argv keeps unittest from parsing the process's real
    # command line; exit=False stops unittest.main from calling sys.exit()
    # once the tests have finished.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)
