# Diagnostic Systems

### Three different classifiers are implemented to analyze the trade-off between interpretability and performance.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# NOTE(review): unpickling can execute arbitrary code, so 'wdbc.pkl' must
# come from a trusted source.
df = pd.read_pickle('wdbc.pkl')

# Every column from the third one onward is used as a model input; the
# target is the 'malignant' column.
feature_columns = df.columns[2:]
X = df[feature_columns]
y = df['malignant']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [3]:
def evaluate_classifier(classifier, X_test, y_test):
    """Print accuracy, precision, recall and the confusion matrix of a classifier.

    Parameters
    ----------
    classifier
        Fitted model exposing ``predict(X)``.
    X_test
        Feature rows passed to ``classifier.predict``.
    y_test
        True labels for ``X_test``; 0 is reported as benign and 1 as malignant.

    Returns
    -------
    None
        The two summary tables are printed, not returned.
    """
    y_pred = classifier.predict(X_test)

    df_stats = pd.DataFrame({
        'Accuracy': [accuracy_score(y_test, y_pred)],
        'Precision': [precision_score(y_test, y_pred)],
        'Recall': [recall_score(y_test, y_pred)]
    })

    # Pin the label order so the matrix is always 2x2 (benign, malignant),
    # even when the labels or predictions happen to contain a single class;
    # otherwise the fixed row/column names below would not fit the matrix.
    confusion = confusion_matrix(y_test, y_pred, labels=[0, 1])
    df_confusion = pd.DataFrame(
        confusion,
        columns=['Predicted Benign', 'Predicted Malignant'],
        index=['Actual Benign', 'Actual Malignant'],
    )

    print(df_stats)
    print(df_confusion)

# 1. Rule-based classifier
* If [cell size is abnormal]:
* or [cell shape is abnormal]
* or [cell texture is abnormal]
* or [cell homogeneity is abnormal], 
* then: diagnosis is malignant, 
* otherwise: diagnosis is benign.

In [4]:
class RuleBasedClassifier:
    """Threshold-rule classifier: a row is malignant if ANY feature is too high.

    One threshold is learned per feature. A row is labelled 1 (malignant)
    when at least one of its feature values is strictly greater than that
    feature's threshold, and 0 (benign) otherwise.
    """

    def __init__(self, features):
        """Register the feature names to build rules for.

        Parameters
        ----------
        features : iterable of str
            Column names the rules are built on. Thresholds start at 0
            until ``fit`` is called.
        """
        self.thresholds = {feature: 0 for feature in features}

    def fit(self, X, y):
        """Learn one threshold per feature.

        The threshold is the midpoint between the feature's mean over the
        rows where ``y == 1`` and its mean over the rows where ``y == 0``.

        Returns
        -------
        RuleBasedClassifier
            ``self``, so calls can be chained like the other estimators
            in this notebook.
        """
        malignant_rows = X[y == 1]
        benign_rows = X[y == 0]
        for feature in self.thresholds:
            malignant_mean = malignant_rows[feature].mean()
            benign_mean = benign_rows[feature].mean()
            self.thresholds[feature] = (malignant_mean + benign_mean) / 2
        return self

    def predict(self, X):
        """Return an integer array of 0/1 labels, one per row of ``X``."""
        # Start from "no rule fired" so that an empty rule set still yields
        # one (benign) label per row instead of a 0-d scalar.
        flagged = np.zeros(len(X), dtype=bool)
        for feature, threshold in self.thresholds.items():
            # Each comparison is vectorised over all rows; only the (short)
            # list of features is walked in Python.
            flagged |= np.asarray(X[feature] > threshold)
        return flagged.astype(int)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == y)

In [5]:
# One threshold rule is learned for each of the four listed features.
rule_based_classifier = RuleBasedClassifier(
    ["radius_2", "concavity_2", "texture_2", "smoothness_2"]
)
rule_based_classifier.fit(X_train, y_train)
evaluate_classifier(rule_based_classifier, X_test, y_test)

   Accuracy  Precision   Recall
0  0.614035   0.488095  0.97619
                  Predicted Benign  Predicted Malignant
Actual Benign                   29                   43
Actual Malignant                 1                   41


Accuracy: Proportion of correct predictions
Precision: Proportion of positive predictions that are true positives. 
    - Tells how reliable the positive predictions are. High precision: a predicted positive outcome is usually correct.
Recall: Proportion of actual positive samples that were correctly identified by the model. 
    - Tells you how well the model detects positive cases. High recall means the model is good at identifying positive samples, but a model tuned for high recall may also produce more false positives. 

There are a lot of false positives in the confusion matrix. At least that's better than false negatives in our context (healthcare).

# 2. Random Forest Classifier

In [6]:
from sklearn.ensemble import RandomForestClassifier

# 100 trees with no depth limit; the fixed seed keeps the forest repeatable.
classifier_rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
    random_state=1,
)
classifier_rf.fit(X_train, y_train)

evaluate_classifier(classifier_rf, X_test, y_test)

   Accuracy  Precision    Recall
0   0.95614        1.0  0.880952
                  Predicted Benign  Predicted Malignant
Actual Benign                   72                    0
Actual Malignant                 5                   37


# 3. Logistic Regression Classifier:
#### Attempts to trade off interpretability and classification performance. Logistic Regression has a probabilistic output which can be used to understand how confident the model is in its predictions, which is one way to define interpretability. The weights of each feature can also be accessed, which gives insight into their importance to the outcome. First, scikit-learn's LogisticRegression is used. Then logistic regression is implemented from scratch.

In [7]:
from sklearn.linear_model import LogisticRegression

# L1 (lasso) regularization: it can drive coefficients to exactly zero, so
# the printed weights show which features the model actually relies on.
# liblinear shuffles the data internally, so the seed is fixed to keep the
# fitted coefficients repeatable, like the split and the forest above.
lr_classifier = LogisticRegression(solver='liblinear', penalty='l1', random_state=1)
lr_classifier.fit(X_train, y_train)
print(lr_classifier.coef_)

evaluate_classifier(lr_classifier, X_test, y_test)

[[-4.43805758  0.          0.32823369  0.0064169   0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.02533745  0.          0.          0.          0.
   0.          0.         -0.26688716  0.17058242  0.07087087  0.02301815
   0.          0.          3.15178865  0.          0.          0.        ]]
   Accuracy  Precision    Recall
0  0.964912        1.0  0.904762
                  Predicted Benign  Predicted Malignant
Actual Benign                   72                    0
Actual Malignant                 4                   38




In [8]:
class LogitRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    After ``fit`` the learned parameters are available as ``weights``
    (one value per feature) and ``b`` (the intercept).
    """

    def __init__(self, learning_rate=1e-1, max_iter=1000):
        """Store the gradient-descent step size and the number of iterations."""
        self.learning_rate = learning_rate
        self.max_iter = max_iter

    @staticmethod
    def _sigmoid(z, bound=500.0):
        """Logistic function 1 / (1 + exp(-z)) without overflow warnings.

        Logits are clipped to ``[-bound, bound]`` first. Inside that range
        the result is unchanged; outside it the sigmoid is already within
        about 1e-217 of 0 or 1, but ``np.exp`` would otherwise overflow.
        """
        z = np.clip(z, -bound, bound)
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Fit weights and intercept by gradient descent on the log-loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of n_samples 0/1 labels, matched to ``X`` by position.

        Returns
        -------
        LogitRegression
            ``self``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # Work on plain positional arrays: with pandas inputs, arithmetic
        # between the predictions and ``y`` would align on index labels and
        # silently misbehave when X and y carry different indexes.
        features = np.asarray(X, dtype=float)
        targets = np.asarray(y, dtype=float).ravel()

        self.n_samples, self.n_features = features.shape
        if targets.shape[0] != self.n_samples:
            raise ValueError(
                "X and y must contain the same number of samples "
                f"(got {self.n_samples} and {targets.shape[0]})"
            )

        self.weights = np.zeros(self.n_features)
        self.b = 0

        # The training data stays available as attributes, exactly as passed in.
        self.X = X
        self.y = y

        for _ in range(self.max_iter):
            # compute predicted values (logistic regression hypothesis)
            probabilities = self._sigmoid(features.dot(self.weights) + self.b)

            # compute gradients of the mean log-loss
            residuals = probabilities - targets  # predicted minus actual
            dW = features.T.dot(residuals) / self.n_samples
            db = residuals.sum() / self.n_samples

            # update weights
            self.weights = self.weights - self.learning_rate * dW
            self.b = self.b - self.learning_rate * db

        return self

    def predict(self, X):
        """Return a 0/1 integer array: 1 where the probability is >= 0.5."""
        probabilities = np.asarray(self.predict_probabilities(X))
        return (probabilities >= 0.5).astype(int)

    def predict_probabilities(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        return self._sigmoid(X.dot(self.weights) + self.b)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == y)

In [12]:
# NOTE(review): `columns` is built here but is not used within this cell;
# the model below is fitted on every column of X_train.
columns = [name for name in X.columns if "_2" in name]

lr2_classifier = LogitRegression(learning_rate=1e-4, max_iter=1000)
lr2_classifier.fit(X_train, y_train)

print(lr2_classifier.weights)
evaluate_classifier(lr2_classifier, X_test, y_test)


[-2.94838314e-02 -4.13824581e-02 -1.69573302e-01 -6.36108821e-02
 -2.75758091e-04  1.62055426e-04  5.86971411e-04  2.40257976e-04
 -5.24297096e-04 -2.16288490e-04 -2.48958857e-04 -2.66630485e-03
  5.53464490e-04  6.93350815e-02 -1.40665172e-05  5.48151057e-05
  9.38729510e-05  1.27095540e-05 -4.07096427e-05 -1.61594625e-06
 -3.15699909e-02 -5.20735136e-02 -1.70565440e-01  9.34413571e-02
 -3.53981875e-04  6.49020106e-04  1.27152414e-03  2.88932726e-04
 -7.20633766e-04 -1.99521545e-04]
   Accuracy  Precision    Recall
0  0.938596   0.948718  0.880952
                  Predicted Benign  Predicted Malignant
Actual Benign                   70                    2
Actual Malignant                 5                   37
