# In [12]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report

def adjust_features(features, target_feature_count):
    """Pad or truncate the columns of ``features`` to a fixed width.

    Args:
        features: 2-D NumPy array; columns (axis 1) are the features.
        target_feature_count: Desired number of columns; must be >= 0.

    Returns:
        ``features`` itself when it already has the requested width, a
        column-sliced view when it is too wide, or a new array padded on the
        right with zero columns when it is too narrow.

    Raises:
        ValueError: If ``target_feature_count`` is negative.
    """
    # A negative count would otherwise reach the slicing branch below, where
    # ``features[:, :-k]`` silently drops the last k columns.
    if target_feature_count < 0:
        raise ValueError(
            f"target_feature_count must be non-negative, got {target_feature_count}"
        )

    current_feature_count = features.shape[1]
    if current_feature_count < target_feature_count:
        missing_features = target_feature_count - current_feature_count
        # Use the input dtype for the padding so that hstack does not upcast
        # the whole matrix (e.g. float32 -> float64) just because of zeros.
        zeros = np.zeros(
            (features.shape[0], missing_features), dtype=features.dtype
        )
        features = np.hstack((features, zeros))
    elif current_feature_count > target_feature_count:
        # Keep only the leading columns.
        features = features[:, :target_feature_count]
    return features

def load_data(filename, target_feature_count=None):
    """Load a NumPy file and split it into a feature matrix and a label vector.

    The loaded array is indexed as 2-D: the last column is returned as the
    labels and every preceding column as the features.

    Args:
        filename: Path passed to ``np.load``.
        target_feature_count: When not ``None``, the feature matrix is passed
            through ``adjust_features`` with this column count.

    Returns:
        A ``(features, labels)`` tuple.
    """
    table = np.load(filename)
    features, labels = table[:, :-1], table[:, -1]

    if target_feature_count is None:
        return features, labels

    return adjust_features(features, target_feature_count), labels


def train_decision_tree(X_train, y_train):
    """Fit a ``DecisionTreeClassifier`` with default settings and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The fitted classifier.
    """
    # ``fit`` returns the estimator itself, so construction and fitting chain.
    return DecisionTreeClassifier().fit(X_train, y_train)



def evaluate_model(model, X_test, y_test):
    """Print AUROC, AUPRC and a classification report for ``model``.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Test feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        None. Results are written to standard output.
    """
    hard_labels = model.predict(X_test)
    # Column 1 of predict_proba is used as the positive-class score.
    # NOTE(review): this presumes a binary problem -- confirm against the data.
    positive_scores = model.predict_proba(X_test)[:, 1]

    scorers = (
        ("AUROC (Area Under ROC):", roc_auc_score),
        ("AUPRC (Area Under Precision-Recall Curve):", average_precision_score),
    )
    # Compute every metric before printing anything, so a failing metric
    # leaves no partial output.
    results = [(caption, scorer(y_test, positive_scores)) for caption, scorer in scorers]

    for caption, value in results:
        print(caption, value)
    print("Classification Report:")
    print(classification_report(y_test, hard_labels))

def main():
    """Train a decision tree on the training file and evaluate it on the test file.

    The test features are padded or truncated to the training feature count
    before evaluation.
    """
    train_path = r'D:\data\PCA_data\classification\rh_data_pca.npy'
    test_path = r'D:\data\PCA_data\regression\stitch_data_900_pca.npy'

    X_train, y_train = load_data(train_path)
    # Force the test matrix to the same width the classifier is trained on.
    X_test, y_test = load_data(test_path, target_feature_count=X_train.shape[1])

    evaluate_model(train_decision_tree(X_train, y_train), X_test, y_test)

# Run the train/evaluate pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
