In [1]:
import data_loading
import data_resolution_prep
import optimization
import cell_type_predict
import label_prediction_eval

2024-07-12 13:27:18.563591: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def classification(prediction_techniques, voting_methods, classifiers, target_column_name):
    """Tune, train and evaluate every classifier at every requested data resolution.

    The expression matrix and the train/test/validation sample-name lists are
    loaded once through ``data_loading.data_loading_processing``. For each
    entry of ``prediction_techniques`` the matching ``data_resolution_prep``
    function builds the splits, each classifier is handed to
    ``optimization.hyperparameter_optimization`` together with the training
    split and the first validation split, and
    ``label_prediction_eval.classifier_evaluation`` is then called once per
    voting method.

    NOTE(review): a later cell of this notebook defines ``classification``
    again; when both cells are run, that later definition shadows this one.

    Parameters
    ----------
    prediction_techniques : iterable of str
        Resolutions to run: 'Bulk', 'Pseudobulk' or 'Single cell'. Names are
        matched ignoring case and surrounding whitespace (so 'Single Cell'
        selects the single-cell branch); the canonical spelling is what gets
        forwarded to the evaluation step. Unrecognised names are reported
        with a printed message and skipped.
    voting_methods : iterable of str
        Voting method names, forwarded unchanged to the evaluation step.
    classifiers : iterable of str
        Classifier names, forwarded unchanged to the optimisation and
        evaluation steps.
    target_column_name : str
        Forwarded to the loading, preparation and evaluation helpers.

    Returns
    -------
    None
        Nothing is returned; evaluation is delegated to
        ``label_prediction_eval.classifier_evaluation``, which is called with
        ``output_file='OUTPUT_FILE'``.
    """
    # Names of the input files handed to the data loader.
    expression_file = "Variance_Estimation_2000_full.h5ad"
    train_sample_file = "train_sample_names"
    test_sample_file = "test_sample_names"
    validation_set1_file = "validation_sample_names1"
    validation_set2_file = "validation_sample_names2"

    # Load everything once; the same objects are reused for every technique.
    (
        expression_matrix,
        train_sample_names,
        test_sample_names,
        validation_sample_names1,
        validation_sample_names2,
    ) = data_loading.data_loading_processing(
        expression_file,
        train_sample_file,
        test_sample_file,
        validation_set1_file,
        validation_set2_file,
        target_column_name
    )

    # Lower-cased lookup so that spelling variants such as 'Single Cell' are
    # resolved to the canonical technique name instead of being skipped.
    canonical_techniques = {
        name.lower(): name for name in ('Bulk', 'Pseudobulk', 'Single cell')
    }

    # Every preparation function receives the same positional arguments.
    prep_args = (
        expression_matrix,
        train_sample_names,
        test_sample_names,
        validation_sample_names1,
        validation_sample_names2,
        target_column_name,
    )

    for requested_technique in prediction_techniques:
        technique = canonical_techniques.get(str(requested_technique).strip().lower())
        if technique is None:
            print('Prediction method is not applicable')
            continue

        # The three preparation functions return tuples of different lengths;
        # values that are not used below are bound to underscore-prefixed names.
        if technique == 'Bulk':
            (
                X_train, y_train, X_test, test_sample_labels,
                X_val1, y_val1, _X_val2, _y_val2,
            ) = data_resolution_prep.bulk_sample_prep(*prep_args)
        elif technique == 'Pseudobulk':
            (
                X_train, y_train, X_test, _y_test,
                X_val1, y_val1, _X_val2, _y_val2,
                _train_sample_labels, test_sample_labels,
                _val_sample_labels1, _val_sample_labels2,
            ) = data_resolution_prep.pseudobulk_sample_prep(*prep_args)
        else:  # 'Single cell'
            (
                X_train, y_train, X_test, _y_test,
                X_val1, y_val1, _X_val2, _y_val2,
                test_sample_labels,
            ) = data_resolution_prep.singlecell_sample_prep(*prep_args)

        # Optimise and train each classifier once per technique, then evaluate
        # the fitted models with every voting method.
        for classifier in classifiers:
            print(classifier)
            trained_classifier, calibrated_classifier, training_time = optimization.hyperparameter_optimization(
                X_train, y_train, X_val1, y_val1, classifier
            )
            for voting_method in voting_methods:
                label_prediction_eval.classifier_evaluation(
                    test_sample_labels,
                    training_time,
                    classifier,
                    expression_matrix,
                    voting_method,
                    technique,
                    trained_classifier,
                    calibrated_classifier,
                    test_sample_names,
                    X_test,
                    validation_sample_names2,
                    target_column_name,
                    output_file='OUTPUT_FILE'
                )


In [17]:
def classification(prediction_techniques, voting_methods, classifiers , target_column_name):
    """Run hyperparameter optimisation and evaluation for each technique/classifier pair.

    Loads the expression matrix and the sample-name lists with
    ``data_loading.data_loading_processing``, prepares the data for each
    requested technique with the matching ``data_resolution_prep`` function,
    calls ``optimization.hyperparameter_optimization`` for each classifier and
    finally calls ``label_prediction_eval.classifier_evaluation`` once per
    voting method.

    NOTE(review): this cell re-defines ``classification``; an earlier cell of
    this notebook already defines a function with the same name, which this
    definition shadows. Keep a single definition.

    Parameters
    ----------
    prediction_techniques : iterable of str
        Any of 'Bulk', 'Pseudobulk', 'Single cell'. Matching ignores case and
        surrounding whitespace, and the canonical spelling is forwarded to the
        evaluation step. Anything else prints a message and is skipped.
    voting_methods : iterable of str
        Passed one at a time, unchanged, to the evaluation step.
    classifiers : iterable of str
        Passed one at a time, unchanged, to the optimisation and evaluation
        steps.
    target_column_name : str
        Passed through to the loading, preparation and evaluation helpers.

    Returns
    -------
    None
        The function returns nothing; the evaluation helper is called with
        ``output_file='OUTPUT_FILE'``.
    """
    # File names given to the data loader.
    expression_file = "Variance_Estimation_2000_full.h5ad"
    train_sample_file = "train_sample_names"
    test_sample_file = "test_sample_names"
    validation_set1_file = "validation_sample_names1"
    validation_set2_file = "validation_sample_names2"

    loaded = data_loading.data_loading_processing(
        expression_file,
        train_sample_file,
        test_sample_file,
        validation_set1_file,
        validation_set2_file,
        target_column_name,
    )
    (
        expression_matrix,
        train_sample_names,
        test_sample_names,
        validation_sample_names1,
        validation_sample_names2,
    ) = loaded

    # Arguments shared by all three preparation functions.
    prep_args = (
        expression_matrix,
        train_sample_names,
        test_sample_names,
        validation_sample_names1,
        validation_sample_names2,
        target_column_name,
    )

    # Map lower-cased spellings to the canonical name so that variants such as
    # 'Single Cell' are accepted rather than silently skipped.
    canonical_techniques = {
        'bulk': 'Bulk',
        'pseudobulk': 'Pseudobulk',
        'single cell': 'Single cell',
    }

    for requested_technique in prediction_techniques:
        lookup_key = str(requested_technique).strip().lower()
        if lookup_key not in canonical_techniques:
            print('Prediction method is not applicable')
            continue
        technique = canonical_techniques[lookup_key]

        # Return tuples differ in length per technique; unused entries are
        # bound to underscore-prefixed names.
        if technique == 'Bulk':
            (
                X_train, y_train, X_test, test_sample_labels,
                X_val1, y_val1, _X_val2, _y_val2,
            ) = data_resolution_prep.bulk_sample_prep(*prep_args)
        elif technique == 'Pseudobulk':
            (
                X_train, y_train, X_test, _y_test,
                X_val1, y_val1, _X_val2, _y_val2,
                _train_sample_labels, test_sample_labels,
                _val_sample_labels1, _val_sample_labels2,
            ) = data_resolution_prep.pseudobulk_sample_prep(*prep_args)
        else:  # 'Single cell'
            (
                X_train, y_train, X_test, _y_test,
                X_val1, y_val1, _X_val2, _y_val2,
                test_sample_labels,
            ) = data_resolution_prep.singlecell_sample_prep(*prep_args)

        for classifier in classifiers:
            print(classifier)
            trained_classifier, calibrated_classifier, training_time = optimization.hyperparameter_optimization(
                X_train, y_train, X_val1, y_val1, classifier
            )

            # The fitted models are reused for every voting method.
            for voting_method in voting_methods:
                label_prediction_eval.classifier_evaluation(
                    test_sample_labels,
                    training_time,
                    classifier,
                    expression_matrix,
                    voting_method,
                    technique,
                    trained_classifier,
                    calibrated_classifier,
                    test_sample_names,
                    X_test,
                    validation_sample_names2,
                    target_column_name,
                    output_file='OUTPUT_FILE')
            

In [None]:

# Run the 'Bulk' and 'Pseudobulk' techniques with the 'Majority Voting'
# method for every classifier listed below.
prediction_techniques = [
    'Bulk',
    'Pseudobulk',
]
voting_methods = [
    'Majority Voting',
]

classifiers = [
    'Decision tree',
    'Random forest',
    'SVM linear',
    'SVM polynomial',
    'SVM radial basis function',
    'NuSVM linear',
    'NuSVM polynomial',
    'NuSVM radial basis function',
    'LinearSVC',
    'MLP',
]

classification(
    prediction_techniques,
    voting_methods,
    classifiers,
    'Cognitive Status',
)

In [None]:
# Run the single-cell technique with every voting method for every classifier.
# The technique name must be spelled 'Single cell' (lower-case "c"): that is
# the literal `classification` compares against, and 'Single Cell' would be
# reported as "Prediction method is not applicable" and skipped.
prediction_techniques = ['Single cell']

# Each voting method is listed once; a repeated entry would make every
# classifier be evaluated twice with the same method.
voting_methods = [
    'Majority Voting',
    'Majority Voting Top6',
    'Weighted Voting',
    'Transformed Weighted Voting',
]

classifiers = [
    'Decision tree',
    'Random forest',
    'SVM linear',
    'SVM polynomial',
    'SVM radial basis function',
    'NuSVM linear',
    'NuSVM polynomial',
    'NuSVM radial basis function',
    'LinearSVC',
    'MLP',
]

classification(prediction_techniques, voting_methods, classifiers, 'Cognitive Status')