In [None]:
import json
from itertools import product

from utils import import_annotated_json, ModelType, map_string_to_model_type, read_tuning_hyperparameters
from cross_validation import cross_validation, nested_cross_validation
from models.bayes_classifier import NaiveBayes, NaiveBayesCombined
from models.logistic_regression_classifier import LogisticRegression, LogisticRegressionCombined
from models.svm_classifier import SupportVectorMachine, SupportVectorMachineCombined, SupportVectorMachineOneVsRest

# Module-level dataset shared by every tune_* function below: each of them
# passes `df.hotel_review.to_frame()` and `df[category].to_frame()` to
# nested_cross_validation, so this cell must run before any tuning cell.
# NOTE(review): presumably a pandas DataFrame (callers rely on `.to_frame()`
# on its columns) -- confirm against utils.import_annotated_json.
df = import_annotated_json()


In [None]:
def tune_bayes(preprocess_params, category, type, results):
    """Score NaiveBayes for one preprocessing configuration.

    The evaluation only runs when ``preprocess_params['preprocess_type']`` is
    ``'bag_of_words'`` and ``preprocess_params['binary']`` is not ``True``;
    any other configuration returns immediately and leaves ``results``
    untouched.

    Args:
        preprocess_params: dict of preprocessing options; forwarded unchanged
            to ``nested_cross_validation``.
        category: name of the column of the module-level ``df`` that is
            passed as the second argument to ``nested_cross_validation``.
        type: string handed to ``map_string_to_model_type`` to build the
            ``NaiveBayes`` instance.
        results: dict updated in place with ``str(preprocess_params)`` mapped
            to the accuracy returned by ``nested_cross_validation``.

    Returns:
        None.
    """
    # 'binary' is only looked up once the preprocess type matches, exactly as
    # a short-circuiting `or` on the negated conditions would do.
    eligible = (
        preprocess_params['preprocess_type'] == 'bag_of_words'
        and not preprocess_params['binary'] == True
    )
    if eligible:
        reviews = df.hotel_review.to_frame()
        labels = df[category].to_frame()
        classifier = NaiveBayes(map_string_to_model_type(type))
        _, accuracy = nested_cross_validation(reviews, labels, classifier, preprocess_params)
        key = str(preprocess_params)
        results[key] = accuracy
        print(f'{key}: Accuracy is {accuracy}')
        

In [None]:
def tune_logistic_regression(preprocess_params, category, type, results):
    """Score LogisticRegression for one preprocessing configuration.

    Reads the tuning grid with
    ``read_tuning_hyperparameters('logistic_regression', type.lower())`` and
    calls ``nested_cross_validation`` once per combination of
    ``mini_batch_size``, ``num_of_iterations``, ``learning_rate``,
    ``regularization_type`` and ``lambda`` (same iteration order as five
    nested loops in that order).

    Args:
        preprocess_params: dict of preprocessing options; forwarded unchanged
            to ``nested_cross_validation``.
        category: name of the column of the module-level ``df`` that is
            passed as the second argument to ``nested_cross_validation``.
        type: string handed to ``map_string_to_model_type``; its lower-cased
            form selects the hyperparameter grid.
        results: dict updated in place with ``str(preprocess_params)`` mapped
            to the most recently computed accuracy.

    Returns:
        None.
    """
    # `.lower()` must actually be called: `type.lower` without parentheses is
    # a bound-method object, not the lower-cased string the reader expects.
    hypers = read_tuning_hyperparameters('logistic_regression', type.lower())
    grid = product(
        hypers['mini_batch_size'],
        hypers['num_of_iterations'],
        hypers['learning_rate'],
        hypers['regularization_type'],
        hypers['lambda'],
    )
    # NOTE(review): none of the values unpacked below is handed to
    # LogisticRegression or nested_cross_validation, so each pass issues the
    # same call and overwrites the same `results` key. Confirm how the model
    # is meant to receive them (constructor signature is not visible here)
    # before keying results per combination.
    for mini_batch_size, num_of_iterations, learning_rate, regularization_type, reg_lambda in grid:
        _, accuracy = nested_cross_validation(
            df.hotel_review.to_frame(),
            df[category].to_frame(),
            LogisticRegression(map_string_to_model_type(type)),
            preprocess_params,
        )
        results[str(preprocess_params)] = accuracy
        print(f'{str(preprocess_params)}: Accuracy is {accuracy}')

In [None]:
def tune_svm(preprocess_params, category, type, results):
    """Score SupportVectorMachine for one preprocessing configuration.

    Reads the tuning grid with
    ``read_tuning_hyperparameters('svm', type.lower())`` and calls
    ``nested_cross_validation`` once per combination of ``mini_batch_size``,
    ``num_of_iterations``, ``learning_rate``, ``regularization_type``,
    ``cost_function_type`` and ``C`` (same iteration order as six nested
    loops in that order).

    Args:
        preprocess_params: dict of preprocessing options; forwarded unchanged
            to ``nested_cross_validation``.
        category: name of the column of the module-level ``df`` that is
            passed as the second argument to ``nested_cross_validation``.
        type: string handed to ``map_string_to_model_type``; its lower-cased
            form selects the hyperparameter grid.
        results: dict updated in place with ``str(preprocess_params)`` mapped
            to the most recently computed accuracy.

    Returns:
        None.
    """
    # `.lower()` must actually be called: `type.lower` without parentheses is
    # a bound-method object, not the lower-cased string the reader expects.
    hypers = read_tuning_hyperparameters('svm', type.lower())
    grid = product(
        hypers['mini_batch_size'],
        hypers['num_of_iterations'],
        hypers['learning_rate'],
        hypers['regularization_type'],
        hypers['cost_function_type'],
        hypers['C'],
    )
    # NOTE(review): none of the values unpacked below is handed to
    # SupportVectorMachine or nested_cross_validation, so each pass issues
    # the same call and overwrites the same `results` key. Confirm how the
    # model is meant to receive them (constructor signature is not visible
    # here) before keying results per combination.
    for mini_batch_size, num_of_iterations, learning_rate, regularization_type, cost_function_type, C in grid:
        _, accuracy = nested_cross_validation(
            df.hotel_review.to_frame(),
            df[category].to_frame(),
            SupportVectorMachine(map_string_to_model_type(type)),
            preprocess_params,
        )
        results[str(preprocess_params)] = accuracy
        print(f'{str(preprocess_params)}: Accuracy is {accuracy}')

In [None]:
def tune_preprocessing_options(model: str, category: str, type: str) -> None:
    """Sweep every preprocessing combination for one model and save the scores.

    Prints a header, then calls the model-specific tuner (``tune_bayes``,
    ``tune_logistic_regression`` or ``tune_svm``) once per combination of
    preprocess type, binary, lowercase, n-gram and stop-word settings. Each
    tuner records into a shared ``results`` dict, which is printed and then
    written as JSON to
    ``tuning_history_<model>_<type>_<category>.json`` (all three lower-cased)
    in the current working directory.

    Args:
        model: one of ``'bayes'``, ``'logistic_regression'`` or ``'svm'``.
        category: forwarded to the tuner as its ``category`` argument.
        type: forwarded to the tuner as its ``type`` argument.

    Raises:
        ValueError: if ``model`` is not one of the three supported names.
    """
    print(f'CATEGORY: {category}')
    print('--------------------------------------------')
    print(f'MODEL: {model.upper()} CLASSIFIER')
    print('--------------------------------------------')
    print(f'MODEL TYPE: {type}')

    # The tuner does not depend on the grid, so pick it once instead of
    # re-dispatching on every iteration. An unknown model still fails right
    # after the header is printed, before any work is done.
    if model == 'bayes':
        tuner = tune_bayes
    elif model == 'logistic_regression':
        tuner = tune_logistic_regression
    elif model == 'svm':
        tuner = tune_svm
    else:
        raise ValueError('Unknown model type!')

    results = {}
    # product() walks the axes in the same order as nested loops would,
    # with the right-most axis (stop_words) varying fastest.
    grid = product(
        ["bag_of_words", "tf_idf", "tf"],
        [True, False],
        [True, False],
        [1, 2, 3],
        [True, False],
    )
    for preprocess_type, binary, lowercase, ngram, stop_words in grid:
        preprocess_params = {
            "preprocess_type": preprocess_type,
            "binary": binary,
            "lowercase": lowercase,
            "ngram": ngram,
            "stop_words": stop_words,
            "freq_max": 1.0,
            "freq_min": 1,
        }
        tuner(preprocess_params, category, type, results)

    for key, value in results.items():
        print(f'{key}: {value}')

    # `.lower()` must be called: interpolating the bare method would embed
    # "<built-in method lower of str object at 0x...>" in the file name.
    history_path = f'tuning_history_{model.lower()}_{type.lower()}_{category.lower()}.json'
    with open(history_path, "w") as outfile:
        json.dump(results, outfile)


In [None]:
# Run the full preprocessing sweep for the naive Bayes tuner on the
# 'amenities' column; accuracies are printed and dumped to a JSON file in the
# current working directory.
tune_preprocessing_options(
    model='bayes',
    category='amenities',
    type='CATEGORY',
)