In [3]:
## Adapt document features function to account for bigrams combined with SentiWordNet sentiment scores.

def doc_features_with_swn_sentiment_scores_bigrams(document, word_features):
    """
    Build the feature dictionary for one document (a list of tokens).

    For every entry in `word_features` (unigrams and/or bigrams) the dictionary
    gets a boolean `contains(<entry>)` feature that is True when the entry is
    either one of the document's tokens or one of the bigrams built from the
    document. Two extra features, `positive_sentiment` and
    `negative_sentiment`, hold the two values returned by
    `get_sentiwordnet_scores_from_tokens(document)` (the summed SentiWordNet
    positive and negative scores, going by that helper's name).

    Args:
        document: the tokens of a single sample (e.g. a tokenized line of poetry).
        word_features: the vocabulary entries to create `contains(...)` features for.

    Returns:
        dict mapping feature names to values.
    """
    # Document-level sentiment totals are computed first, on the raw token list.
    pos_total, neg_total = get_sentiwordnet_scores_from_tokens(document)

    # De-duplicate the tokens and the bigrams, then merge them into a single
    # lookup set so each vocabulary entry needs only one membership test.
    unigram_set = set(document)
    bigram_set = set(bigrams(document))
    grams_in_document = unigram_set | bigram_set

    # One boolean presence feature per vocabulary entry.
    features = {
        f"contains({gram})": gram in grams_in_document
        for gram in word_features
    }

    # Append the two sentiment features after the presence features.
    features["positive_sentiment"] = pos_total
    features["negative_sentiment"] = neg_total

    return features


"""
    A function that takes in a range of values for the number of most common words to use as features
    and then calculates the accuracy and macro-average F1 score for each number of most common words.
    In contrast to `afinn_calculate_metrics_for_different_vocab_size_features`, this function
    uses `doc_features_with_swn_sentiment_scores_bigrams` instead of the `doc_features_with_afinn_sentiment_scores` function,
    so it uses SentiWordNet summed positive and negative scores instead of AFINN sentiment scores.
    This version of the function also includes bigrams as features.
"""
def swn_calculate_metrics_for_different_vocab_size_features_with_bigrams(
    lowest_num_words_limit, # lower end of range for how many words to use
    highest_num_words_limit, # upper end of range (exclusive, as passed to np.arange)
    all_grams, # the freq dist of unigrams and bigrams ordered by most common to least common
    train_tuples, # training data tuples of form (sample, label)
    val_tuples, # validation data tuples of form (sample, label)
    step_size=50, # interval size between numbers of words to test
):
    """
    Train and score a Naive Bayes classifier for a range of vocabulary sizes.

    For every vocabulary size in
    `np.arange(lowest_num_words_limit, highest_num_words_limit, step_size)`
    the first `vocab_size` entries of `all_grams` are used as features
    (via `doc_features_with_swn_sentiment_scores_bigrams`), an
    `nltk.NaiveBayesClassifier` is trained on `train_tuples`, and its accuracy
    and macro-average F1 score on `val_tuples` are recorded. Each vocabulary
    size is printed as it is processed.

    Args:
        lowest_num_words_limit: first vocabulary size to evaluate.
        highest_num_words_limit: end of the range; not itself evaluated.
        all_grams: iterable of unigrams/bigrams; its iteration order decides
            which entries count as the "top" ones.
        train_tuples: iterable of (document, label) pairs used for training.
        val_tuples: iterable of (document, label) pairs used for evaluation.
        step_size: spacing between consecutive vocabulary sizes.

    Returns:
        Tuple `(top_word_counts, accuracies, avg_f1_scores)`: the array of
        evaluated vocabulary sizes plus two lists of the same length holding
        the validation accuracy and macro-average F1 score for each size.
    """
    top_word_counts = np.arange(lowest_num_words_limit, highest_num_words_limit, step_size)
    accuracies = [] # validation accuracy for each vocabulary size
    avg_f1_scores = [] # macro-average F1 score for each vocabulary size

    # Materialize the ranked vocabulary once; it does not change between
    # iterations, so there is no need to rebuild the list for every size.
    ranked_grams = list(all_grams)

    # iterate over the array of top-word counts to use (i.e. vocab subset to use in features)
    for vocab_size in top_word_counts:
        print(vocab_size)
        # the top "vocab_size" entries to use as features
        word_features = ranked_grams[:vocab_size]

        # get the featuresets based on the top N word features for training and validation splits
        train_data_featuresets = [
            (doc_features_with_swn_sentiment_scores_bigrams(doc, word_features), label)
            for (doc, label) in train_tuples
        ]
        validation_data_featuresets = [
            (doc_features_with_swn_sentiment_scores_bigrams(doc, word_features), label)
            for (doc, label) in val_tuples
        ]

        # train a NB classifier and record its validation accuracy
        NBclassifier = nltk.NaiveBayesClassifier.train(train_data_featuresets)
        accuracies.append(nltk.classify.accuracy(NBclassifier, validation_data_featuresets))

        # Take the true labels from the same validation tuples that produced
        # the predictions, so the F1 score always refers to the data passed in
        # (rather than to a module-level label list that may not match it).
        validation_labels = [label for (_, label) in validation_data_featuresets]
        validation_predictions = [
            NBclassifier.classify(features_dict)
            for (features_dict, _) in validation_data_featuresets
        ]

        # Retrieve the macro-average F1 score from the classification report
        class_report = classification_report(
            validation_labels,
            validation_predictions,
            output_dict=True,  # Return report as a dictionary (easier to access metrics)
            # Set the score to 0 if "UndefinedMetricWarning" appears because either recall or precision for a class are 0.0
            zero_division=0
        )
        avg_f1_scores.append(class_report['macro avg']['f1-score'])

    return top_word_counts, accuracies, avg_f1_scores

In [None]:
# For each split, put every sample's bigrams in front of its own tokens so that
# each document carries both its bigrams and its unigrams.
# Each sample must be a list, because the bigram list is concatenated with it.
wordnet_negated_train_tokens_with_bigrams = [
    list(bigrams(tokens)) + tokens
    for tokens in wordnet_negated_train_tokens
]
wordnet_negated_validation_tokens_with_bigrams = [
    list(bigrams(tokens)) + tokens
    for tokens in wordnet_negated_validation_tokens
]