In [1]:
from torch import nn
import sys
from src.API.AI_API_Call import openai_topic_generation, gemini_generation, openai_generation, \
    gemini_embedding_generation, words_to_pos_types
from src.API.text_classifier import TextData
from src.API.constant import AI, HUMAN, OPENAI_MODEL_NAME, GEMINI_MODEL_NAME
from src.API.machine_learning import k_mean_cluster_fit_predict, load_kmeans_model, \
    hierarchical_cluster_fit_predict, load_hierarchical_model, load_autoencoders, \
    autoencoder_classify_text, evaluate_sequence, load_transition_matrix, Autoencoder

In [2]:
# Feature key handed to the KMeans and hierarchical clustering helpers in classify_text().
# It matches the `embedding_classification` attribute read from TextData there; presumably the
# helpers use it to pick that attribute -- confirm in src.API.machine_learning.
EMBEDDING_CLASSIFICATION = 'embedding_classification'

# Relative paths (resolved against the current working directory) of the saved model files
# that main() passes to load_models().
k_mean_file_path = './model_file/kmean.pkl'
hierarchical_file_path = './model_file/hierarchical.pkl'
autoencoder_file_path = './model_file/autoencoders.pth'
human_transition_matrix_file_path = './model_file/human_matrix.pkl'
ai_transition_matrix_file_path = './model_file/AI_matrix.pkl'

In [3]:
class KMeanModel:
    """Plain container that keeps a KMeans model together with its companion data.

    The constructor stores its arguments unchanged (no copying, no validation):

    * ``model`` -- the clustering model object.
    * ``feature_array`` -- the feature array that accompanies the model.
    * ``cluster_majority`` -- the per-cluster majority information.
    """

    def __init__(self, model, feature_array, cluster_majority):
        # Bind all three attributes in one step, in the same order as the parameters.
        self.model, self.feature_array, self.cluster_majority = (
            model,
            feature_array,
            cluster_majority,
        )


class HierarchicalModel:
    """Plain container that keeps a hierarchical clustering model with its companion data.

    The constructor stores its arguments unchanged (no copying, no validation):

    * ``agg_cluster`` -- the clustering model object.
    * ``feature_array`` -- the feature array that accompanies the model.
    * ``cluster_majority`` -- the per-cluster majority information.
    * ``cluster_centroid`` -- the per-cluster centroid information.
    """

    def __init__(self, agg_cluster, feature_array, cluster_majority, cluster_centroid):
        # Model object and the array that accompanies it.
        self.agg_cluster, self.feature_array = agg_cluster, feature_array
        # Per-cluster lookups.
        self.cluster_majority, self.cluster_centroid = cluster_majority, cluster_centroid


class AutoencoderModel:
    """Plain container bundling one autoencoder with the objects that go with it.

    The constructor stores its arguments unchanged (no copying, no validation):

    * ``encoder`` -- the autoencoder network object.
    * ``optimizer`` -- the optimizer associated with ``encoder``.
    * ``criterion`` -- the loss function object.
    * ``input_dim`` / ``latent_dim`` -- the recorded input and latent dimensions.
    """

    def __init__(self, encoder, optimizer, criterion, input_dim, latent_dim):
        # Network plus the objects used alongside it.
        self.encoder, self.optimizer, self.criterion = encoder, optimizer, criterion
        # Dimensions recorded for the network.
        self.input_dim, self.latent_dim = input_dim, latent_dim

In [4]:
def load_models(
        kmeans_model_file_path: str,
        hierarchical_model_file_path: str,
        autoencoder_file_path: str,
        human_transition_matrix_file_path: str,
        ai_transition_matrix_file_path: str):
    """
    Load every saved model through the project loaders and wrap each in its container class.

    Args:
        kmeans_model_file_path: path passed to ``load_kmeans_model``.
        hierarchical_model_file_path: path passed to ``load_hierarchical_model``.
        autoencoder_file_path: path passed to ``load_autoencoders``.
        human_transition_matrix_file_path: path passed to ``load_transition_matrix`` for the
            human matrix.
        ai_transition_matrix_file_path: path passed to ``load_transition_matrix`` for the
            AI matrix.

    Returns:
        A 6-tuple ``(kmeans_model, hierarchical_model, h_autoencoder, AI_autoencoder,
        human_transition_matrix, ai_transition_matrix)`` where the first four items are
        ``KMeanModel``, ``HierarchicalModel``, ``AutoencoderModel`` and ``AutoencoderModel``
        instances and the last two are whatever ``load_transition_matrix`` returned.
    """
    # --- KMeans: the loader result is indexed by key, then wrapped ---
    kmeans_payload = load_kmeans_model(kmeans_model_file_path)
    kmeans_wrapper = KMeanModel(
        kmeans_payload['kmeans'],
        kmeans_payload['feature_array'],
        kmeans_payload['cluster_majorities'],
    )

    # --- Hierarchical clustering: same pattern, with the centroids as a fourth entry ---
    hierarchical_payload = load_hierarchical_model(hierarchical_model_file_path)
    hierarchical_wrapper = HierarchicalModel(
        hierarchical_payload['agg_cluster'],
        hierarchical_payload['feature_array'],
        hierarchical_payload['cluster_majorities'],
        hierarchical_payload['cluster_centroids'],
    )

    # --- Autoencoders: one file yields both networks, both optimizers and shared metadata ---
    human_encoder, ai_encoder, human_optimizer, ai_optimizer, autoencoder_meta = \
        load_autoencoders(autoencoder_file_path)
    input_dim, latent_dim = autoencoder_meta["input_dim"], autoencoder_meta["latent_dim"]

    # Each wrapper gets its own MSE loss instance.
    human_criterion, ai_criterion = nn.MSELoss(), nn.MSELoss()
    human_autoencoder = AutoencoderModel(human_encoder, human_optimizer, human_criterion, input_dim, latent_dim)
    ai_autoencoder = AutoencoderModel(ai_encoder, ai_optimizer, ai_criterion, input_dim, latent_dim)

    # --- Transition matrices: human first, then AI ---
    human_matrix = load_transition_matrix(human_transition_matrix_file_path)
    ai_matrix = load_transition_matrix(ai_transition_matrix_file_path)

    return (
        kmeans_wrapper,
        hierarchical_wrapper,
        human_autoencoder,
        ai_autoencoder,
        human_matrix,
        ai_matrix,
    )

In [5]:
def handle_user_input(user_input: str = None):
    """
    Build TextData objects for the user's article and for two AI-written reference articles.

    A topic is derived from the article with ``openai_topic_generation``; one article on that
    topic is then generated with ``gemini_generation`` and one with ``openai_generation``.
    For each of the three texts an embedding (always via ``gemini_embedding_generation``) and
    POS types (``words_to_pos_types``) are computed and packed into a ``TextData``.

    Args:
        user_input: the article to test. When ``None`` the article is read interactively
            with ``input()``.

    Returns:
        A 4-tuple ``(text_data, text_data_OAI, text_data_GEMINI, topic)``: the TextData for
        the user's article, for the OpenAI-generated article, for the Gemini-generated
        article, and the generated topic.

    Raises:
        ValueError: if the article is empty or contains only whitespace.
    """
    # Get user input if not provided
    if user_input is None:
        user_input = input("Enter your article you would like to test: ")

    # Fail fast: a blank article would still trigger six remote generation/embedding calls.
    if not user_input.strip():
        raise ValueError("The article to test must not be empty.")

    # Generate the topic using the OpenAI model
    topic = openai_topic_generation(user_input)
    print(f"Topic generated: {topic}")
    # Generate one reference article per provider on that topic
    gemini_response = gemini_generation(topic)
    openai_response = openai_generation(topic)

    # Generate embeddings for the user input and both generated articles
    embedding_response = gemini_embedding_generation(user_input)
    embedding_response_OAI = gemini_embedding_generation(openai_response)
    embedding_response_GEMINI = gemini_embedding_generation(gemini_response)

    # Generate POS types for the user input and both generated articles
    pos_types = words_to_pos_types(user_input)
    pos_types_OAI = words_to_pos_types(openai_response)
    pos_types_GEMINI = words_to_pos_types(gemini_response)

    # Create text data objects for each response.
    # NOTE(review): here the embedding is the third positional argument, whereas the two calls
    # below pass a label third and the embedding fourth -- confirm this matches the signature
    # of TextData.create_from_user_input.
    text_data = TextData.create_from_user_input(user_input, pos_types, embedding_response)
    # Tag each generated article with the model that actually produced it (the two model
    # names were previously swapped).
    text_data_OAI = TextData.create_from_user_input(openai_response, pos_types_OAI, AI, embedding_response_OAI,
                                                    OPENAI_MODEL_NAME)
    text_data_GEMINI = TextData.create_from_user_input(gemini_response, pos_types_GEMINI, AI, embedding_response_GEMINI,
                                                       GEMINI_MODEL_NAME)

    return text_data, text_data_OAI, text_data_GEMINI, topic

In [6]:
def classify_text(text_data: TextData, k_mean_model: KMeanModel, hierarchical_model: HierarchicalModel,
                  h_autoencoder: AutoencoderModel, AI_autoencoder: AutoencoderModel,
                  human_transition_matrix, ai_transition_matrix):
    """
    Run four separate classifiers on ``text_data`` and combine their answers by vote.

    The classifiers are: KMeans clustering, hierarchical clustering, the pair of
    autoencoders, and a comparison of the text's POS transition matrix against the human and
    AI transition matrices. The final decision is delegated to
    ``text_data.classify_with_vote``.

    Args:
        text_data: the text to classify; its ``embedding_classification`` and
            ``pos_transition_matrix`` attributes are read here.
        k_mean_model: wrapper supplying ``model`` and ``feature_array``.
        hierarchical_model: wrapper supplying ``agg_cluster``, ``cluster_majority`` and
            ``cluster_centroid``.
        h_autoencoder: wrapper supplying the human ``encoder`` and ``criterion``.
        AI_autoencoder: wrapper supplying the AI ``encoder`` and ``criterion``.
        human_transition_matrix: reference matrix scored via ``evaluate_sequence``.
        ai_transition_matrix: reference matrix scored via ``evaluate_sequence``.

    Returns:
        A 5-tuple ``(classification, k_mean_classification, hierarchical_classification,
        autoencoder_classification, transition_classification)``.
    """
    # Clustering-based votes
    kmeans_label = k_mean_cluster_fit_predict(
        k_mean_model.model,
        text_data,
        EMBEDDING_CLASSIFICATION,
        k_mean_model.feature_array,
    )
    hierarchical_label = hierarchical_cluster_fit_predict(
        hierarchical_model.agg_cluster,
        text_data,
        EMBEDDING_CLASSIFICATION,
        hierarchical_model.cluster_majority,
        hierarchical_model.cluster_centroid,
    )

    # Autoencoder vote: both networks and both loss functions are handed to the helper
    autoencoder_label = autoencoder_classify_text(
        text_data.embedding_classification,
        h_autoencoder.encoder,
        AI_autoencoder.encoder,
        h_autoencoder.criterion,
        AI_autoencoder.criterion,
    )

    # Transition-matrix vote: score against the AI matrix first, then the human matrix
    ai_score = evaluate_sequence(text_data.pos_transition_matrix, ai_transition_matrix)
    human_score = evaluate_sequence(text_data.pos_transition_matrix, human_transition_matrix)
    # HUMAN only when the human score is strictly greater; ties fall to AI
    transition_label = HUMAN if human_score > ai_score else AI

    # Combine the four individual votes into the final answer
    final_label = text_data.classify_with_vote(
        kmeans_label, hierarchical_label, autoencoder_label, transition_label)

    return final_label, kmeans_label, hierarchical_label, autoencoder_label, transition_label

In [7]:
def main(user_input_file_path: str = None):
    """
    Load the models, obtain the article to test, classify it and print the results.

    Args:
        user_input_file_path: path of a text file containing the article. The file is read
            as UTF-8. When ``None`` no file is read and ``handle_user_input`` is called with
            ``None``, which makes it prompt for the article interactively.

    Returns:
        None. The overall classification and the four individual classifications are
        printed.
    """
    # Load models
    (kmeans_model, hierarchical_model, h_autoencoder, AI_autoencoder,
     human_transition_matrix, ai_transition_matrix) = load_models(
        k_mean_file_path, hierarchical_file_path, autoencoder_file_path,
        human_transition_matrix_file_path, ai_transition_matrix_file_path)

    # Read user input from file
    if user_input_file_path is None:
        user_input = None
    else:
        # Decode explicitly as UTF-8: without `encoding`, open() falls back to the platform
        # locale codec, which can garble non-ASCII characters in the article before it is
        # embedded and classified.
        with open(user_input_file_path, 'r', encoding='utf-8') as file:
            user_input = file.read()

    # Only the user's TextData is classified; the generated articles and topic are not used here.
    text_data, _text_data_OAI, _text_data_GEMINI, _topic = handle_user_input(user_input=user_input)

    # Classify text data
    (classification, k_mean_classification, hierarchical_classification,
     autoencoder_classification, transition_classification) = classify_text(
        text_data, kmeans_model, hierarchical_model,
        h_autoencoder, AI_autoencoder,
        human_transition_matrix, ai_transition_matrix)

    print(f"Classification: {classification}")
    print(f"KMeans Classification: {k_mean_classification}")
    print(f"Hierarchical Classification: {hierarchical_classification}")
    print(f"Autoencoder Classification: {autoencoder_classification}")
    print(f"Transition Classification: {transition_classification}")

In [8]:
# Run the whole pipeline on the text stored in ./test.txt (path relative to the working directory).
main("./test.txt")
    