# Use Our Classifier for Inference

In [1]:
import torch

import loader
import models
import utility

# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


In [2]:
# Load the two lookup tables returned by the loader:
#   filename_category_mapping: filename -> category
#   category_label_mapping:    category -> label
filename_category_mapping, category_label_mapping = loader.load_labeling_mappings()

# Build filename -> integer label as a NEW dict. Assigning
# `filename_label_mapping = filename_category_mapping` would only alias the
# same object, so filling in labels would silently overwrite the categories
# stored in filename_category_mapping as well.
filename_label_mapping = {
    name: int(category_label_mapping[category])
    for name, category in filename_category_mapping.items()
}

# Inverse lookup: label -> category. The keys are the label values exactly as
# they are stored in category_label_mapping (they are NOT converted with int()).
label_to_category_mapping = {label: category for category, label in category_label_mapping.items()}

In [3]:
# Name of the saved classifier checkpoint handed to models.Classifier.load_model.
filename = 'classifier_47k_11232024_211122.pth'

# Load the classifier first, then move it to the selected device as a
# separate step.
model = models.Classifier.load_model(filename)
model = model.to(device)

Model loaded from model_weights\classifier_47k_11232024_211122.pth


  model_info = torch.load(filepath)


In [4]:
def philosophize_this(inference_df, model, device, label_to_category_mapping, print_info=False):
    """Classify every row of ``inference_df`` and print the averaged ranking.

    Each row's ``embedding`` is passed through ``model``; the output is turned
    into probabilities with a softmax over the last dimension, and the per-row
    probabilities are averaged into a single ranking over all categories.

    Args:
        inference_df: Table supporting ``len()`` and ``iterrows()`` (e.g. a
            pandas DataFrame). Every row must provide ``row['embedding']``,
            an object with a ``.to(device)`` method, and, when ``print_info``
            is true, ``row['chunk_text']``. The index may hold any labels;
            rows are stored by position, not by index label.
        model: Callable that also exposes ``eval()``. Its output is assumed to
            be one-dimensional with ``len(label_to_category_mapping)`` entries
            -- TODO confirm against ``models.Classifier``.
        device: Device the embedding is moved to before calling ``model``.
        label_to_category_mapping: Maps ``str(class_index)`` to a category
            name (keys are strings, not ints).
        print_info: When true, print each row's ranking followed by its text.

    Returns:
        The final ranking as a list of ``(category, probability)`` tuples,
        sorted by descending averaged probability. The same ranking is also
        printed.
    """

    def logits_to_prediction(logits, label_to_category_mapping):
        # Order class indices from highest to lowest score and pair each
        # category name with its score.
        sorted_indices = torch.argsort(logits, descending=True)
        return [(label_to_category_mapping[str(idx.item())], logits[idx].item()) for idx in sorted_indices]

    def format_prediction(predictions):
        # Render every (category, score) pair with two decimals.
        return [f'{c}, {v:.2f}' for c, v in predictions]

    # Switching to evaluation mode is loop-invariant, so do it once up front.
    model.eval()

    chunk_probabilities = torch.zeros((len(inference_df), len(label_to_category_mapping)))
    # iterrows() yields index *labels*; enumerate() supplies the 0-based
    # position so frames with a filtered or custom index fill the right row.
    for position, (_, row) in enumerate(inference_df.iterrows()):
        with torch.no_grad():
            logits = model(row['embedding'].to(device)).cpu()
            probabilities = torch.nn.functional.softmax(logits, dim=-1)
        chunk_probabilities[position] = probabilities

        if print_info:
            predictions = logits_to_prediction(probabilities, label_to_category_mapping)
            print(f"{format_prediction(predictions)}")
            print(row['chunk_text'])
            print(f"{'='*100}\n")

    # Average the per-row probabilities into one distribution for the whole text.
    final_prediction = logits_to_prediction(chunk_probabilities.mean(dim=0), label_to_category_mapping)
    print(f"Final philosophical prediction of your input text:\n{format_prediction(final_prediction)}")
    return final_prediction

In [10]:
# Read the whole document to classify into memory as one UTF-8 string.
text_file = 'data/inference/rtwbackpack.txt'
with open(text_file, mode='r', encoding='utf-8') as text_handle:
    inference_text = text_handle.read()

# Chunk sizes to compare; each one is passed as chunk_size to loader.embed_texts.
# For a single verbose run instead, use one size such as [2000] and pass
# print_info=True to philosophize_this.
chunking_sizes = [100, 200, 500, 1000]

for chunk_size in chunking_sizes:
    # Re-embed the text at this chunk size; the overlap stays fixed at 50.
    inference_df = loader.embed_texts(
        inference_text,
        chunk_size=chunk_size,
        chunk_overlap=50,
        print_info=False,
    )
    print(f"Chunk size: {chunk_size}")
    philosophize_this(
        inference_df,
        model,
        device,
        label_to_category_mapping,
        print_info=False,
    )
    print(f"{'='*100}\n")

Chunk size: 100
Final philosophical prediction of your input text:
['transcendentalism, 0.37', 'epicureanism, 0.17', 'absurdism, 0.16', 'stoicism, 0.12', 'buddhism, 0.06', 'existentialism, 0.06', 'rationalism, 0.03', 'confucianism, 0.02', 'taoism, 0.01']

Chunk size: 200
Final philosophical prediction of your input text:
['transcendentalism, 0.39', 'epicureanism, 0.19', 'buddhism, 0.14', 'absurdism, 0.12', 'stoicism, 0.08', 'rationalism, 0.06', 'taoism, 0.01', 'existentialism, 0.01', 'confucianism, 0.01']

Chunk size: 500
Final philosophical prediction of your input text:
['transcendentalism, 0.59', 'stoicism, 0.18', 'epicureanism, 0.15', 'confucianism, 0.02', 'absurdism, 0.02', 'rationalism, 0.01', 'taoism, 0.01', 'buddhism, 0.01', 'existentialism, 0.00']

Chunk size: 1000
Final philosophical prediction of your input text:
['transcendentalism, 0.45', 'absurdism, 0.18', 'taoism, 0.17', 'epicureanism, 0.07', 'stoicism, 0.07', 'buddhism, 0.02', 'confucianism, 0.02', 'rationalism, 0.01', 