In [8]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer


# Filesystem location of the all-MiniLM-L6-v2 sentence-embedding model.
model_dir = "/Users/tomasnagy/FastAPI/application/models/embeddings/all-MiniLM-L6-v2"
# Build the SentenceTransformer encoder from that directory.
model = SentenceTransformer(model_dir)

# Candidate topic labels: short LLM-related keywords first, followed by longer
# descriptive topic names. Each label appears exactly once, so no label text
# is embedded twice; order matters only for ties, where the earliest label wins.
topic_labels = [
    "Large Language Model", "Open-source LLMs", "LLM", "Mistral", "LLMs", "LLM-based", "LLM-powered",
    "GPT", "RAG", "GPT-4", "GPT-3", "GPT-2", "ChatGPT", "GPT model", "Llama",
    "Generative Pre-trained Transformer (GPT)", "BERT", "RoBERTa", "DistilBERT",
    "ALBERT", "Knowledge distillation", "Transformer-based model",
    "Neural Language Model", "Transformers", "Attention mechanism", "Prompting",
    "Chain of thought", "Scaling law", "Model Bias and Fairness", "Foundation Model",
    "AI model", "AI language model", "NLP model", "Embedding", "contextual Embedding",
    "quantisation", "Fine-tuning", "Zero-shot Learning", "Few-shot Learning", "AI Ethics",
    "Sarcasm Detection in Natural Language Processing",
    "Sentence Embeddings and Semantic Meaning Analysis",
    "Adversarial Attacks on Graph Neural Networks",
    "Adversarial Attacks on Language Models",
    "Adversarial Attacks on Vision Language Models",
    "Legal Language Models Evaluation",
    "Keyphrase Generation and Evaluation",
    "Evaluating Gender Bias in Large Vision-Language Models",
    "Detecting AI-Generated Texts",
    "Large Language Model Safety Evaluation",
    "Jailbreak Attacks on Large Language Models",
    "Large Language Model Security Vulnerabilities and Attacks",
    "Adversarial Attacks on Retrieval-Augmented Generation",
    "Fuzzing with Large Language Models",
    "Negation Understanding in Large Language Models",
    "Large Language Model Hallucinations",
    "Contextualized Word Embeddings for Semantic Shift Detection",
    "Sentence Simplification and Evaluation",
    "Large Language Model Data Curation and Deduplication",
    "Backdoor Attacks on Large Language Models",
    "Data Contamination in Large Language Models",
    "AI-Assisted Code Generation Tools Evaluation",
    "Assessing Creativity in Large Language Models",
    "Argument Mining and Quality Assessment",
    "NLG Evaluation Metrics and Models",
    "Evaluating Large Language Models for Factoid Question Answering",
    "Story Generation and Evaluation",
    "Evaluating Chinese Language Models",
    "Long-Context Language Model Evaluation",
    "Crowdsourcing and Large Language Model Annotation",
    "Attribution in Large Language Models",
    "Large Vision-Language Models Hallucination Analysis",
    "Knowledge Conflicts in Large Language Models",
    "Backdoor Attacks on NLP Models",
    "Out-of-Distribution Detection Methods",
    "Adversarial Attacks on Large Language Models",
    "Evaluating Large Language Models on Multiple Choice Questions",
    "Evaluating Language Models for Question Answering Tasks",
    "Evaluating Large Language Models for NLG Tasks",
    "Evaluating Large Language Models for Reliable Answering",
    "Linguistic Ambiguity in NLP Systems",
    "Large Language Models Development and Evaluation",
    "Large Language Model Evaluation Benchmarks",
    "Linguistic Analysis of AI-Generated Texts",
    "Reasoning Verification and Evaluation",
    "ChatGPT Applications and Evaluations",
    "Evaluating Large Language Models Critique Abilities",
    "Conversational AI and Chatbot Evaluation",
    "AI Safety and Risk Evaluation",
    "Honesty and Helpfulness in Large Language Models",
    "Natural Language Processing Evaluation Metrics",
    "3D Human Pose Estimation",
    "3D Lane Detection in Autonomous Driving",
    "3D Object Detection with LiDAR and Cameras",
    "3D Visual Grounding and Scene Understanding",
    "6D Object Pose Estimation in Robotics",
    "Action Recognition in Videos using Transformers",
    "Adapting Large Language Models to Specialized Domains",
    "Advancements in Topic Modeling and Extraction Techniques",
    "Agricultural Applications of AI and Large Models",
    "AI Debates and Biases in Conversational Systems",
    "AI in Software Engineering and Development",
    "AI Performance on Standardized Exams and Educational Assessments",
    "AI-Assisted Programming and Software Development",
    "AI-Assisted Text Annotation and Labeling",
    "AI-Driven Scientific Discovery and Automation",
    "AI-Generated Educational Crosswords and Language Models",
    "AI-Generated Narrative in Games",
    "AI-Powered Design and Prototyping Tools",
    "AI-powered Tutoring in Programming Education",
    "Analyzing Political Discourse and Ideology",
    "Anomaly Detection in Logs and Workflows",
    "Anomaly Detection with Multimodal Models",
    "Applying Large Language Models in Requirements Engineering",
    "Arabic Language Models Development",
    "Aspect-Based Sentiment Analysis",
    "Assurance Case Automation for Safety-Critical Systems",
    "Astronomy and Astrophysics Research",
    "Audio Captioning and Representation Learning",
    "Audio Encode and Voice Conversion Models",
    "Automated Distractor Generation for Math MCQs",
    "Automated Grading and Assessment with Large Language Models",
    "Automated Penetration Testing with Large Language Models",
    "Automated Program Repair with Large Language Models",
    "Automated Program Verification with Large Language Models",
    "Automated Red Teaming for Large Language Models",
    "Automated Test Generation with Large Language Models",
    "Automated Verilog Generation and Hardware Design",
    "Automating Ontology Generation with Large Language Models",
    "Automating Software Development with Large Language Models",
    "Autonomous Driving Technologies",
    "Brain Age Estimation using Neuroimaging and Machine Learning",
    "Brain Decoding with fMRI and Visual Semantics",
    "Brain Network Analysis for Neurological Disorders",
    "Chart Understanding with Large Foundation Models",
    "Chatbot Conversations and Dialogue Systems",
    "ChatGPT and Academic Integrity",
    "Citation Analysis and Prediction in Scholarly Work",
    "Climate Change and NLP Applications",
    "Code Clone Detection and Analysis",
    "Code Completion and Retrieval",
    "Code Generation with Large Language Models",
    "Code Generation with Reinforcement Learning",
    "Code Intelligence and Large Language Models",
    "Code Review Automation with LLMs",
    "Code Summarization and API Generation",
    "Code-Mixed Language Processing",
    "Compiler Optimization and Code Translation",
    "Compositional Image Retrieval (CIR)",
    "Computer Vision for Crack and Stress Detection",
    "Content Moderation with Large Language Models",
    "Contrastive Multimodal Embeddings and Retrieval",
    "Conversational AI and Large Language Models",
    "Conversational AI Safety and Toxicity Detection",
    "Conversational Intent Understanding and Dialogue Systems",
    "Conversational Search and Retrieval Systems",
    "Convolutional Neural Networks and Transformers in Medical Imaging",
    "Crash Severity Analysis with Large Language Models",
    "Cross-Cultural Dialogue and Language Analysis",
    "Cultural Awareness in Multimodal AI Systems",
    "Cybersecurity and Threat Analysis",
    "Cybersecurity Risks and Vulnerabilities in AI Models",
    "Deep Learning for Weather Forecasting",
    "Deep Learning for Wildlife and Image Classification",
    "Defect Classification in Manufacturing",
    "Detecting Misinformation and Bias in Media",
    "Detecting Vulnerabilities in Code with Large Language Models",
    "Diabetic Retinopathy Classification using Deep Learning",
    "Dialogue Systems and Conversational AI",
    "Diffusion Models for Text-Guided Image Generation and Editing",
    "Diffusion-Based Video Generation",
    "Disaster Response using Twitter Data Classification",
    "Discourse Parsing and Corpus Analysis",
    "Document Layout Understanding with Large Language Models",
    "Document-Level Relation Extraction",
    "E-commerce and Retail Analytics",
    "E-commerce Product Attribute Extraction",
    "ECG Interpretation and Diagnosis",
    "Electric Grid Management and Electrification",
    "Electroencephalogram Analysis and Modeling",
    "Emotion Recognition and Analysis in Text",
    "Energy and Engineering Applications of AI",
    "Entity Alignment in Knowledge Graphs",
    "Entity Matching with Large Language Models",
    "Event Extraction and Semantic Annotation",
    "Face Forgery Detection and Recognition",
    "Facial Expression Recognition Methods",
    "Fake News Detection in AI-Generated Content",
    "Fake Review Detection and Sentiment Analysis",
    "Fault Diagnosis in Industrial Machinery",
    "Federated Recommendation Systems with Personalization and Privacy",
    "Few-Shot Object Detection",
    "Financial NLP and Large Language Models",
    "Food Computing and Recipe Generation",
    "Galaxy Classification in Astronomy",
    "Game Agents and AI-Driven Game Development",
    "Gaze Estimation and Tracking Systems",
    "Generative AI and its Applications",
    "Generative AI for Urban Mobility and Transportation",
    "Generative AI in Architectural Design",
    "Generative AI in Construction and Industrial Automation",
    "Generative AI in Telecommunications",
    "Generative AI in Virtual Reality",
    "Geospatial Data Processing with Large Language Models",
    "Geospatial Language Models and GeoAI",
    "Gesture Generation from Speech and Text",
    "Grammatical Error Correction with Large Language Models",
    "Handwritten Text Recognition and OCR",
    "Hate Speech Detection and Analysis",
    "Hateful Meme Detection Using Multimodal Analysis",
    "Human Activity Recognition with Wearable Sensors",
    "Human Mobility and Traffic Prediction",
    "Human-Centered Writing Assistance with AI",
    "Human-Object Interaction Detection",
    "Humor Detection and Generation",
    "Image Quality Assessment in Computer Vision",
    "Image Segmentation with Foundation Models",
    "Impact of AI on Software Development",
    "Improving Code Generation with Large Language Models",
    "Information Extraction in NLP",
    "Information Retrieval and Ranking Techniques",
    "Information Retrieval and Summarization Techniques",
    "Instruction Following in Large Language Models",
    "Intelligent Agents and Multi-Agent Systems in AI",
    "Intelligent Tutoring Systems for Knowledge Tracing",
    "IoT Cybersecurity and Threat Detection",
    "Job Title Normalization and Recommendation Systems",
    "Knowledge Base Question Answering",
    "Knowledge Editing in Large Language Models",
    "Knowledge Graph Construction and Entity Relations",
    "Knowledge Graph-based Question Answering",
    "Knowledge Graphs and Entity Linking",
    "Knowledge Graphs and Large Language Models Integration",
    "Knowledge Retrieval for Large Language Models",
    "Language Models for Tabular Data Analysis",
    "Language-Guided 3D Scene Generation",
    "Large Language Models and Privacy Compliance",
    "Large Language Models and Social Intelligence",
    "Large Language Models for Agent Planning",
    "Large Language Models for Electronic Design Automation",
    "Large Language Models for Hardware Security and Bug Repair",
    "Large Language Models for Low-Resource Languages",
    "Large Language Models for Mobile GUI Testing",
    "Large Language Models for Planning Tasks",
    "Large Language Models for Social Science Classification",
    "Large Language Models for Text Analysis",
]

# Embed every topic label once up front; the vectors are reused for all papers.
topic_embeddings = model.encode(topic_labels)

# Minimum cosine similarity required to assign a topic; papers whose best
# match scores lower are tagged "Uncategorized".
SIMILARITY_THRESHOLD = 0.30

# Load JSONL data (one JSON object per line)
data_path = '/Users/tomasnagy/FastAPI/application/data/filtered_papers_with_extracted_entities.jsonl'
updated_data = []

# Explicit UTF-8 keeps parsing independent of the platform's default encoding.
with open(data_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Tolerate blank lines (e.g. a stray newline at the end of the file)
        # instead of letting json.loads raise on them.
        if not line.strip():
            continue
        paper = json.loads(line)
        # Papers with a missing or empty abstract are left out of the output.
        if not paper.get('abstract', ''):
            continue
        updated_data.append(paper)

# Encode all abstracts in a single batched call rather than one call per
# paper. The guard keeps an input without usable papers from reaching
# cosine_similarity with an empty matrix.
if updated_data:
    abstract_embeddings = model.encode([paper['abstract'] for paper in updated_data])
    # Row i holds the similarity of paper i to every topic label.
    similarity_matrix = cosine_similarity(abstract_embeddings, topic_embeddings)
    best_topic_indices = np.argmax(similarity_matrix, axis=1)

    for paper, similarities, max_similarity_idx in zip(updated_data, similarity_matrix, best_topic_indices):
        max_similarity = similarities[max_similarity_idx]
        if max_similarity >= SIMILARITY_THRESHOLD:
            paper['human_readable_topic'] = topic_labels[max_similarity_idx]
        else:
            paper['human_readable_topic'] = "Uncategorized"

# Save updated data, again as one JSON object per line
updated_data_path = '/Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl'
with open(updated_data_path, 'w', encoding='utf-8') as outfile:
    for entry in updated_data:
        json.dump(entry, outfile)
        outfile.write('\n')

print(f"Updated data saved to {updated_data_path}")


Updated data saved to /Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl


In [9]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer


# Filesystem location of the all-MiniLM-L6-v2 sentence-embedding model.
model_dir = "/Users/tomasnagy/FastAPI/application/models/embeddings/all-MiniLM-L6-v2"
# Build the SentenceTransformer encoder from that directory.
model = SentenceTransformer(model_dir)

# Predefined topic labels: the candidate values for a paper's
# 'human_readable_topic' field (the list holds far more than 30 entries).
topic_labels = [
    "Modal Logics and Formalization",
    "Federated Recommendation Systems with Personalization and Privacy",
    "Molecular and Protein Representation Learning",
    "Particle Detection and Classification in High Energy Physics",
    "Planar Graph Algorithms and Properties",
    "Algebraic Geometry and Representation Theory",
    "Knowledge Graph Completion Methods",
    "Steganography Techniques and Methods",
    "Group Theory and Embeddings",
    "Fractional Sobolev Embeddings",
    "Humor Detection and Generation",
    "Network Embeddings and Community Detection",
    "Sarcasm Detection in Natural Language Processing",
    "Manifold Learning and Dimensionality Reduction",
    "Hypergraph and Graph Neural Networks",
    "Sentence Embeddings and Semantic Meaning Analysis",
    "Adversarial Attacks on Graph Neural Networks",
    "Personalized Recommendation Systems",
    "Video Understanding with Large Multimodal Models",
    "Metric Embeddings and Approximation Algorithms",
    "Quantum Machine Learning and Neural Networks",
    "Riemannian Manifolds and Conformal Metrics",
    "Set Theory and Algebraic Structures",
    "Large Language Models in Healthcare Applications",
    "Compositional Image Retrieval (CIR)",
    "Deep Learning Neural Networks",
    "Named Entity Recognition in Multilingual English",
    "Self-Supervised Learning and Deep Metric Learning",
    "Koopman Operator for Nonlinear System Control",
    "Machine Translation with Large Language Models",
    "Anomaly Detection with Multimodal Models",
    "Retrieval-Augmented Generation for Large Language Models",
    "Medical Image Segmentation with Transformers and CNNs",
    "Human Mobility and Traffic Prediction",
    "Deep Neural Network Watermarking Techniques",
    "Improving Transformers for Arithmetic Tasks",
    "Time Series Forecasting with Deep Learning Models",
    "Robot Manipulation and Autonomous Systems",
    "Speech-to-Text and Audio Processing",
    "Graph Contrastive Learning Methods",
    "Vision Transformers with Convolutional Layers",
    "Quantization Techniques for Large Language Models",
    "Mamba Model for Efficient Information Retrieval",
    "Partial Differential Equation Solvers",
    "Audio Captioning and Representation Learning",
    "Hateful Meme Detection Using Multimodal Analysis",
    "Grammatical Error Correction with Large Language Models",
    "Financial NLP and Large Language Models",
    "Diffusion Models for Text-Guided Image Generation and Editing",
    "Multiple Instance Learning for Histopathology Image Analysis",
    "Reinforcement Learning from Human Feedback",
    "Face Forgery Detection and Recognition",
    "Text Classification with Limited Annotations",
    "Multilingual Language Model Transfer",
    "Scene Text Recognition with Vision-Language Models",
    "Syntactic Parsing and Linguistic Analysis in NLP",
    "Entity Alignment in Knowledge Graphs",
    "Causal Analysis with Large Language Models",
    "Federated Learning for Large Language Models",
    "Gender Bias in Large Language Models",
    "Visual Classification with Descriptors and Embeddings",
    "Music Generation and Retrieval Systems",
    "Facial Expression Recognition Methods",
    "Massive MIMO Channel Prediction and Reconstruction",
    "Dialogue Systems and Conversational AI",
    "Sketch Representation Learning and Retrieval",
    "Handwritten Text Recognition and OCR",
    "Relational Reasoning in Neural Networks",
    "Advancements in Topic Modeling and Extraction Techniques",
    "Log Parsing and Analysis Techniques",
    "Intelligent Agents and Multi-Agent Systems in AI",
    "Open-Vocabulary Image Segmentation and Detection",
    "Personality Traits in Language Models",
    "Deep Learning Models for Tabular Data",
    "Adversarial Attacks on Language Models",
    "Video Object Segmentation and Tracking",
    "Multimodal Image Captioning Models",
    "Entity Matching with Large Language Models",
    "Adversarial Attacks on Vision Language Models",
    "3D Point Cloud Processing and Learning",
    "Graph Neural Networks for Privacy Preservation",
    "Continual Learning for Image Segmentation",
    "Graph Learning with Large Language Models",
    "Code Intelligence and Large Language Models",
    "Modular Symmetry in Quark Flavor Models",
    "Explainability in Large Language Models",
    "Quantum Entanglement and Generalised Probabilistic Theories",
    "Gesture Generation from Speech and Text",
    "Human Activity Recognition with Wearable Sensors",
    "Detecting Vulnerabilities in Code with Large Language Models",
    "3D Human Pose Estimation",
    "Sentiment Analysis of COVID-19 Tweets",
    "Aspect-Based Sentiment Analysis",
    "Stance Detection in Natural Language Processing",
    "Job Title Normalization and Recommendation Systems",
    "Multi-Object Tracking and Visual Trackers",
    "Emotion Recognition and Analysis in Text",
    "Unsupervised Domain Adaptation Methods",
    "Legal Language Models Evaluation",
    "Knowledge Distillation in Neural Networks",
    "Phishing Emails and Cybersecurity Threats",
    "Topological Data Analysis and Embeddings",
    "Missing Value Imputation Methods",
    "Action Recognition in Videos using Transformers",
    "Self-Supervised Learning for SAR Imagery Analysis",
    "Personalized Portrait Generation with Diffusion Models",
    "Autonomous Driving Technologies",
    "Document-Level Relation Extraction",
    "Keyphrase Generation and Evaluation",
    "Evolutionary Algorithms with Large Language Models",
    "Domain Generalization and Adaptation",
    "Backdoor Attacks and Defenses in Deep Learning",
    "Text-to-Motion Generation and Animation",
    "Load Forecasting with Deep Learning Models",
    "Skeleton-based Action Recognition",
    "Audio Encode and Voice Conversion Models",
    "Text-to-3D Generation and Editing",
    "Open-Vocabulary 3D Scene Segmentation",
    "Galaxy Classification in Astronomy",
    "ECG Interpretation and Diagnosis",
    "Knowledge Editing in Large Language Models",
    "Evaluating Gender Bias in Large Vision-Language Models",
    "Brain Network Analysis for Neurological Disorders",
    "Graph Transformers for Graph-Structured Data",
    "Parameter-Efficient Transfer Learning and Pre-Training",
    "Game Agents and AI-Driven Game Development",
    "Multimodal Large Language Models (MLLMs)",
    "Spoofing-Aware Speaker Verification Systems",
    "Random Processes and Stochastic Dynamics",
    "Automated Verilog Generation and Hardware Design",
    "Climate Change and NLP Applications",
    "Watermarking Techniques for Large Language Models",
    "Neural Networks and Brain Computation Dynamics",
    "Image Quality Assessment in Computer Vision",
    "Visual Place Recognition (VPR)",
    "Text Clustering with Large Language Models",
    "Defect Classification in Manufacturing",
    "Sexism Detection and Classification",
    "Electroencephalogram Analysis and Modeling",
    "Deep Learning for Weather Forecasting",
    "Context Length Extrapolation for LLMs",
    "Intelligent Tutoring Systems for Knowledge Tracing",
    "Anomaly Detection in Logs and Workflows",
    "Neural Network Pruning Techniques",
    "Privacy-Preserving Large Language Models",
    "Prompt Engineering for Large Language Models",
    "Adversarial Robustness in Deep Learning Models",
    "Detecting AI-Generated Texts",
    "Citation Analysis and Prediction in Scholarly Work",
    "Hate Speech Detection and Analysis",
    "Graph Neural Network Knowledge Distillation",
    "Brain-Inspired Language Models and Neural Representations",
    "Diffusion Transformers for Image Generation",
    "Vision-Language Prompt Learning",
    "Concept Erasure in Text-to-Image Models",
    "Large Language Model Safety Evaluation",
    "Few-Shot Object Detection",
    "Radiology Report Generation with Multimodal Models",
    "6D Object Pose Estimation in Robotics",
    "Edge Computing for AI and Language Models",
    "Sign Language Understanding and Retrieval",
    "Patent Analysis and Retrieval Systems",
    "Generative AI in Architectural Design",
    "Logical Reasoning with Large Language Models"
]

# Embed every topic label once up front; the vectors are reused for all papers.
topic_embeddings = model.encode(topic_labels)

# Minimum cosine similarity required to assign a topic; papers whose best
# match scores lower are tagged "Uncategorized".
SIMILARITY_THRESHOLD = 0.30

# Load JSONL data (one JSON object per line)
data_path = '/Users/tomasnagy/FastAPI/application/data/filtered_papers_with_extracted_entities.jsonl'
updated_data = []

# Explicit UTF-8 keeps parsing independent of the platform's default encoding.
with open(data_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Tolerate blank lines (e.g. a stray newline at the end of the file)
        # instead of letting json.loads raise on them.
        if not line.strip():
            continue
        paper = json.loads(line)
        # Papers with a missing or empty abstract are left out of the output.
        if not paper.get('abstract', ''):
            continue
        updated_data.append(paper)

# Encode all abstracts in a single batched call rather than one call per
# paper. The guard keeps an input without usable papers from reaching
# cosine_similarity with an empty matrix.
if updated_data:
    abstract_embeddings = model.encode([paper['abstract'] for paper in updated_data])
    # Row i holds the similarity of paper i to every topic label.
    similarity_matrix = cosine_similarity(abstract_embeddings, topic_embeddings)
    best_topic_indices = np.argmax(similarity_matrix, axis=1)

    for paper, similarities, max_similarity_idx in zip(updated_data, similarity_matrix, best_topic_indices):
        max_similarity = similarities[max_similarity_idx]
        if max_similarity >= SIMILARITY_THRESHOLD:
            paper['human_readable_topic'] = topic_labels[max_similarity_idx]
        else:
            paper['human_readable_topic'] = "Uncategorized"

# Save updated data, again as one JSON object per line
updated_data_path = '/Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl'
with open(updated_data_path, 'w', encoding='utf-8') as outfile:
    for entry in updated_data:
        json.dump(entry, outfile)
        outfile.write('\n')

print(f"Updated data saved to {updated_data_path}")


Updated data saved to /Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl


In [10]:
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer


# Filesystem location of the all-MiniLM-L6-v2 sentence-embedding model.
model_dir = "/Users/tomasnagy/FastAPI/application/models/embeddings/all-MiniLM-L6-v2"
# Build the SentenceTransformer encoder from that directory.
model = SentenceTransformer(model_dir)

# Predefined topic labels (the list holds far more than 30 entries)
topic_labels = [
    "Modal Logics and Formalization",
    "Federated Recommendation Systems with Personalization and Privacy",
    "Molecular and Protein Representation Learning",
    "Particle Detection and Classification in High Energy Physics",
    "Planar Graph Algorithms and Properties",
    "Algebraic Geometry and Representation Theory",
    "Knowledge Graph Completion Methods",
    "Steganography Techniques and Methods",
    "Group Theory and Embeddings",
    "Fractional Sobolev Embeddings",
    "Humor Detection and Generation",
    "Network Embeddings and Community Detection",
    "Sarcasm Detection in Natural Language Processing",
    "Manifold Learning and Dimensionality Reduction",
    "Hypergraph and Graph Neural Networks",
    "Sentence Embeddings and Semantic Meaning Analysis",
    "Adversarial Attacks on Graph Neural Networks",
    "Personalized Recommendation Systems",
    "Video Understanding with Large Multimodal Models",
    "Metric Embeddings and Approximation Algorithms",
    "Quantum Machine Learning and Neural Networks",
    "Riemannian Manifolds and Conformal Metrics",
    "Set Theory and Algebraic Structures",
    "Large Language Models in Healthcare Applications",
    "Compositional Image Retrieval (CIR)",
    "Deep Learning Neural Networks",
    "Named Entity Recognition in Multilingual English",
    "Self-Supervised Learning and Deep Metric Learning",
    "Koopman Operator for Nonlinear System Control",
    "Machine Translation with Large Language Models",
    "Anomaly Detection with Multimodal Models",
    "Retrieval-Augmented Generation for Large Language Models",
    "Medical Image Segmentation with Transformers and CNNs",
    "Human Mobility and Traffic Prediction",
    "Deep Neural Network Watermarking Techniques",
    "Improving Transformers for Arithmetic Tasks",
    "Time Series Forecasting with Deep Learning Models",
    "Robot Manipulation and Autonomous Systems",
    "Speech-to-Text and Audio Processing",
    "Graph Contrastive Learning Methods",
    "Vision Transformers with Convolutional Layers",
    "Quantization Techniques for Large Language Models",
    "Mamba Model for Efficient Information Retrieval",
    "Partial Differential Equation Solvers",
    "Audio Captioning and Representation Learning",
    "Hateful Meme Detection Using Multimodal Analysis",
    "Grammatical Error Correction with Large Language Models",
    "Financial NLP and Large Language Models",
    "Diffusion Models for Text-Guided Image Generation and Editing",
    "Multiple Instance Learning for Histopathology Image Analysis",
    "Reinforcement Learning from Human Feedback",
    "Face Forgery Detection and Recognition",
    "Text Classification with Limited Annotations",
    "Multilingual Language Model Transfer",
    "Scene Text Recognition with Vision-Language Models",
    "Syntactic Parsing and Linguistic Analysis in NLP",
    "Entity Alignment in Knowledge Graphs",
    "Causal Analysis with Large Language Models",
    "Federated Learning for Large Language Models",
    "Gender Bias in Large Language Models",
    "Visual Classification with Descriptors and Embeddings",
    "Music Generation and Retrieval Systems",
    "Facial Expression Recognition Methods",
    "Massive MIMO Channel Prediction and Reconstruction",
    "Dialogue Systems and Conversational AI",
    "Sketch Representation Learning and Retrieval",
    "Handwritten Text Recognition and OCR",
    "Relational Reasoning in Neural Networks",
    "Advancements in Topic Modeling and Extraction Techniques",
    "Log Parsing and Analysis Techniques",
    "Intelligent Agents and Multi-Agent Systems in AI",
    "Open-Vocabulary Image Segmentation and Detection",
    "Personality Traits in Language Models",
    "Deep Learning Models for Tabular Data",
    "Adversarial Attacks on Language Models",
    "Video Object Segmentation and Tracking",
    "Multimodal Image Captioning Models",
    "Entity Matching with Large Language Models",
    "Adversarial Attacks on Vision Language Models",
    "3D Point Cloud Processing and Learning",
    "Graph Neural Networks for Privacy Preservation",
    "Continual Learning for Image Segmentation",
    "Graph Learning with Large Language Models",
    "Code Intelligence and Large Language Models",
    "Modular Symmetry in Quark Flavor Models",
    "Explainability in Large Language Models",
    "Quantum Entanglement and Generalised Probabilistic Theories",
    "Gesture Generation from Speech and Text",
    "Human Activity Recognition with Wearable Sensors",
    "Detecting Vulnerabilities in Code with Large Language Models",
    "3D Human Pose Estimation",
    "Sentiment Analysis of COVID-19 Tweets",
    "Aspect-Based Sentiment Analysis",
    "Stance Detection in Natural Language Processing",
    "Job Title Normalization and Recommendation Systems",
    "Multi-Object Tracking and Visual Trackers",
    "Emotion Recognition and Analysis in Text",
    "Unsupervised Domain Adaptation Methods",
    "Legal Language Models Evaluation",
    "Knowledge Distillation in Neural Networks",
    "Phishing Emails and Cybersecurity Threats",
    "Topological Data Analysis and Embeddings",
    "Missing Value Imputation Methods",
    "Action Recognition in Videos using Transformers",
    "Self-Supervised Learning for SAR Imagery Analysis",
    "Personalized Portrait Generation with Diffusion Models",
    "Autonomous Driving Technologies",
    "Document-Level Relation Extraction",
    "Keyphrase Generation and Evaluation",
    "Evolutionary Algorithms with Large Language Models",
    "Domain Generalization and Adaptation",
    "Backdoor Attacks and Defenses in Deep Learning",
    "Text-to-Motion Generation and Animation",
    "Load Forecasting with Deep Learning Models",
    "Skeleton-based Action Recognition",
    "Audio Encode and Voice Conversion Models",
    "Text-to-3D Generation and Editing",
    "Open-Vocabulary 3D Scene Segmentation",
    "Galaxy Classification in Astronomy",
    "ECG Interpretation and Diagnosis",
    "Knowledge Editing in Large Language Models",
    "Evaluating Gender Bias in Large Vision-Language Models",
    "Brain Network Analysis for Neurological Disorders",
    "Graph Transformers for Graph-Structured Data",
    "Parameter-Efficient Transfer Learning and Pre-Training",
    "Game Agents and AI-Driven Game Development",
    "Multimodal Large Language Models (MLLMs)",
    "Spoofing-Aware Speaker Verification Systems",
    "Random Processes and Stochastic Dynamics",
    "Automated Verilog Generation and Hardware Design",
    "Climate Change and NLP Applications",
    "Watermarking Techniques for Large Language Models",
    "Neural Networks and Brain Computation Dynamics",
    "Image Quality Assessment in Computer Vision",
    "Visual Place Recognition (VPR)",
    "Text Clustering with Large Language Models",
    "Defect Classification in Manufacturing",
    "Sexism Detection and Classification",
    "Electroencephalogram Analysis and Modeling",
    "Deep Learning for Weather Forecasting",
    "Context Length Extrapolation for LLMs",
    "Intelligent Tutoring Systems for Knowledge Tracing",
    "Anomaly Detection in Logs and Workflows",
    "Neural Network Pruning Techniques",
    "Privacy-Preserving Large Language Models",
    "Prompt Engineering for Large Language Models",
    "Adversarial Robustness in Deep Learning Models",
    "Detecting AI-Generated Texts",
    "Citation Analysis and Prediction in Scholarly Work",
    "Hate Speech Detection and Analysis",
    "Graph Neural Network Knowledge Distillation",
    "Brain-Inspired Language Models and Neural Representations",
    "Diffusion Transformers for Image Generation",
    "Vision-Language Prompt Learning",
    "Concept Erasure in Text-to-Image Models",
    "Large Language Model Safety Evaluation",
    "Few-Shot Object Detection",
    "Radiology Report Generation with Multimodal Models",
    "6D Object Pose Estimation in Robotics",
    "Edge Computing for AI and Language Models",
    "Sign Language Understanding and Retrieval",
    "Patent Analysis and Retrieval Systems",
    "Generative AI in Architectural Design",
    "Logical Reasoning with Large Language Models",
        "Natural Language to Visualization (NL2VIS)",
    "Mamba Models for Computer Vision Tasks",
    "Neural Radiance Fields for 3D Scene Rendering",
    "Generative AI in Telecommunications",
    "Moral Decision-Making in Large Language Models",
    "Chain-of-Thought Prompting for Reasoning",
    "Scaling Laws in Large Language Models",
    "Multimodal Entity Linking and Recognition",
    "Sentence Simplification and Evaluation",
    "Large Language Model Data Curation and Deduplication",
    "Large Language Models for Planning Tasks",
    "Backdoor Attacks on Large Language Models",
    "Automated Program Repair with Large Language Models",
    "Concept Bottleneck Models for Visual Classification",
    "Data Contamination in Large Language Models",
    "Efficient Pruning of Large Language Models",
    "Text Style Transfer",
    "Commonsense Knowledge in Large Language Models",
    "Large Language Models and Copyright Infringement",
    "AI-Assisted Code Generation Tools Evaluation",
    "Generative Adversarial Networks (GANs)",
    "Transformer Models and Attention Mechanisms",
    "Speech-Driven 3D Facial Animation",
    "Medical Imaging Reconstruction and Denoising",
    "Assessing Creativity in Large Language Models",
    "Differential Privacy in Machine Learning",
    "Code Review Automation with LLMs",
    "Large Language Models and Privacy Compliance",
    "Mathematical Proof Formalization with AI",
    "Low-Rank Adaptation (LoRA) for Efficient Fine-Tuning",
    "Sparse Activations in Large Language Models",
    "Argument Mining and Quality Assessment",
    "Knowledge Graphs and Large Language Models Integration",
    "Recurrent Neural Networks for Sequence Modeling",
    "Offline-to-Online Reinforcement Learning",
    "Language Model Privacy and Memorization Risks",
    "Arabic Language Models Development",
    "Influence Functions for Machine Learning Models",
    "Social Bot Detection Methods",
    "Merging Large Language Models for Enhanced Expertise",
    "Large Language Models for Wireless Networking",
    "Named Entity Recognition (NER)",
    "Multimodal GUI Agents for Task Automation",
    "Smart Contract Vulnerability Detection",
    "Multi-Agent Debate Frameworks",
    "Analogical Reasoning in Large Language Models",
    "Multimodal Misinformation Detection and Debunking",
    "Knowledge Graph Construction and Entity Relations",
    "Chatbot Conversations and Dialogue Systems",
    "Catastrophic Forgetting in Large Language Models",
    "NLG Evaluation Metrics and Models",
    "Logical Reasoning in Large Language Models",
    "Role-Playing with Large Language Models",
    "Conversational Search and Retrieval Systems",
    "Text-to-SQL Generation and Optimization",
    "Evaluating Large Language Models for Factoid Question Answering",
    "Machine-Assisted Peer Review",
    "Code Generation with Large Language Models",
    "Ethics in Artificial Intelligence Development",
    "Large Language Model Data Preparation Toolkits",
    "Human-Object Interaction Detection",
    "Large Language Models in Social Simulations and Cooperation",
    "Mamba Models for Efficient NLP Applications",
    "Content Moderation with Large Language Models",
    "Large Language Models for Social Science Classification",
    "Zero-Shot Learning for IoT Sensing and Tasks",
    "Story Generation and Evaluation",
    "Memory-Efficient Zeroth-Order Optimizers",
    "Sports Analytics and AI Models",
    "Table Understanding and Reasoning with Language Models",
    "Evaluating Chinese Language Models",
    "Applying Large Language Models in Requirements Engineering",
    "Energy and Engineering Applications of AI",
    "Open-Source Large Language Models for AI Development",
    "Metaphor Understanding and Generation",
    "Poetry Generation and Analysis",
    "AI-Powered Design and Prototyping Tools",
    "Automated Red Teaming for Large Language Models",
    "Large Language Models in Software Engineering",
    "Impact of AI on Software Development",
    "Long-Context Language Model Evaluation",
    "Crowdsourcing and Large Language Model Annotation",
    "Hyperparameter Optimization Methods",
    "Assurance Case Automation for Safety-Critical Systems",
    "Toxicity Detection in Text and Language Models",
    "AI-Generated Narrative in Games",
    "Automated Penetration Testing with Large Language Models",
    "Speculative Decoding for Large Language Models",
    "Large Language Models for Low-Resource Languages",
    "Synthetic Data Generation with AI and Privacy Concerns",
    "Large Language Models for Mobile GUI Testing",
    "Attribution in Large Language Models",
    "Improving LLM Reasoning with MCTS",
    "Large Vision-Language Models Hallucination Analysis",
    "Web Agents and Automation Tasks",
    "Astronomy and Astrophysics Research",
    "On-Device Large Language Models for Mobile",
    "Homomorphic Encryption for Secure Machine Learning",
    "Cultural Sensitivity in Large Language Models",
    "Natural Language Processing Models and Synthetic Data",
    "Mathematical Reasoning and Question Generation",
    "Geospatial Language Models and GeoAI",
    "Explainable Vision Transformers and Models",
    "Sentiment Analysis in NLP",
    "Network Traffic Analysis and Modeling",
    "Jailbreak Attacks on Large Language Models",
    "Personalization of Large Language Models",
    "Brain Decoding with fMRI and Visual Semantics",
    "Text Summarization with Large Language Models",
    "Medical Image Segmentation with SAM",
    "Theory of Mind in Large Language Models",
    "Social Media Opinion Mining for Elections",
    "Weakly Supervised Semantic Segmentation",
    "Gaze Estimation and Tracking Systems",
    "Large Language Model Security Vulnerabilities and Attacks",
    "Optimizing KV Cache for Large Language Models",
    "Monocular Depth Estimation with Transformers and CNNs",
    "3D Visual Grounding and Scene Understanding",
    "Agricultural Applications of AI and Large Models",
    "Multilingual Fact-Checking and Evidence Retrieval",
    "Federated Learning for Medical Imaging Data Privacy",
    "Fairness in Large Language Models",
    "Reinforcement Learning with Large Language Models",
    "Algorithmic Fairness in Machine Learning",
    "Symbolic Regression and Equation Discovery",
    "Zero-Shot Learning for Visual Recognition",
    "Diffusion Models for NLP",
    "E-commerce Product Attribute Extraction",
    "In-Context Learning in Transformers",
    "Disaster Response using Twitter Data Classification",
    "Fault Diagnosis in Industrial Machinery",
    "Spiking Neural Networks for Image and Language Processing",
    "Automated Test Generation with Large Language Models",
    "Discourse Parsing and Corpus Analysis",
    "Food Computing and Recipe Generation",
    "Automating Ontology Generation with Large Language Models",
    "Cybersecurity and Threat Analysis",
    "Surgical Video Analysis and Recognition",
    "Layer Normalization in Transformers",
    "AI-powered Tutoring in Programming Education",
    "Remote Sensing Image Change Detection and Captioning",
    "Self-Supervised Learning for Medical Imaging Analysis",
    "Generative AI and its Applications",
    "Adversarial Attacks on Retrieval-Augmented Generation",
    "Remote Sensing Multimodal Language Models",
    "Language Models for Tabular Data Analysis",
    "Language Model Compression Techniques",
    "Optimization Methods for Neural Networks",
    "Optimizing GPU Utilization for Large Language Models",
    "Conversational Intent Understanding and Dialogue Systems",
    "Electric Grid Management and Electrification",
    "Chart Understanding with Large Foundation Models",
    "Temporal Event Forecasting and Reasoning",
    "Fake News Detection in AI-Generated Content",
    "In-Context Learning in Large Language Models",
    "Fuzzing with Large Language Models",
    "Negation Understanding in Large Language Models",
    "Prompt Compression Techniques",
    "Propaganda Detection and Analysis",
    "Improving Instruction Following in Large Language Models",
    "Code Completion and Retrieval",
    "Large Language Models in Politics",
    "Knowledge Graph-based Question Answering",
    "Automated Program Verification with Large Language Models",
    "Large Language Model Hallucinations",
    "Process Mining with Large Language Models",
    "Diffusion-Based Video Generation",
    "Self-Admitted Technical Debt Detection in Software Development",
    "Analyzing Political Discourse and Ideology",
    "Automated Grading and Assessment with Large Language Models",
    "Event Extraction and Semantic Annotation",
    "AI-Assisted Text Annotation and Labeling",
    "Neural Network Quantization Methods",
    "Uncertainty in Large Language Models",
    "Deep Learning for Wildlife and Image Classification",
    "Paraphrasing in Natural Language Processing",
    "Reinforcement Learning with Transformers",
    "Document Layout Understanding with Large Language Models",
    "Code Hallucinations in Large Language Models",
    "Blockchain Security and Development",
    "Language-Guided 3D Scene Generation",
    "Contextualized Word Embeddings for Semantic Shift Detection",
    "Machine Unlearning in Large Language Models",
    "Tool Learning with Large Language Models",
    "Mixture of Experts (MoE) for Large Language Models",
        "3D Object Detection with LiDAR and Cameras",
    "Reasoning Distillation in Language Models",
    "Self-Correction in Large Language Models",
    "Continual Learning in Large Language Models",
    "Knowledge Distillation for CNNs",
    "Code Generation with Reinforcement Learning",
    "FPGA-based Accelerators for Vision Transformers",
    "Hopfield Networks and Memory Retrieval Models",
    "Graph Prompting for Pre-trained Graph Models",
    "Spatial Relationship Understanding in Multimodal Models",
    "Masked Autoencoders for Vision Representation",
    "In-Context Learning with Transformers",
    "Video Frame Interpolation and Optical Flow Estimation",
    "Image Super-Resolution Methods",
    "Computer Vision for Crack and Stress Detection",
    "Parameter-Efficient Fine-Tuning for Large Language Models",
    "Diabetic Retinopathy Classification using Deep Learning",
    "Brain Age Estimation using Neuroimaging and Machine Learning",
    "Prompt Tuning for Few-Shot Learning Tasks",
    "Knowledge Conflicts in Large Language Models",
    "Graph Representation Learning and Node Embeddings",
    "Quantization and Manifold Embeddings",
    "Ontology Alignment and Knowledge Graph Reasoning",
    "Morphological Tokenization for Multilingual NLP",
    "Prompt Learning for NLP Tasks",
    "Graph Embeddings and Treewidth Approximation",
    "NLP for Low-Resource Indian Languages",
    "Medical Image Segmentation with Knowledge Transfer",
    "Knowledge Graphs and Entity Linking",
    "Multimodal Learning and Visual Recognition",
    "Cultural Awareness in Multimodal AI Systems",
    "Video Object Segmentation with Multimodal Attention",
    "Seismic Data Analysis with Deep Learning",
    "Temporal Graph Embeddings and Analysis",
    "Multimodal Video Understanding and Generation",
    "Fake Review Detection and Sentiment Analysis",
    "Multilabel Image Recognition with Vision-Language Models",
    "Contrastive Vision-Language Models",
    "Multimodal Representation Learning",
    "Large Language Models for Agent Planning",
    "Kernel Methods for Learning Complex Function Spaces",
    "Knowledge Retrieval for Large Language Models",
    "Machine Learning for Complex Systems Modeling",
    "Multimodal Audio-Visual Language Models",
    "Video Transformers for Spatiotemporal Tasks",
    "In-Context Learning for NLP Tasks",
    "Federated Learning for Private Data",
    "Self-Supervised Learning for Image Classification",
    "Medical Image Analysis with Pre-trained Models",
    "CLIP Models for Visual Learning and Adaptation",
    "Extensible Tokenization for Large Language Models",
    "Hashing Techniques for Efficient Search and Retrieval",
    "Temporal Knowledge Graph Embeddings",
    "Mobile App Review Analysis and Sentiment Extraction",
    "Quantum Systems and Hamiltonian Dynamics",
    "Person Re-Identification with Attributes",
    "Scientific Literature Analysis and Retrieval",
    "Adapting Large Language Models to Specialized Domains",
    "Graph Transformers and Neural Networks",
    "Large Language Models for Text Analysis",
    "Medical Image Registration Methods",
    "Extreme Multi-label Text Classification",
    "Hyperbolic Transformers for Complex Data Modeling",
    "E-commerce and Retail Analytics",
    "Prompt Tuning for Pre-Trained Language Models",
    "Efficient Sparse Attention Mechanisms for LLMs",
    "Synthetic Image Generation for Deep Learning",
    "Reinforcement Learning Methods and Applications",
    "Knowledge Base Question Answering",
    "Contrastive Multimodal Embeddings and Retrieval",
    "Novel View Synthesis and 3D Human Animation",
    "Neural Language Models and Automata Extraction",
    "Image Segmentation with Foundation Models",
    "Information Extraction in NLP",
    "Image Fusion with CNN and Transformer Networks",
    "Parameter-Efficient Fine-Tuning of Pre-Trained Models",
    "Self-Supervised Learning for Vision Models",
    "Backdoor Attacks on NLP Models",
    "Transformer Attention Mechanisms",
    "Efficient Pruning Methods for Large Language Models",
    "Medical Image Segmentation and Classification",
    "Efficient Model Stitching for Dynamic Deployment",
    "Independent Component Analysis for Word Embeddings",
    "3D Point Cloud Pre-training Methods",
    "Text-to-3D Scene Generation and Understanding",
    "Generative AI for Urban Mobility and Transportation",
    "Speech Synthesis and Prosody Analysis",
    "Out-of-Distribution Detection Methods",
    "Social Media Analysis and Sentiment Detection",
    "Natural Language Programming with Large Language Models",
    "Reasoning and Problem Solving with Large Language Models",
    "Object Detection with Transformers",
    "Multimodal Learning for Medical Prediction",
    "Large Language Model Routing and Optimization",
    "Efficient Deep Neural Network Compression Techniques",
    "Reasoning Capabilities of Large Language Models",
    "Efficient Transformers and Softmax Models",
    "Multimodal Urban Analytics with Geospatial Imagery",
    "Code Clone Detection and Analysis",
    "Optimization Techniques for Large Language Models",
    "Large Language Models in NLP Tasks",
    "3D Lane Detection in Autonomous Driving",
    "Saliency Prediction in Images and Vision",
    "Efficient Transformer Training for Long Sequences",
    "Hardware Accelerators for Transformer Models",
    "Microscopy Image Segmentation with Deep Learning",
    "Internal Mechanisms of Multilingual Language Models",
    "Time Series Prediction and Forecasting Models",
    "Visual Object Recognition and Captioning",
    "Multimodal Knowledge Distillation",
    "Software Development with AI-Powered Tools",
    "AI-Driven Scientific Discovery and Automation",
    "Advancements in AI Reasoning and Planning",
    "Speech-to-Speech Conversational Models",
    "Cybersecurity Risks and Vulnerabilities in AI Models",
    "Generative AI and Variational Models",
    "Biases in AI and NLP Models",
    "Code-Mixed Language Processing",
    "Cognitive Architectures and Large Language Models",
    "Remote Sensing Image Segmentation",
    "Large Language Model Privacy Protection",
    "Sparse Neural Networks and Model Compression",
    "Adversarial Attacks on Large Language Models",
    "Meta-Learning and Optimization Algorithms",
    "Reinforcement Learning for Interactive Agents",
    "Image Inpainting with Deep Learning and Attention",
    "AI-Generated Educational Crosswords and Language Models",
    "Information Retrieval and Summarization Techniques",
    "Detecting Misinformation and Bias in Media",
    "Information Retrieval and Ranking Techniques",
    "Deep Learning and Neural Networks Optimization",
    "Evaluating Large Language Models on Multiple Choice Questions",
    "Compiler Optimization and Code Translation",
    "Reward Learning for Complex Tasks",
    "Sparse Autoencoders for Neural Network Interpretability",
    "Evaluating Language Models for Question Answering Tasks",
    "Multimodal Visual Understanding and Generation",
    "Personalized Search with Large Language Models",
    "Personalized Tour Itinerary Recommendation",
    "Evaluating Large Language Models for NLG Tasks",
    "Self-Supervised Learning in Medical Imaging",
    "Opinion Summarization in E-commerce Reviews",
    "Watermarking Techniques for Generative Models",
    "Evaluating Large Language Models for Reliable Answering",
    "In-Context Learning for Cross-Lingual Tasks",
    "Vision Language Models (VLMs)",
    "IoT Cybersecurity and Threat Detection",
    "Cognitive Biases in Large Language Models",
    "Conversational AI Safety and Toxicity Detection",
    "Efficient KV Cache Compression for Large Language Models",
    "Time Series Anomaly Detection with LLMs",
    "Multimodal Foundation Models for Multimedia",
    "Deep Learning with Transformers and Attention",
    "Self-Improvement Methods for Large Language Models",
    "Generative AI in Construction and Industrial Automation",
    "AI in Software Engineering and Development",
    "AI Performance on Standardized Exams and Educational Assessments",
    "Wireless Networks and Federated Learning",
    "Convolutional Neural Networks and Transformers in Medical Imaging",
    "Instruction Following in Large Language Models",
    "Binarization of Vision Transformers",
    "Chain-of-Thought Reasoning in Large Language Models",
    "Visual Adaptation with Learnable Tokens",
    "Code Summarization and API Generation",
    "Large Language Models for Hardware Security and Bug Repair",
    "Linguistic Ambiguity in NLP Systems",
    "Multilingual Language Models and Question Generation",
    "Large Language Models Development and Evaluation",
    "Multimodal Layout Understanding and Generation",
    "Generative AI in Virtual Reality",
    "Automating Software Development with Large Language Models",
    "Improving Code Generation with Large Language Models",
    "Large Language Model Evaluation Benchmarks",
    "Human-Centered Writing Assistance with AI",
    "Large Language Model Fact Memorization",
    "Language Model Tuning and Training"
]

# Minimum cosine similarity an abstract must reach with its best-matching
# topic label to receive that label; anything lower is "Uncategorized".
SIMILARITY_THRESHOLD = 0.30

# Embed every topic label once up front so the same vectors can be reused
# for each abstract comparison.
topic_embeddings = model.encode(topic_labels)

# Load the JSONL papers file and tag each paper with its closest topic label.
data_path = '/Users/tomasnagy/FastAPI/application/data/filtered_papers_with_extracted_entities.jsonl'
updated_data = []

# Explicit UTF-8 so non-ASCII abstracts decode the same way on every platform
# instead of depending on the locale's default encoding.
with open(data_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Tolerate blank lines (e.g. a stray empty line in the file);
        # json.loads would raise on them.
        if not line.strip():
            continue
        paper = json.loads(line)
        # Papers with a missing or empty abstract cannot be matched against
        # the topics; they are left out of the output entirely.
        if not paper.get('abstract'):
            continue
        updated_data.append(paper)

# Encode abstracts in chunks instead of issuing one encode call per paper.
# Chunking also bounds the similarity matrix to
# (ENCODE_CHUNK_SIZE x number of topics) at any one time.
ENCODE_CHUNK_SIZE = 256
for start in range(0, len(updated_data), ENCODE_CHUNK_SIZE):
    chunk = updated_data[start:start + ENCODE_CHUNK_SIZE]
    chunk_embeddings = model.encode([paper['abstract'] for paper in chunk])

    # Row i holds the similarity of abstract i to every topic label.
    similarities = cosine_similarity(chunk_embeddings, topic_embeddings)
    best_topic_idx = np.argmax(similarities, axis=1)
    best_scores = similarities[np.arange(len(chunk)), best_topic_idx]

    for paper, topic_idx, score in zip(chunk, best_topic_idx, best_scores):
        if score >= SIMILARITY_THRESHOLD:
            paper['human_readable_topic'] = topic_labels[topic_idx]
        else:
            # Best match is too weak to be a meaningful label.
            paper['human_readable_topic'] = "Uncategorized"

# Write the tagged papers back out as JSONL: one JSON object per line.
updated_data_path = '/Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl'
with open(updated_data_path, 'w') as outfile:
    outfile.writelines(json.dumps(entry) + '\n' for entry in updated_data)

print(f"Updated data saved to {updated_data_path}")


Updated data saved to /Users/tomasnagy/FastAPI/application/data/updated_2025-03-13_merged.jsonl
