# Análisis de Términos de Búsqueda: Mapping Review IA y ML en Educación Matemática K-12

**MQ6: ¿Qué términos de búsqueda se utilizan para definir la cadena de búsqueda en este tipo de estudios?**

Este notebook analiza los términos de búsqueda y palabras clave utilizados en los estudios sobre IA y ML en educación matemática K-12.

## 1. Configuración del Entorno

In [None]:
# Instalación de dependencias\n
!pip install pandas numpy matplotlib seaborn plotly nltk wordcloud textblob

In [None]:
# Importación de librerías\n
import pandas as pd\n
import numpy as np\n
import matplotlib.pyplot as plt\n
import seaborn as sns\n
import plotly.express as px\n
import plotly.graph_objects as go\n
from plotly.subplots import make_subplots\n
import re\n
import nltk\n
from nltk.corpus import stopwords\n
from nltk.tokenize import word_tokenize\n
from collections import Counter\n
from wordcloud import WordCloud\n
from textblob import TextBlob\n
import warnings\n
warnings.filterwarnings('ignore')\n
\n
# Download the NLTK resources this notebook relies on, skipping any that are
# already installed. Recent NLTK releases load the Punkt tokenizer data from
# 'punkt_tab' instead of the pickled 'punkt' models, so both are requested to
# keep word_tokenize() working regardless of the installed NLTK version.
_NLTK_RESOURCES = {
    'punkt': 'tokenizers/punkt',
    'punkt_tab': 'tokenizers/punkt_tab',
    'stopwords': 'corpora/stopwords',
}
for _package, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package)

# Plot styling
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

# Show every column, untruncated, when displaying DataFrames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

## 2. Carga de Datos desde GitHub

In [None]:
# Load the semicolon-separated dataset from GitHub.
# IMPORTANT: replace the placeholder URL with your real repository.
url = "https://raw.githubusercontent.com/TU_USUARIO/TU_REPOSITORIO/main/MappingReview.csv"
df = pd.read_csv(url, sep=';', encoding='utf-8')

_n_rows, _n_cols = df.shape
print(f"Dataset cargado: {_n_rows} filas y {_n_cols} columnas")
print("\nPrimeras 5 filas:")
df.head()

## 3. Análisis de Términos de Búsqueda (MQ6)

In [None]:
# Función para extraer términos de búsqueda de títulos y abstracts\n
# Terms looked for in titles and abstracts, in the order they are reported.
_SEARCH_TERMS = (
    'artificial intelligence', 'ai', 'machine learning', 'ml', 'deep learning',
    'neural network', 'neural networks', 'intelligent tutoring system', 'its',
    'chatbot', 'chatbots', 'natural language processing', 'nlp',
    'computer vision', 'predictive analytics', 'data mining',
    'mathematics', 'math', 'mathematical', 'algebra', 'geometry', 'calculus',
    'k-12', 'k12', 'primary education', 'secondary education',
    'elementary', 'middle school', 'high school', 'primary school',
    'student', 'students', 'teacher', 'teachers', 'classroom',
    'learning', 'teaching', 'education', 'educational',
    'assessment', 'evaluation', 'performance', 'achievement',
    'personalized', 'adaptive', 'individualized', 'customized',
    'stem', 'science', 'technology', 'engineering',
    'digital', 'online', 'virtual', 'remote', 'distance learning',
)

# Acronyms that are also ordinary lowercase words or word fragments
# ("its", "stem", "ai" in "training"). They are matched in upper case only.
_ACRONYM_TERMS = frozenset({'ai', 'ml', 'its', 'nlp', 'stem'})


def _compile_term_pattern(term):
    """Build the whole-word regex used to detect *term* in free text."""
    if term in _ACRONYM_TERMS:
        return re.compile(rf'(?<!\w){re.escape(term.upper())}(?!\w)')
    return re.compile(rf'(?<!\w){re.escape(term)}(?!\w)', re.IGNORECASE)


# Compiled once so that applying the function to every row stays cheap.
_SEARCH_TERM_PATTERNS = tuple(
    (term, _compile_term_pattern(term)) for term in _SEARCH_TERMS
)


def extract_search_terms(text):
    """Return the known search terms that occur in *text*.

    Terms are matched as whole words or phrases, not as substrings, so
    "training" does not count as "ai" and "students" does not also count as
    "student". Ordinary terms are matched case-insensitively; the acronyms
    in ``_ACRONYM_TERMS`` must appear in upper case (e.g. "ITS"), which keeps
    the possessive "its" from being reported as a tutoring system.

    Args:
        text: A title or abstract. Missing values (None/NaN) are accepted.

    Returns:
        A list of lowercase terms in ``_SEARCH_TERMS`` order, each at most
        once. The list is empty for missing input or when nothing matches.
    """
    if pd.isna(text):
        return []

    text = str(text)
    return [term for term, pattern in _SEARCH_TERM_PATTERNS if pattern.search(text)]
\n
# Tag every publication with the terms found in its title and in its abstract.
for _source_col, _target_col in (('Title', 'Title_Terms'), ('Abstract', 'Abstract_Terms')):
    df[_target_col] = df[_source_col].apply(extract_search_terms)

# Boolean masks: publications where at least one term was found.
_titles_with_terms = df['Title_Terms'].apply(len) > 0
_abstracts_with_terms = df['Abstract_Terms'].apply(len) > 0

print("Extracción de términos de búsqueda completada")
print(f"Publicaciones con términos en títulos: {len(df[_titles_with_terms])}")
print(f"Publicaciones con términos en abstracts: {len(df[_abstracts_with_terms])}")

In [None]:
# Flatten the per-publication term lists into one list per text field.
all_title_terms = [term for terms in df['Title_Terms'] for term in terms]
all_abstract_terms = [term for terms in df['Abstract_Terms'] for term in terms]

# Frequency of each term across all titles / all abstracts.
title_term_counts = Counter(all_title_terms)
abstract_term_counts = Counter(all_abstract_terms)

# Print the 15 most frequent terms of each field under its own header.
for _header, _counts in (
    ("=== TÉRMINOS MÁS FRECUENTES EN TÍTULOS ===", title_term_counts),
    ("\n=== TÉRMINOS MÁS FRECUENTES EN ABSTRACTS ===", abstract_term_counts),
):
    print(_header)
    for term, count in _counts.most_common(15):
        print(f"{term}: {count} apariciones")

In [None]:
def _plot_top_terms(ax, top_terms, title, color):
    """Draw a labelled bar chart of the term frequencies in *top_terms* on *ax*.

    Returns the bar container produced by ``ax.bar``.
    """
    positions = range(len(top_terms))
    bars = ax.bar(positions, top_terms.values(), color=color, alpha=0.7)
    ax.set_xlabel('Términos de Búsqueda', fontsize=14)
    ax.set_ylabel('Frecuencia', fontsize=14)
    ax.set_title(title, fontsize=16, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(top_terms.keys(), rotation=45, ha='right')
    ax.grid(True, alpha=0.3, axis='y')

    # Write each count just above the top of its bar.
    for position, count in enumerate(top_terms.values()):
        ax.text(position, count + 0.5, str(count), ha='center', va='bottom', fontweight='bold')
    return bars


# Side-by-side bar charts of the 10 most frequent terms in titles and abstracts.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

top_title_terms = dict(title_term_counts.most_common(10))
top_abstract_terms = dict(abstract_term_counts.most_common(10))

bars1 = _plot_top_terms(ax1, top_title_terms, 'Términos Más Frecuentes en Títulos', 'lightblue')
bars2 = _plot_top_terms(ax2, top_abstract_terms, 'Términos Más Frecuentes en Abstracts', 'lightcoral')

plt.tight_layout()
plt.show()

## 4. Análisis de Palabras Clave y Nubes de Palabras

In [None]:
# Función para limpiar texto para nube de palabras\n
def clean_text_for_wordcloud(text):
    """Reduce *text* to a space-separated string of word-cloud tokens.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is replaced by a space, and the result is tokenized with NLTK.
    English stopwords and tokens of one or two characters are dropped.

    Args:
        text: Raw text. Missing values (None/NaN) are accepted.

    Returns:
        The surviving tokens joined by single spaces, or '' for missing input.
    """
    if pd.isna(text):
        return ''

    letters_only = re.sub(r'[^a-zA-Z\s]', ' ', str(text).lower())
    words = word_tokenize(letters_only)

    english_stopwords = set(stopwords.words('english'))
    kept = []
    for word in words:
        if len(word) <= 2 or word in english_stopwords:
            continue
        kept.append(word)

    return ' '.join(kept)
\n
# Concatenate every title / every abstract into one document per field
# (missing values become empty strings), then clean each document.
all_titles, all_abstracts = (
    ' '.join(df[_column].fillna('').astype(str)) for _column in ('Title', 'Abstract')
)

clean_titles = clean_text_for_wordcloud(all_titles)
clean_abstracts = clean_text_for_wordcloud(all_abstracts)

print("Texto procesado para nubes de palabras")

In [None]:
# Render the title and abstract word clouds side by side.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

# Options shared by both clouds; only the colormap differs.
_cloud_options = dict(width=800, height=400, background_color='white', max_words=100)
wordcloud_titles = WordCloud(colormap='viridis', **_cloud_options).generate(clean_titles)
wordcloud_abstracts = WordCloud(colormap='plasma', **_cloud_options).generate(clean_abstracts)

for _ax, _cloud, _title in (
    (ax1, wordcloud_titles, 'Nube de Palabras - Títulos'),
    (ax2, wordcloud_abstracts, 'Nube de Palabras - Abstracts'),
):
    _ax.imshow(_cloud, interpolation='bilinear')
    _ax.axis('off')
    _ax.set_title(_title, fontsize=16, fontweight='bold')

plt.tight_layout()
plt.show()

## 5. Análisis de Categorías de Términos

In [None]:
# Thematic grouping of the search terms.
# NOTE(review): 'predictive analytics' and 'data mining' are extracted by
# extract_search_terms but belong to no category here. Confirm this is intended.
term_categories = {
    'AI_ML': ['artificial intelligence', 'ai', 'machine learning', 'ml', 'deep learning', 'neural network', 'neural networks'],
    'Education': ['education', 'educational', 'learning', 'teaching', 'student', 'students', 'teacher', 'teachers', 'classroom'],
    'Mathematics': ['mathematics', 'math', 'mathematical', 'algebra', 'geometry', 'calculus'],
    'K12': ['k-12', 'k12', 'primary education', 'secondary education', 'elementary', 'middle school', 'high school', 'primary school'],
    'Technology': ['intelligent tutoring system', 'its', 'chatbot', 'chatbots', 'natural language processing', 'nlp', 'computer vision'],
    'Assessment': ['assessment', 'evaluation', 'performance', 'achievement'],
    'Personalization': ['personalized', 'adaptive', 'individualized', 'customized'],
    'STEM': ['stem', 'science', 'technology', 'engineering'],
    'Digital': ['digital', 'online', 'virtual', 'remote', 'distance learning']
}

# Per category: title occurrences plus abstract occurrences of its terms.
category_counts = {
    category: sum(
        title_term_counts.get(term, 0) + abstract_term_counts.get(term, 0)
        for term in terms
    )
    for category, terms in term_categories.items()
}

print("=== FRECUENCIA POR CATEGORÍA DE TÉRMINOS ===")
_ranked_categories = sorted(category_counts.items(), key=lambda item: item[1], reverse=True)
for category, count in _ranked_categories:
    print(f"{category}: {count} apariciones")

# Bar chart of the category totals, in dictionary order.
_labels = list(category_counts.keys())
_values = list(category_counts.values())
_positions = range(len(category_counts))

plt.figure(figsize=(12, 8))
bars = plt.bar(_positions, _values, color='lightgreen', alpha=0.7)
plt.xlabel('Categoría de Términos', fontsize=14)
plt.ylabel('Frecuencia Total', fontsize=14)
plt.title('Frecuencia de Términos por Categoría', fontsize=16, fontweight='bold')
plt.xticks(_positions, _labels, rotation=45, ha='right')
plt.grid(True, alpha=0.3, axis='y')

# Write each total just above the top of its bar.
for _position, _value in enumerate(_values):
    plt.text(_position, _value + 0.5, str(_value), ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Pie chart of each category's share of the total term occurrences.
_pie_labels = list(category_counts.keys())
_pie_sizes = list(category_counts.values())

plt.figure(figsize=(12, 8))
# One evenly spaced Set3 colour per category.
colors = plt.cm.Set3(np.linspace(0, 1, len(category_counts)))

wedges, texts, autotexts = plt.pie(
    _pie_sizes,
    labels=_pie_labels,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)

plt.title('Distribución de Términos por Categoría', fontsize=16, fontweight='bold')
plt.axis('equal')

# Make the percentage labels inside the wedges readable.
for _percent_label in autotexts:
    _percent_label.set_color('white')
    _percent_label.set_fontweight('bold')

plt.tight_layout()
plt.show()

## 6. Análisis Temporal de Términos

In [None]:
# Coerce Year to numeric; values that cannot be parsed become NaN.
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')

# One Counter of title + abstract terms per publication year.
# Missing years are dropped BEFORE sorting: NaN compares false against every
# value, so sorting a sequence that contains it does not reliably produce
# chronological order, and the dict below preserves that insertion order.
temporal_terms = {}
for year in sorted(df['Year'].dropna().unique()):
    year_data = df[df['Year'] == year]
    temporal_terms[year] = Counter(
        term
        for column in ('Title_Terms', 'Abstract_Terms')
        for terms in year_data[column]
        for term in terms
    )

print("=== EVOLUCIÓN TEMPORAL DE TÉRMINOS ===")
for year, term_counts in temporal_terms.items():
    # A single NaN turns the whole column into floats; print 2020, not 2020.0.
    year_label = int(year) if float(year).is_integer() else year
    print(f"\nAño {year_label}:")
    for term, count in term_counts.most_common(5):
        print(f"  {term}: {count} apariciones")

In [None]:
# Line chart: yearly frequency of a handful of headline terms.
main_terms = ['artificial intelligence', 'machine learning', 'mathematics', 'education', 'k-12']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

plt.figure(figsize=(15, 8))

# The x axis is the same for every term, so it is computed once.
years = sorted(temporal_terms.keys())
for term, _line_color in zip(main_terms, colors):
    # Years in which the term never appears contribute a zero.
    term_counts = [temporal_terms[year].get(term, 0) for year in years]
    plt.plot(years, term_counts, marker='o', linewidth=2, label=term, color=_line_color)

plt.title('Evolución Temporal de Términos Principales', fontsize=16, fontweight='bold')
plt.xlabel('Año', fontsize=14)
plt.ylabel('Frecuencia', fontsize=14)
plt.legend(title='Términos', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 7. Análisis de Co-ocurrencia de Términos

In [None]:
# Count how many publications mention each pair of terms together.
from itertools import combinations

co_occurrence = Counter()
for title_terms, abstract_terms in zip(df['Title_Terms'], df['Abstract_Terms']):
    # Merge title and abstract terms into a set first. A term present in both
    # would otherwise pair with itself and inflate every pair it belongs to.
    publication_terms = sorted(set(title_terms) | set(abstract_terms))
    # combinations() over a sorted list yields each unordered pair exactly
    # once, already in sorted order, so each publication adds at most 1.
    co_occurrence.update(combinations(publication_terms, 2))

# The 15 most frequent pairs as ((term1, term2), count) tuples.
top_co_occurrences = co_occurrence.most_common(15)

print("=== TOP 15 CO-OCURRENCIAS DE TÉRMINOS ===")
for i, ((term1, term2), count) in enumerate(top_co_occurrences, 1):
    print(f"{i:2d}. {term1} + {term2}: {count} co-ocurrencias")

# Horizontal bar chart of the top pairs (skipped when no pair was found).
if top_co_occurrences:
    co_occurrence_df = pd.DataFrame(top_co_occurrences, columns=['Terms', 'Count'])
    co_occurrence_df['Term_Pair'] = [f"{t1} + {t2}" for (t1, t2) in co_occurrence_df['Terms']]

    plt.figure(figsize=(12, 8))
    bars = plt.barh(range(len(co_occurrence_df)), co_occurrence_df['Count'], color='lightblue', alpha=0.7)
    plt.yticks(range(len(co_occurrence_df)), co_occurrence_df['Term_Pair'], fontsize=10)
    plt.xlabel('Número de Co-ocurrencias', fontsize=14)
    plt.title('Top 15 Co-ocurrencias de Términos', fontsize=16, fontweight='bold')
    plt.grid(True, alpha=0.3, axis='x')

    # Write each count just to the right of its bar.
    for i, count in enumerate(co_occurrence_df['Count']):
        plt.text(count + 0.1, i, str(count), va='center', fontweight='bold')

    plt.tight_layout()
    plt.show()

## 8. Resumen y Conclusiones

In [None]:
def _describe_top_term(counts):
    """Format the most frequent entry of *counts* as 'term (N apariciones)'.

    Returns 'N/A' when *counts* is empty, so the summary still prints when no
    term was found instead of failing with IndexError.
    """
    if not counts:
        return "N/A"
    term, count = counts.most_common(1)[0]
    return f"{term} ({count} apariciones)"


# Executive summary of the figures computed in the previous cells.
print("=== RESUMEN EJECUTIVO ===\n")

print(f"📊 Total de términos únicos en títulos: {len(title_term_counts)}")
print(f"📝 Total de términos únicos en abstracts: {len(abstract_term_counts)}")
print(f"📈 Término más frecuente en títulos: {_describe_top_term(title_term_counts)}")
print(f"📉 Término más frecuente en abstracts: {_describe_top_term(abstract_term_counts)}")

# Category with the highest combined title + abstract count.
most_common_category = max(category_counts, key=category_counts.get)
print(f"🏆 Categoría más frecuente: {most_common_category} ({category_counts[most_common_category]} apariciones)")

# Most frequent term pair, when any pair was found.
if top_co_occurrences:
    most_common_co_occurrence = top_co_occurrences[0]
    (_first_term, _second_term), _pair_count = most_common_co_occurrence
    print(f"🔗 Co-ocurrencia más frecuente: {_first_term} + {_second_term} ({_pair_count} veces)")

# NOTE(review): the conclusions below are fixed text. They are printed as-is
# whatever the computed results are, so check them against the output above.
print("\n=== CONCLUSIONES ===")
print("1. Los términos de IA y ML dominan la literatura")
print("2. Los términos educativos son fundamentales")
print("3. Hay una evolución temporal en el uso de términos")
print("4. Existen patrones de co-ocurrencia significativos")