This experiment deals with the co-occurrence of concepts on the same page or on surrounding pages. It is based on the assumption that dependencies are used in close proximity to the concept that depends on them.
Pseudo-algorithm:
1. For each concept: find all other concepts that (a) are introduced before it and (b) are mentioned again in proximity to the page where the concept is introduced (1-2 pages of padding).

In [2]:
import pandas as pd
import numpy as np
from collections import Counter
import sys
sys.path.append('../src/')
import data_layer

In [11]:
# Upper bound on how many co-occurring concepts are kept per concept.
MAX_K = 5
# Window around a concept's first page; both bounds are compared strictly,
# so a value of 3 reaches at most 2 pages before / after.
PAGES_BEFORE = 3
PAGES_AFTER = 3

In [3]:
# Load the per-book indices and the collection of Wikipedia concepts through
# the project's data layer.
# NOTE(review): presumably book_indices maps a book to a DataFrame with
# `wiki_concept`, `pages` and `first_page` columns — confirm in data_layer.
book_indices, wiki_concepts = data_layer.read_index_and_wiki_concepts()

In [4]:
def get_page_list(page_string):
    """Parse an index "pages" entry into a list of integer page numbers.

    Args:
        page_string: A comma-separated string of page numbers
            (e.g. ``"3,17,42"``), a single numeric page, or a missing value
            (``None``, float NaN, or a string containing ``"nan"``).

    Returns:
        The page numbers as a list of ints; an empty list for missing values.

    Raises:
        ValueError: If a non-empty token is not a valid integer literal.
    """
    import math

    if page_string is None:
        return []

    if isinstance(page_string, str):
        # A textual "nan" marks the whole entry as missing.
        if "nan" in page_string:
            return []
        # Skip empty tokens so "" or a trailing comma does not crash int().
        return [int(token) for token in page_string.split(",") if token.strip()]

    # Non-string entries are a single numeric page. A float NaN is a missing
    # value and must not reach int(), which would raise ValueError.
    if isinstance(page_string, float) and math.isnan(page_string):
        return []
    return [int(page_string)]

def get_df_page_concepts(wiki_concepts, wiki_pages):
    """Build a long-format table with one row per (page, concept) mention.

    Args:
        wiki_concepts: Iterable of concept names.
        wiki_pages: Iterable of page entries, aligned with ``wiki_concepts``;
            each entry is expanded with ``get_page_list``.

    Returns:
        A DataFrame with the columns ``page`` and ``concept``.
    """
    # Flatten every concept's page list into (page, concept) pairs.
    mentions = [
        (page_number, name)
        for name, page_entry in zip(wiki_concepts, wiki_pages)
        for page_number in get_page_list(page_entry)
    ]
    return pd.DataFrame(
        {
            "page": [page_number for page_number, _ in mentions],
            "concept": [name for _, name in mentions],
        }
    )

In [5]:
# For every known concept, collect the concepts mentioned close to the page
# on which it first appears, accumulated over all books.
potential_deps = {concept: [] for concept in wiki_concepts}

for book, index in book_indices.items():
    # One (page, concept) row per page on which a concept is indexed in this book.
    df_pages = get_df_page_concepts(index.wiki_concept, index.pages)

    for concept, page in zip(index.wiki_concept, index.first_page):
        # Strict bounds: only pages strictly between page - PAGES_BEFORE and
        # page + PAGES_AFTER are part of the window.
        in_window = (df_pages.page > page - PAGES_BEFORE) & (df_pages.page < page + PAGES_AFTER)
        # Drop *every* mention of the concept itself. list.remove() would only
        # drop the first one (leaving the concept as its own dependency when it
        # is indexed on several pages in the window) and would raise ValueError
        # when the concept does not occur in the window at all.
        deps = [dep for dep in df_pages.loc[in_window, "concept"] if dep != concept]
        potential_deps[concept].extend(deps)

In [12]:
def get_frequent(deps, max_k=None, min_count=2):
    """Return the most frequent entries of ``deps``, most common first.

    Args:
        deps: Iterable of hashable items (may contain duplicates).
        max_k: Maximum number of items to return. Defaults to the
            module-level ``MAX_K`` when omitted.
        min_count: Minimum number of occurrences an item needs to be kept.
            The default of 2 discards items that were seen only once.

    Returns:
        A list of at most ``max_k`` items, ordered by descending frequency.
    """
    if max_k is None:
        max_k = MAX_K
    counts = Counter(deps)
    # most_common() already yields (item, count) pairs, so no second lookup
    # into the counter is needed.
    return [item for item, count in counts.most_common(max_k) if count >= min_count]

# One row per concept together with its most frequent co-occurring concepts.
df_concepts = pd.DataFrame(
    {
        "concept": potential_deps.keys(),
        "dep_articles": [get_frequent(neighbours) for neighbours in potential_deps.values()],
    }
)

In [17]:
# Show each concept next to its list of frequent co-occurring concepts.
for concept_name, dependencies in zip(df_concepts["concept"], df_concepts["dep_articles"]):
    print(f"{concept_name}: {dependencies}")

Jacobi operator: ['Inner product space']
Inverse hyperbolic functions: []
Substitution cipher: ['System of linear equations']
List of numerical analysis topics: []
History of the function concept: []
Rotation (mathematics): ['Euler angles', 'Hadamard matrix']
Fibonacci: ['Recurrence plot']
Random number generation: []
Empty set: ['Union (set theory)', 'Vector notation', 'Set (mathematics)', 'Subset', 'Ring (mathematics)']
Electromagnetic tensor: []
Parabola: []
Regression analysis: []
Mind map: []
Antiunitary operator: []
Vertical line test: []
Precision (computer science): ["Gauss's method", 'Data analysis']
Householder transformation: []
Autonomous system (mathematics): ['System of linear equations', 'Consistency', 'Ω-consistent theory']
Asymptote: ['Vertical tangent']
Chemical reaction network theory: []
Critical exponent: ['Complex number']
Fast Fourier transform: []
Group testing: []
Fermat's principle: []
Spectral density: []
Universal approximation theorem: []
Equation: ['Vacuum

In [13]:
# Write the concept / dep_articles table to disk as JSON.
df_concepts.to_json("../dat/textbooks/de2.json")