In [None]:
import requests
import matplotlib.pyplot as plt

def fetch_ngram(phrases, year_start, year_end, corpus=26, smoothing=3, timeout=30):
    """
    Fetch n-gram data from Google Ngram Viewer.

    :param phrases: List of phrases to search for. A single string is
        treated as one phrase.
    :param year_start: Start year for the search.
    :param year_end: End year for the search.
    :param corpus: Corpus ID (e.g., 26 for English).
    :param smoothing: Smoothing parameter for the graph.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Parsed JSON response containing n-gram data.
    :raises RuntimeError: If the server answers with a non-200 status.
    """
    # Joining a bare string would split it into single characters
    # ("abc" -> "a,b,c"), so wrap it into a one-element list first.
    if isinstance(phrases, str):
        phrases = [phrases]

    base_url = "https://books.google.com/ngrams/json"
    params = {
        'content': ','.join(phrases),
        'year_start': year_start,
        'year_end': year_end,
        'corpus': corpus,
        'smoothing': smoothing,
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to fetch data: {response.status_code} - {response.text}"
        )
    return response.json()

def plot_ngram(results, title, year_start=None):
    """
    Plot n-gram timeseries data.

    Each series is drawn against consecutive years beginning at
    ``year_start``; the number of years is taken from the series itself, so
    the x and y data always have the same length.

    :param results: List of n-gram results from the API. Each item is a dict
        with an 'ngram' label and a 'timeseries' list of frequencies.
    :param title: Title for the plot.
    :param year_start: Year of the first data point. Defaults to the
        module-level YEAR_START when omitted.
    """
    # Resolved at call time because YEAR_START is defined after this function.
    if year_start is None:
        year_start = YEAR_START

    plt.figure(figsize=(12, 6))
    for result in results:
        frequencies = result['timeseries']
        # Assumes one data point per consecutive year starting at year_start.
        years = list(range(year_start, year_start + len(frequencies)))
        plt.plot(years, frequencies, label=result['ngram'])

    plt.xlabel("Year")
    plt.ylabel("Relative Frequency")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()

def num_to_words(n):
    """
    Spell out a small number in English (e.g., 3 -> 'three').

    Only 1 through 10 have a spelled-out form; any other value is returned
    as its plain string representation.

    :param n: Integer number to convert.
    :return: String representation of the number in words.
    """
    spelled = dict(enumerate(
        (
            "one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten",
        ),
        start=1,
    ))
    if n in spelled:
        return spelled[n]
    # Outside the table: fall back to the digits themselves.
    return str(n)

# Inclusive year range shared by every query and plot in this file
YEAR_START, YEAR_END = 1900, 2022

if __name__ == "__main__":
    # Define shapes and their respective number of sides
    shapes = {
        "triangle": 3,
        "square": 4,
        "pentagon": 5,
        "hexagon": 6,
        "heptagon": 7,
        "octagon": 8
    }

    # Build the range shown in the titles from the constants actually sent to
    # the API, so the titles cannot disagree with the queried years.
    year_range = f"({YEAR_START}–{YEAR_END})"

    # Case 1: Plot for shapes
    shape_names = list(shapes.keys())
    shape_results = fetch_ngram(shape_names, YEAR_START, YEAR_END)
    plot_ngram(shape_results, f"Shape Names {year_range}")

    # Case 2: Plot for "shape has X sides"
    side_phrases = [f"{shape} has {sides} sides" for shape, sides in shapes.items()]
    side_results = fetch_ngram(side_phrases, YEAR_START, YEAR_END)
    plot_ngram(side_results, f"Shapes with Number of Sides {year_range}")

    # Case 2b: Plot for "shape has three sides" (numbers in words)
    side_phrases_words = [f"{shape} has {num_to_words(sides)} sides" for shape, sides in shapes.items()]
    side_results_words = fetch_ngram(side_phrases_words, YEAR_START, YEAR_END)
    plot_ngram(side_results_words, "Shapes with Worded Number of Sides (e.g., 'triangle has three sides')")

    # Case 3: Plot for "triangle three" and "triangle 3" varieties
    # Plot 1: "triangle three" variety
    phrases_three = [f"{shape} {num_to_words(sides)}" for shape, sides in shapes.items()]  # e.g., "triangle three"
    results_three = fetch_ngram(phrases_three, YEAR_START, YEAR_END)
    plot_ngram(results_three, f"Shape + Word (e.g., 'triangle three') Combinations {year_range}")

    # Plot 2: "triangle 3" variety
    phrases_number = [f"{shape} {sides}" for shape, sides in shapes.items()]  # e.g., "triangle 3"
    results_number = fetch_ngram(phrases_number, YEAR_START, YEAR_END)
    plot_ngram(results_number, f"Shape + Number (e.g., 'triangle 3') Combinations {year_range}")

In [None]:
def _split_number_phrase(label):
    """Split a phrase into (text before the number token, the number token).

    The number token is the second-to-last word, e.g.
    "triangle has three sides" -> ("triangle has", "three").
    Raises IndexError when the label has fewer than two words.
    """
    tokens = label.split()
    return " ".join(tokens[:-2]), tokens[-2]


def combine_frequencies(results_word, results_number, suffix=""):
    """
    Combine frequencies of phrases with worded numbers and numeric numbers.

    Results are paired by the text that precedes the number token (e.g.
    "triangle has"), not by list position, so a phrase missing from one list
    cannot shift the remaining pairs onto the wrong counterpart. A worded
    result with no numeric counterpart is left out of the output.

    :param results_word: List of results with worded numbers (e.g., "triangle has three sides").
    :param results_number: List of results with numeric numbers (e.g., "triangle has 3 sides").
    :param suffix: Optional suffix to append to the combined label (e.g., " sides").
    :return: Combined results as a list of dictionaries with 'ngram' and
        'timeseries' keys, in the order of ``results_word``.
    :raises IndexError: If an 'ngram' label has fewer than two words.
    """
    # NOTE(review): the Ngram endpoint appears to omit phrases it has no data
    # for, which is what makes positional pairing unsafe - confirm.
    # Index numeric results by their phrase prefix; a list per prefix keeps
    # repeated prefixes paired in their original order.
    numeric_by_base = {}
    for number_result in results_number:
        base, numeric_number = _split_number_phrase(number_result['ngram'])
        numeric_by_base.setdefault(base, []).append((numeric_number, number_result))

    combined_results = []
    for word_result in results_word:
        base_label, word_number = _split_number_phrase(word_result['ngram'])

        candidates = numeric_by_base.get(base_label)
        if not candidates:
            # No numeric counterpart: skip rather than add unrelated series.
            continue
        numeric_number, number_result = candidates.pop(0)

        # Element-wise sum; stops at the shorter of the two series.
        combined_timeseries = [
            w + n for w, n in zip(word_result['timeseries'], number_result['timeseries'])
        ]

        combined_results.append({
            'ngram': f"{base_label} {word_number}/{numeric_number}{suffix}",
            'timeseries': combined_timeseries,
        })

    return combined_results


# Polygon names mapped to their side counts
shapes = dict(
    triangle=3,
    square=4,
    pentagon=5,
    hexagon=6,
    heptagon=7,
    octagon=8,
)

# Case 1: frequency of the bare shape names
shape_names = list(shapes)
shape_results = fetch_ngram(shape_names, YEAR_START, YEAR_END)
plot_ngram(shape_results, "Shape Names (1900–2022)")

# Case 2: the same statement written with digits and with number words
side_phrases_number = [
    f"{name} has {count} sides"  # e.g., "triangle has 3 sides"
    for name, count in shapes.items()
]
side_phrases_words = [
    f"{name} has {num_to_words(count)} sides"  # e.g., "triangle has three sides"
    for name, count in shapes.items()
]

results_number = fetch_ngram(side_phrases_number, YEAR_START, YEAR_END)
results_words = fetch_ngram(side_phrases_words, YEAR_START, YEAR_END)

# Sum both spellings into one series per shape and plot the totals
combined_side_results = combine_frequencies(results_words, results_number, " sides")
plot_ngram(
    combined_side_results,
    "Shapes with Combined Number/Word Sides (e.g., 'triangle has three/3 sides')",
)

    