<a href="https://colab.research.google.com/github/klobell/cryptograms/blob/main/cryptogram_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cryptogram Project

## Setup: Install Dependencies & Import Libraries

In [3]:
import os
!pip install -r requirements.txt --quiet

# --- Make src/ folder importable ---
import sys
sys.path.append('src')

# --- External packages (dependencies) ---
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import requests
from bs4 import BeautifulSoup

# --- Standard libraries (built-in) ---
import re
import time
from collections import Counter


## Scraping Training Data

In [None]:
n = 600            # how many puzzles to scrape

url = "https://cryptograms.puzzlebaron.com/play.php"

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/123.0 Safari/537.36"
}

session = requests.Session()

data = []

# Rating text shown on the page -> label stored in the training set
label_map = {
    "easy": "easy",
    "average": "medium",
    "difficult": "hard",
}

# Compiled once instead of on every request: matches the bold "Rating" label
rating_label = re.compile(r"^\s*Rating\s*$", re.I)

for i in range(n):
    # A timeout keeps one stalled connection from hanging the whole scrape
    resp = session.get(url, headers=headers, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # ------- difficulty -------
    # Reset on every page so a page without a rating can never inherit the
    # label scraped from the previous puzzle.
    difficulty = None
    rating_tag = soup.find("b", string=rating_label)
    if rating_tag is not None:
        # strings inside the enclosing element: [label, rating text, ...]
        pieces = list(rating_tag.parent.stripped_strings)
        if len(pieces) >= 2:
            # .get() -> unknown rating text yields None (row skipped below)
            # instead of a KeyError that would discard everything scraped so far
            difficulty = label_map.get(pieces[1].lower())

    # ------- ciphertext -------
    words = []
    for wdiv in soup.find_all("div", class_="word"):
        letters = [c.get_text(strip=True) for c in wdiv.find_all("div", class_="cletter")]
        if letters:
            words.append("".join(letters))
    ciphertext = " ".join(words).strip()

    if ciphertext and difficulty:
        data.append({"ciphertext": ciphertext, "difficulty": difficulty})
    else:
        # debug prints (if structure changes)
        print(f"[{i+1}/{n}] skipped (ciphertext or difficulty missing)")

    # be polite to the server: one request per second
    time.sleep(1.0)

# --- save ---
# NOTE(review): this writes to the working directory, while the loading cell
# below reads "data/train_df.csv" -- move the file (or align the paths).
train_df = pd.DataFrame(data)
train_df.to_csv("train_df.csv", index=False)
train_df.head(3)


Unnamed: 0,ciphertext,difficulty
0,JOBC BCY STBSCOLM YLK BCY XWYTVHNYV PA BCY SCTVY,hard
1,SZ MTM HGN FPWHKZ WV SZ KQZS GYMZQ SZ XCVN JZF...,hard
2,SAN WQUNFH QR FQKLMVN VLM YQ NWHNIANFN RQF HLS...,easy


In [16]:
train_df = pd.read_csv("data/train_df.csv")

# Sanity checks. These are printed explicitly: a bare expression in the middle
# of a cell is evaluated and thrown away, so it would check nothing.
# Counting duplicates directly also avoids depending on `n` from the scraping cell.
n_duplicates = int(train_df["ciphertext"].duplicated().sum())
print("Duplicate ciphertexts:", n_duplicates)       # expected: 0
print(train_df["difficulty"].value_counts())        # rating value counts

X_train = train_df["ciphertext"]
y_train = train_df["difficulty"]

Unnamed: 0_level_0,count
difficulty,Unnamed: 1_level_1
medium,313
easy,144
hard,143


## Importing featurizers and encrypt functions

In [4]:
import sys
sys.path.append('src')
from src.featurizer import CryptogramFeaturizer, RepeatNgramCounter
from src.encryptor import generate_derangement, encrypt


## Baseline Model: Repeated N-gram Counts + LinearSVC

In [None]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.utils import shuffle
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np


# Baseline: repeated n-gram counts (3 features: repeated unigrams, bigrams,
# trigrams) fed into a class-balanced linear SVM
clf = make_pipeline(
    RepeatNgramCounter(),
    LinearSVC(random_state=42, class_weight='balanced'),
)

# Shuffled, stratified 5-fold splitter shared by both evaluations below
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scoring_kwargs = {"cv": cv, "scoring": "f1_macro"}

# Label-permuted copy of the targets: scoring against it gives the chance-level reference
y_perm = shuffle(y_train, random_state=0)

# Mean macro-F1 on the true labels, then on the permuted labels
real = cross_val_score(clf, X_train, y_train, **scoring_kwargs).mean()
perm = cross_val_score(clf, X_train, y_perm, **scoring_kwargs).mean()

print("Real macro-F1:", real)
print("Permuted macro-F1:", perm)


Real macro-F1: 0.24202082266598396
Permuted macro-F1: 0.24065540194572455


## Pipeline1 (K Neighbors)

In [17]:
# ---------------- Pipeline1 ----------------
# featurize -> standardize -> univariate feature selection -> k-NN
knn_steps = [
    ("features", CryptogramFeaturizer()),
    ("scaler", StandardScaler()),
    ("select", SelectKBest(score_func=f_classif)),
    ("knn", KNeighborsClassifier()),
]
pipeline1 = Pipeline(knn_steps)


# ---------------- Grid search ----------------
# Search over the number of kept features, the neighbourhood size and the distance metric
knn_param_grid = {
    "select__k": [3, 5, 8, 13, "all"],
    "knn__n_neighbors": list(range(2, 7)),
    "knn__metric": ["euclidean", "manhattan"],
}

grid_cv = GridSearchCV(
    estimator=pipeline1,
    param_grid=knn_param_grid,
    scoring="f1_macro",
    cv=5,
    return_train_score=True,
)

# ---------------- Fit ----------------
grid_cv.fit(X_train, y_train)

# ---------------- Results ----------------
print("Best score:", grid_cv.best_score_)
print("Best params:", grid_cv.best_params_)

# Apply the winning hyper-parameters to pipeline1 itself and refit on all training data
pipeline1.set_params(**grid_cv.best_params_).fit(X_train, y_train)




Best score: 0.3803474861668076
Best params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 5, 'select__k': 3}


## Pipeline2 (Random Forest)

In [None]:
# ---------------- Pipeline2 ----------------
# featurize -> univariate feature selection -> random forest
forest_steps = [
    ("features", CryptogramFeaturizer()),
    ("select", SelectKBest(score_func=f_classif)),
    ("clf", RandomForestClassifier(random_state=67)),
]
pipeline2 = Pipeline(forest_steps)

# ---------------- Grid search ----------------
param_grid = dict(
    select__k=[5, 8, 13, "all"],
    clf__n_estimators=[100, 200],
    clf__max_depth=[None, 10],
    clf__min_samples_split=[5, 10, 15],
    clf__min_samples_leaf=[1, 3, 5],
    clf__max_features=["sqrt", "log2", None],  # features considered at each split
)

grid_cv = GridSearchCV(
    estimator=pipeline2,
    param_grid=param_grid,
    scoring="f1_macro",
    cv=5,
    return_train_score=True,
)

# ---------------- Fit ----------------
grid_cv.fit(X_train, y_train)

# ---------------- Results ----------------
print("Best score:", grid_cv.best_score_)
print("Best params:", grid_cv.best_params_)

# Apply the winning hyper-parameters to pipeline2 itself and refit on all training data
pipeline2.set_params(**grid_cv.best_params_).fit(X_train, y_train)


Best score: 0.34838598897811857
Best params: {'clf__max_depth': 10, 'clf__max_features': None, 'clf__min_samples_leaf': 1, 'clf__min_samples_split': 10, 'clf__n_estimators': 100, 'select__k': 5}


## Pipeline3 (Gradient Boosting)

In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
# cross_val_score is used below; importing it here means this cell no longer
# relies on the baseline cell having been run first.
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import classification_report

# ---------------- Updated pipeline ----------------
# featurize -> standardize -> keep all features (k="all") -> histogram gradient boosting
pipeline3 = Pipeline([
    ("features", CryptogramFeaturizer()),
    ("scaler", StandardScaler()),
    ("select", SelectKBest(score_func=f_classif, k="all")),
    ("gb", HistGradientBoostingClassifier(random_state=42))
])


# ---------------- Results ----------------
# Mean macro-F1 over 5 cross-validation folds of the training set
scores = cross_val_score(pipeline3, X_train, y_train, cv=5, scoring="f1_macro")
print("Estimated test macro-F1:", scores.mean())


# ---------------- Fit ----------------
# Final fit on the full training set
pipeline3.fit(X_train, y_train)


Estimated test macro-F1: 0.3406770764814519


## Scraping funny quotes

In [5]:
url = "https://www.rd.com/list/funniest-quotes-all-time/"

# Set headers to mimic a real browser
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/116.0.5845.96 Safari/537.36"}

# Fail fast on HTTP errors (same as the puzzle scraper) instead of silently
# parsing an error page into an empty/garbage frame; the timeout prevents hangs.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

quotes = []
authors = []

for card in soup.find_all("div", class_="listicle-card"):
    # Extract <p> tags inside the card
    for p in card.find_all("p"):
        text = p.get_text(strip=True, separator=" ")

        # Author is taken from the first <i> tag, falling back to <em>
        author_tag = p.find("i") or p.find("em")
        author = author_tag.get_text(strip=True) if author_tag is not None else None

        # Remove author text from quote
        if author:
            quote_text = text.replace(author, "").replace("—", "").strip()
        else:
            quote_text = text.strip()

        # Skip empty quotes or titles
        if quote_text:
            # Remove leading list numbers ("12. ") and surrounding straight quotes
            quote_text = re.sub(r'^\d+\.\s*', '', quote_text)
            quote_text = quote_text.strip('"').strip()

            # Remove — from author if it exists
            if author and author.startswith('—'):
                author = author[1:].strip()

            quotes.append(quote_text)
            authors.append(author)


# Create dataframe
funny_quotes_df = pd.DataFrame({"quote": quotes, "author": authors})
funny_quotes_df.head()



Unnamed: 0,quote,author
0,Whether you’re looking for funny quotes to mak...,Reader’s Digest
1,"rd.com, Getty Images",
2,“My advice to you is get married: If you find ...,Socrates
3,“If you want to be sure that you never forget ...,Aldo Cammarota
4,"“Before you marry a person, you should first m...",Will Ferrell


In [6]:
# cleaning up funny_quotes_df

# Remove bad rows. These are hard-coded row labels from the scrape above.
# NOTE: the frame is reassigned in place, so re-running this cell without
# re-running the scrape cell drops a different set of rows.
funny_quotes_df = funny_quotes_df.drop([0, 39, 60, 69, 74]).reset_index(drop=True)

# remove duplicates + Remove rows where quote contains 'rd.com' or 'Getty Images'
funny_quotes_df = funny_quotes_df.drop_duplicates().reset_index(drop=True)
# The dot is escaped so the pattern matches the literal "rd.com" only
# (an unescaped "." also matches e.g. "hard come").
is_credit_line = funny_quotes_df['quote'].str.contains(r'rd\.com|Getty Images', na=False)
# .copy() so the column assignments below act on an independent frame,
# not on a slice of the previous one (avoids SettingWithCopyWarning).
funny_quotes_df = funny_quotes_df[~is_credit_line].copy()

# Replacing typos (literal substrings, hence regex=False)
replacements = {'complainand': 'complain and', 'hearbut': 'hear but', 'travelfirst': 'travel—first'}
for old, new in replacements.items():
    funny_quotes_df['quote'] = funny_quotes_df['quote'].str.replace(old, new, regex=False)

# Remove extra spaces/characters before and after quotes
funny_quotes_df['quote'] = funny_quotes_df['quote'].apply(
    lambda x: re.sub(r'^[\s\u200b\u00a0“]+|[\s\u200b\u00a0”]+$', '', x)
)

# Handle quotes that start with numbers and have names after:
#   <number> "<quote>" <author>
numbered_quote = re.compile(r'^(\d+)\s*"(.+?)"\s*(.+)$')
for idx, quote in funny_quotes_df['quote'].items():
    match = numbered_quote.match(quote)
    if match:
        # group(1) is the list number and is discarded
        funny_quotes_df.at[idx, 'quote'] = match.group(2).strip()
        funny_quotes_df.at[idx, 'author'] = match.group(3).strip()

# Manual fixes for specific rows: (text to look for, corrected quote, corrected author).
# Applied in order; only the first matching row is rewritten for each entry.
manual_fixes = [
    # Fix the William Tell Overture row specifically
    ('William Tell Overture',
     "My definition of an intellectual is someone who can listen to the William Tell Overture without thinking of the Lone Ranger.",
     "Billy Connolly"),
    # Fix the "There but for the grace of God" row
    ('There but for the grace of God',
     "There but for the grace of God, goes God.",
     "Anonymous"),
    # Fix the Dan Bennett quote row
    ('Dan Bennett',
     "A gossip is a person who creates the smoke in which other people assume there's fire",
     "Dan Bennett"),
]
for needle, fixed_quote, fixed_author in manual_fixes:
    row_mask = funny_quotes_df['quote'].str.contains(needle, na=False, regex=False)
    if row_mask.any():
        idx = funny_quotes_df[row_mask].index[0]
        funny_quotes_df.at[idx, 'quote'] = fixed_quote
        funny_quotes_df.at[idx, 'author'] = fixed_author

# Replace author text "Steve Martin, in the film" with just "Steve Martin"
funny_quotes_df.loc[funny_quotes_df['author'] == "Steve Martin, in the film", 'author'] = "Steve Martin"


# Reset index after all changes
funny_quotes_df = funny_quotes_df.reset_index(drop=True)

funny_quotes_df.to_csv('funny_quotes.csv', index=False)
funny_quotes_df.head()

Unnamed: 0,quote,author
0,My advice to you is get married: If you find a...,Socrates
1,If you want to be sure that you never forget y...,Aldo Cammarota
2,"Before you marry a person, you should first ma...",Will Ferrell
3,Never criticize your spouse’s faults; if it we...,Jay Trachman
4,Never go to bed mad. Stay up and fight.,Phyllis Diller


In [7]:
# Print every cleaned quote next to its position for a manual read-through
for row_number, quote_text in enumerate(funny_quotes_df['quote']):
    print(row_number, quote_text)



0 My advice to you is get married: If you find a good wife you’ll be happy; if not, you’ll become a philosopher.
1 If you want to be sure that you never forget your wife’s birthday, just try forgetting it once.
2 Before you marry a person, you should first make them use a computer with slow Internet service to see who they really are.
3 Never criticize your spouse’s faults; if it weren’t for them, your mate might have found someone better than you.
4 Never go to bed mad. Stay up and fight.
5 Instead of getting married again, I’m going to find a woman I don’t like and give her a house.
6 Behind every great man is a woman rolling her eyes.
7 Adults are always asking children what they want to be when they grow up because they’re looking for ideas.
8 A perfect parent is a person with excellent child-rearing theories and no actual children.
9 Just be good and kind to your children. Not only are they the future of the world, they’re the ones who can sign you into a home.
10 When I was a kid

## Making funny cryptogram CSVs

In [8]:
min_len = 60      # minimum cryptogram length
max_len = 150      # maximum cryptogram length

# Keep only quotes whose character count lies in [min_len, max_len] (inclusive).
# The source is the cleaned in-memory frame; the earlier read of
# "funny_quotes.csv" into this same name was overwritten immediately and has
# been dropped as dead code.
quote_lengths = funny_quotes_df["quote"].str.len()
funny_cryptograms_df = funny_quotes_df[quote_lengths.between(min_len, max_len)].reset_index(drop=True)

funny_cryptograms_df.shape


(78, 2)

In [11]:
# encrypting quote author pairs
import string
import random

letters = list(string.ascii_uppercase)
X_test = []
encrypted_authors = []

quote_author_rows = funny_cryptograms_df[["quote", "author"]].itertuples(index=False, name=None)
for plain_quote, plain_author in quote_author_rows:
    # One fresh mapping per row, shared by the quote and its author so both
    # are encrypted with the same substitution
    substitution = generate_derangement(letters)

    X_test.append(encrypt(plain_quote, substitution).upper())
    encrypted_authors.append(encrypt(plain_author, substitution).upper())



In [18]:
# Rate every encrypted quote with the fitted k-NN pipeline
predictions = pipeline1.predict(X_test)

# Assemble the final table: ciphertext, encrypted author, predicted rating,
# plus the plaintext quote/author for the answer key
output_columns = {
    'Cryptogram': X_test,
    'Encrypted Author': encrypted_authors,
    'Rating': predictions,
    'Quote': funny_cryptograms_df["quote"],
    'Author': funny_cryptograms_df["author"],
}
funny_cryptograms_df = pd.DataFrame(output_columns)

funny_cryptograms_df.head()

Unnamed: 0,Cryptogram,Encrypted Author,Rating,Quote,Author
0,LC FOPREA KT CTS RN VAK LFUURAO: RY CTS YRQO F...,NTEUFKAN,medium,My advice to you is get married: If you find a...,Socrates
1,VT ELY OJWK KL QB NYZB KDJK ELY WBCBZ TLZHBK E...,JSUL IJGGJZLKJ,medium,If you want to be sure that you never forget y...,Aldo Cammarota
2,"FJMICJ BIT EDCCB D KJCPIZ, BIT PNITSV MUCPW ED...",AUSS MJCCJSS,easy,"Before you marry a person, you should first ma...",Will Ferrell
3,SJIJO KOWLWKWDJ NUEO ATUEAJ’A XCEHLA; WX WL FJ...,RCN LOCKQGCS,medium,Never criticize your spouse’s faults; if it we...,Jay Trachman
4,"YEMSRJV NP WRSSYEW IJUUYRV JWJYE, Y’I WNYEW SN...",UNV MSRDJUS,easy,"Instead of getting married again, I’m going to...",Rod Stewart


In [19]:
# Write the complete table (all ratings) to disk, without the index column
funny_cryptograms_df.to_csv(path_or_buf='funny_cryptograms.csv', index=False)


In [20]:
# exporting other csvs


# 1. Easy quotes
df_easy = funny_cryptograms_df[funny_cryptograms_df["Rating"] == "easy"]
df_easy.to_csv("funny_cryptograms_easy.csv", index=False)

# 2. Medium quotes
df_medium = funny_cryptograms_df[funny_cryptograms_df["Rating"] == "medium"]
df_medium.to_csv("funny_cryptograms_medium.csv", index=False)

# 3. Hard quotes
df_hard = funny_cryptograms_df[funny_cryptograms_df["Rating"] == "hard"]
df_hard.to_csv("funny_cryptograms_hard.csv", index=False)

# 4. Non-anonymous quotes, sorted by difficulty
# The author is lower-cased before comparing, so the literal must be lower-case
# too; comparing against "Anonymous" could never match and filtered nothing.
df_not_anonymous = funny_cryptograms_df[
    funny_cryptograms_df["Author"].str.lower() != "anonymous"
].sort_values(by = "Rating")
df_not_anonymous.to_csv("funny_cryptograms_not_anonymous.csv", index=False)


# Exporting Printables

## Funny hard cryptograms

In [None]:
# !pip install python-docx
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_LINE_SPACING
from docx.enum.text import WD_BREAK
from docx.enum.text import WD_ALIGN_PARAGRAPH



# Load csv
df = pd.read_csv("funny_cryptograms_hard.csv")

# Create new doc
doc = Document()

# Title
doc.add_heading("Hard Funny Cryptogram Puzzles", 0)
doc.add_paragraph("\n")  # extra spacing after title

# Cryptograms section

for i, (quote, author) in enumerate(zip(df["Cryptogram"], df["Encrypted Author"]), start=1):
    # Add puzzle number
    p_number = doc.add_paragraph(f"Puzzle {i}")
    p_number.runs[0].font.size = Pt(13)

    # Add the quote paragraph with bigger font and line spacing
    p_quote = doc.add_paragraph(quote)
    run = p_quote.runs[0]
    run.font.name = 'Arial'
    run.font.size = Pt(17)

    # Set line spacing (bigger space between lines in the paragraph)
    p_quote.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_quote.paragraph_format.line_spacing = Pt(40)  # spacing between lines

    # Add author below
    p_author = doc.add_paragraph(f"({author})")
    run_author = p_author.runs[0]
    run_author.font.name = 'Arial'
    run_author.font.size = Pt(15)
    p_author.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_author.paragraph_format.line_spacing = Pt(24)

    # Extra blank paragraph for spacing between puzzles
    doc.add_paragraph("\n" * 3)

# After the last puzzle, add the "Answers on next page" note
p_note = doc.add_paragraph("(Answers on next page)")
p_note.alignment = WD_ALIGN_PARAGRAPH.CENTER
p_note.runs[0].font.size = Pt(18)

# Page break before answers
doc.add_page_break()

# Answers section

doc.add_heading("Answers", level=1)
doc.add_paragraph("\n")  # optional spacing

for i, (quote, author) in enumerate(zip(df["Quote"], df["Author"]), start=1):
    p_answer = doc.add_paragraph(f"Puzzle {i}: {quote} ({author})")
    p_answer.runs[0].font.size = Pt(14)
    p_answer.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE

# Save the document
doc.save("funny_hard_cryptograms.docx")


## Funny medium cryptograms

In [None]:
# !pip install python-docx
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_LINE_SPACING
from docx.enum.text import WD_BREAK
from docx.enum.text import WD_ALIGN_PARAGRAPH



# Load csv
df = pd.read_csv("funny_cryptograms_medium.csv")

# Create new doc
doc = Document()

# Title
doc.add_heading("Medium Funny Cryptogram Puzzles", 0)
doc.add_paragraph("\n")  # extra spacing after title

# Cryptograms section

for i, (quote, author) in enumerate(zip(df["Cryptogram"], df["Encrypted Author"]), start=1):
    # Add puzzle number
    p_number = doc.add_paragraph(f"Puzzle {i}")
    p_number.runs[0].font.size = Pt(13)

    # Add the quote paragraph with bigger font and line spacing
    p_quote = doc.add_paragraph(quote)
    run = p_quote.runs[0]
    run.font.name = 'Arial'
    run.font.size = Pt(17)

    # Set line spacing (bigger space between lines in the paragraph)
    p_quote.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_quote.paragraph_format.line_spacing = Pt(40)  # spacing between lines

    # Add author below
    p_author = doc.add_paragraph(f"({author})")
    run_author = p_author.runs[0]
    run_author.font.name = 'Arial'
    run_author.font.size = Pt(15)
    p_author.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_author.paragraph_format.line_spacing = Pt(24)

    # Extra blank paragraph for spacing between puzzles
    doc.add_paragraph("\n" * 3)

# After the last puzzle, add the "Answers on next page" note
p_note = doc.add_paragraph("(Answers on next page)")
p_note.alignment = WD_ALIGN_PARAGRAPH.CENTER
p_note.runs[0].font.size = Pt(18)

# Page break before answers
doc.add_page_break()

# Answers section

doc.add_heading("Answers", level=1)
doc.add_paragraph("\n")  # optional spacing

for i, (quote, author) in enumerate(zip(df["Quote"], df["Author"]), start=1):
    p_answer = doc.add_paragraph(f"Puzzle {i}: {quote} ({author})")
    p_answer.runs[0].font.size = Pt(14)
    p_answer.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE

# Save the document
doc.save("funny_medium_cryptograms.docx")


## Funny easy cryptograms

In [None]:
# !pip install python-docx
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_LINE_SPACING
from docx.enum.text import WD_BREAK
from docx.enum.text import WD_ALIGN_PARAGRAPH



# Load csv
df = pd.read_csv("funny_cryptograms_easy.csv")

# Create new doc
doc = Document()

# Title
doc.add_heading("Easy Funny Cryptogram Puzzles", 0)
doc.add_paragraph("\n")  # extra spacing after title

# Cryptograms section

for i, (quote, author) in enumerate(zip(df["Cryptogram"], df["Encrypted Author"]), start=1):
    # Add puzzle number
    p_number = doc.add_paragraph(f"Puzzle {i}")
    p_number.runs[0].font.size = Pt(13)

    # Add the quote paragraph with bigger font and line spacing
    p_quote = doc.add_paragraph(quote)
    run = p_quote.runs[0]
    run.font.name = 'Arial'
    run.font.size = Pt(17)

    # Set line spacing (bigger space between lines in the paragraph)
    p_quote.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_quote.paragraph_format.line_spacing = Pt(40)  # spacing between lines

    # Add author below
    p_author = doc.add_paragraph(f"({author})")
    run_author = p_author.runs[0]
    run_author.font.name = 'Arial'
    run_author.font.size = Pt(15)
    p_author.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    p_author.paragraph_format.line_spacing = Pt(24)

    # Extra blank paragraph for spacing between puzzles
    doc.add_paragraph("\n" * 3)

# After the last puzzle, add the "Answers on next page" note
p_note = doc.add_paragraph("(Answers on next page)")
p_note.alignment = WD_ALIGN_PARAGRAPH.CENTER
p_note.runs[0].font.size = Pt(18)

# Page break before answers
doc.add_page_break()

# Answers section

doc.add_heading("Answers", level=1)
doc.add_paragraph("\n")  # optional spacing

for i, (quote, author) in enumerate(zip(df["Quote"], df["Author"]), start=1):
    p_answer = doc.add_paragraph(f"Puzzle {i}: {quote} ({author})")
    p_answer.runs[0].font.size = Pt(14)
    p_answer.paragraph_format.line_spacing_rule = WD_LINE_SPACING.SINGLE

# Save the document
doc.save("funny_easy_cryptograms.docx")
