In [7]:
import os
import re
import docx
import pandas as pd
from collections import defaultdict

# –°–ª–æ–≤–∞—Ä—å –¥–ª—è –ø—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è —Ç–µ–∫—Å—Ç–æ–≤—ã—Ö —á–∏—Å–µ–ª –≤ —á–∏—Å–ª–æ–≤—ã–µ
TEXT_NUMBERS = {
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6,
    'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10, 'eleven': 11, 'twelve': 12,
    'thirteen': 13, 'fourteen': 14, 'fifteen': 15, 'sixteen': 16,
    'seventeen': 17, 'eighteen': 18, 'nineteen': 19, 'twenty': 20,
    'thirty': 30, 'forty': 40, 'fifty': 50, 'sixty': 60, 'seventy': 70,
    'eighty': 80, 'ninety': 90, 'hundred': 100
}

def word_to_number(word):
    """–ü—Ä–µ–æ–±—Ä–∞–∑—É–µ—Ç —Ç–µ–∫—Å—Ç–æ–≤–æ–µ —á–∏—Å–ª–æ –≤ —á–∏—Å–ª–æ–≤–æ–µ –∑–Ω–∞—á–µ–Ω–∏–µ."""
    words = re.split(r'[\s\-]+', word.lower())  # –†–∞–∑–±–∏–≤–∞–µ–º –ø–æ –ø—Ä–æ–±–µ–ª–∞–º –∏ –¥–µ—Ñ–∏—Å–∞–º
    number = 0
    temp = 0

    for w in words:
        if w in TEXT_NUMBERS:
            scale = TEXT_NUMBERS[w]
            if scale == 100:  # –û–±—Ä–∞–±–æ—Ç–∫–∞ —Å–æ—Ç–µ–Ω, –Ω–∞–ø—Ä–∏–º–µ—Ä "one hundred"
                temp *= scale
            else:
                temp += scale
        else:
            return None  # –ï—Å–ª–∏ —Å–ª–æ–≤–æ –Ω–µ —Ä–∞—Å–ø–æ–∑–Ω–∞–Ω–æ, –≤–æ–∑–≤—Ä–∞—â–∞–µ–º None

    number += temp
    return number

def extract_chapter_number(text):
    """–ò—â–µ—Ç –∏ –∏–∑–≤–ª–µ–∫–∞–µ—Ç –Ω–æ–º–µ—Ä –≥–ª–∞–≤—ã –∏–∑ —Ç–µ–∫—Å—Ç–∞."""
    # –ü—Ä–æ–≤–µ—Ä—è–µ–º —á–∏—Å–ª–æ–≤–æ–π —Ñ–æ—Ä–º–∞—Ç, –Ω–∞–ø—Ä–∏–º–µ—Ä, "Chapter 1"
    numeric_match = re.match(r'Chapter\s+(\d+)', text, re.IGNORECASE)
    if numeric_match:
        return int(numeric_match.group(1))
    
    # –ü—Ä–æ–≤–µ—Ä—è–µ–º —Ç–µ–∫—Å—Ç–æ–≤—ã–π —Ñ–æ—Ä–º–∞—Ç, –Ω–∞–ø—Ä–∏–º–µ—Ä, "Chapter One" –∏–ª–∏ "Chapter Thirty-One"
    text_match = re.match(r'Chapter\s+([\w\s\-]+)', text, re.IGNORECASE)
    if text_match:
        word = text_match.group(1).strip()
        return word_to_number(word)
    return None

def extract_text_from_docx(file_path):
    """–ò–∑–≤–ª–µ–∫–∞–µ—Ç —Ç–µ–∫—Å—Ç –∏–∑ .docx —Ñ–∞–π–ª–∞."""
    try:
        print(f"–ü—ã—Ç–∞—é—Å—å –æ—Ç–∫—Ä—ã—Ç—å —Ñ–∞–π–ª: {file_path}")
        doc = docx.Document(file_path)
        print("–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ –æ—Ç–∫—Ä—ã—Ç.")
        full_text = []
        for paragraph in doc.paragraphs:
            full_text.append(paragraph.text)
        print("–¢–µ–∫—Å—Ç —É—Å–ø–µ—à–Ω–æ –∏–∑–≤–ª–µ—á—ë–Ω.")
        return '\n'.join(full_text)
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –æ—Ç–∫—Ä—ã—Ç–∏–∏ –∏–ª–∏ —á—Ç–µ–Ω–∏–∏ —Ñ–∞–π–ª–∞: {e}")
        return None

def split_text_into_chapters(text):
    """–†–∞–∑–¥–µ–ª—è–µ—Ç —Ç–µ–∫—Å—Ç –Ω–∞ –≥–ª–∞–≤—ã."""
    if not text:
        print("–¢–µ–∫—Å—Ç –ø—É—Å—Ç, –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ —Ä–∞–∑–¥–µ–ª–∏—Ç—å –Ω–∞ –≥–ª–∞–≤—ã.")
        return {}

    print("–†–∞–∑–¥–µ–ª—è—é —Ç–µ–∫—Å—Ç –Ω–∞ –≥–ª–∞–≤—ã...")
    chapters = defaultdict(tuple)
    chapter_matches = list(re.finditer(r'Chapter\s+([\w\s\-]+)', text, re.IGNORECASE))

    if not chapter_matches:
        print("–ù–µ —É–¥–∞–ª–æ—Å—å –Ω–∞–π—Ç–∏ –≥–ª–∞–≤—ã.")
        return {}

    for i, match in enumerate(chapter_matches):
        start = match.start()
        chapter_title = match.group(0)
        chapter_number = extract_chapter_number(chapter_title)  # –ò–∑–≤–ª–µ–∫–∞–µ–º –Ω–æ–º–µ—Ä –≥–ª–∞–≤—ã
        end = chapter_matches[i + 1].start() if i + 1 < len(chapter_matches) else len(text)
        chapter_text = text[start:end].strip()
        if chapter_number is not None:
            chapters[chapter_number] = (chapter_title, chapter_text)
    
    print(f"–¢–µ–∫—Å—Ç —É—Å–ø–µ—à–Ω–æ —Ä–∞–∑–¥–µ–ª—ë–Ω –Ω–∞ {len(chapters)} –≥–ª–∞–≤.")
    return chapters

# –§—É–Ω–∫—Ü–∏—è –¥–ª—è –ø–æ–∏—Å–∫–∞ —Å–ª–æ–≤ –≤ –∫–∞–∂–¥–æ–π –≥–ª–∞–≤–µ
def search_words_in_chapter(chapter_text, words):
    word_pattern = r'\b(' + '|'.join(re.escape(word) for word in words) + r')\b'
    
    matches = []
    for match in re.finditer(word_pattern, chapter_text, re.IGNORECASE):
        start = match.start()
        end = match.end()
        
        # –ë–µ—Ä–µ–º –ø–æ —Ç—Ä–∏ —Å–ª–æ–≤–∞ –¥–æ –∏ –ø–æ—Å–ª–µ
        start_context = chapter_text[:start].split()[-8:]
        end_context = chapter_text[end:].split()[:8]
        
        # –ü–æ–ª—É—á–∞–µ–º –Ω–∞–π–¥–µ–Ω–Ω–æ–µ —Å–ª–æ–≤–æ –∏ –ø–æ–∑–∏—Ü–∏—é –≤ —Ç–µ–∫—Å—Ç–µ
        found_word = chapter_text[start:end]
        result = ' '.join(start_context + [found_word] + end_context)
        
        matches.append((start, result))  # –°–æ—Ö—Ä–∞–Ω—è–µ–º –ø–æ–∑–∏—Ü–∏—é –∏ —Ä–µ–∑—É–ª—å—Ç–∞—Ç
    
    # –°–æ—Ä—Ç–∏—Ä—É–µ–º —Å–æ–≤–ø–∞–¥–µ–Ω–∏—è –ø–æ –ø–æ–∑–∏—Ü–∏–∏ –≤ —Ç–µ–∫—Å—Ç–µ
    matches.sort(key=lambda x: x[0])
    
    return [match[1] for match in matches]
    
def search_in_all_chapters(chapters, words, category):
    if not chapters:
        print("–ù–µ—Ç –≥–ª–∞–≤ –¥–ª—è –ø–æ–∏—Å–∫–∞.")
        return []
    
    print(f"–ò—â—É –∑–∞–¥–∞–Ω–Ω—ã–µ —Å–ª–æ–≤–∞ –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}...")
    results = []
    for chapter_number, (chapter_title, chapter_text) in chapters.items():
        matches = search_words_in_chapter(chapter_text, words)
        if matches:
            for match in matches:
                results.append([chapter_number, chapter_title, match, category])
    print(f"–ù–∞–π–¥–µ–Ω–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π –≤ {len(results)} –≥–ª–∞–≤–∞—Ö –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ {category}.")
    return results

# –û—Å—Ç–∞–ª—å–Ω–∞—è —á–∞—Å—Ç—å –∫–æ–¥–∞ –æ—Å—Ç–∞—ë—Ç—Å—è –ø—Ä–µ–∂–Ω–µ–π...

def main(docx_file_path, chapters_folder):
    if not os.path.exists(docx_file_path):
        print(f"–§–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω: {docx_file_path}")
        return

    print(f"–ò–∑–≤–ª–µ–∫–∞—é —Ç–µ–∫—Å—Ç –∏–∑ {docx_file_path}...")
    book_text = extract_text_from_docx(docx_file_path)
    
    if not book_text:
        print("–û—à–∏–±–∫–∞: —Ç–µ–∫—Å—Ç –Ω–µ –±—ã–ª –∏–∑–≤–ª–µ—á—ë–Ω.")
        return

    chapters = split_text_into_chapters(book_text)
    
    if not chapters:
        print("–û—à–∏–±–∫–∞: –≥–ª–∞–≤—ã –Ω–µ –±—ã–ª–∏ –Ω–∞–π–¥–µ–Ω—ã.")
        return

    clothes_words = ['stilettos', 'sunglasses', 'ring', 'necklace', 'bracelet', 'earrings', 'brooch', 'watch', 'anklet', 'choker', 'pendant', 'cufflinks', 'tie clip', 'nose ring', 'belly button ring', 'toe ring', 'hairpin', 'tiara', 'diadem', 'bangle', 'chain', 'medallion', 'pearl necklace', 'locket', 'armband', 'charm bracelet', 'dress', 'robe', 'suit', 'clothes', 'coat', 'jacket', 'shirt', 'pants', 'skirt', 'jeans', 't-shirt', 'sweater', 'blouse', 'shorts', 'hoodie', 'vest', 'scarf', 'hat', 'gloves', 'boots', 'shoes', 'sneakers', 'socks', 'tie', 'belt', 'gown', 'trench coat', 'blazer', 'cardigan', 'overalls', 'tank top', 'leggings']
    hair_words = ['hair', 'beard', 'ponytail', 'bun', 'braids', 'bob', 'pixie cut', 'long waves', 'curly hair', 'straight hair', 'afro', 'buzz cut', 'french twist', 'dreadlocks', 'fishtail braid', 'half-up half-down', 'side part', 'middle part', 'updo', 'loose curls', 'locks', 'layered cut', 'shag cut', 'crew cut', 'mohawk', 'bangs', 'chignon', 'top knot']
    appearances_words = ['fur', 'black wolf', 'white wolf', 'brown wolf', 'caramel hair', 'blonde', 'brunette', 'redhead', 'white hair', 'red hair', 'auburn hair', 'chestnut hair', 'black hair', 'grey hair', 'dark hair', 'blue eyes', 'blue irises', 'blue eyeballs', 'brown eyes', 'brown irises', 'brown eyeballs', 'black eyes', 'black irises', 'black eyeballs', 'red eyes', 'red irises', 'red eyeballs', 'hazel eyes', 'hazel irises', 'hazel eyeballs', 'green eyes', 'green irises', 'green eyeballs', 'eyes were green', 'eyes were black', 'eyes were brown', 'eyes were hazel', 'eyes were blue', 'eyes were grey', '5 feet', "5'", "6'", "7'", '6 feet', '7 feet', 'feet tall', 'slim', 'thin', 'thick', 'tall', 'dark skin', 'white skin', 'pale skin', 'freckles', 'tattoos', 'tattoo', 'brown skin', 'black skin', 'high cheekbones', 'wrinkles', 'wrinkled', 'full lips', 'small breasts', 'curves', 'big breasts']
    other_words = ['eyes', 'face', 'skin', 'body', 'fur', 'chin', 'cheeks', 'big wolf', 'white wolf', 'beautiful', 'ugly', 'handsome', 'cute', 'gorgeous', 'sharp', 'features', 'arms', 'bicep', 'legs', 'ass', 'breasts', 'waist', 'muscular', 'pale', 'features', 'thin', 'fangs', 'tattoos', 'teeth', 'mouth', 'young', 'old', 'blood', 'bleeding', 'gaze', 'smirk', 'smile', 'lips', 'nose', 'hands', 'jaw']
    weather_words = ['morning', 'afternoon', 'evening', 'night', 'sunrise', 'sunset', 'dawn', 'dusk', 'noon', 'midnight', 'cloudy', 'rain', 'storm', 'wind', 'sun', 'sunny', 'fog', 'foggy', 'snow', 'snowy', 'hail', 'thunder', 'lightning', 'breeze', 'chilly', 'hot', 'warm', 'cold', 'frost', 'blizzard', 'temperature', 'humid', 'dry', 'drizzle', 'pouring', 'downpour', 'mist', 'overcast']
    locations_words = ['forest', 'living room', 'dining room', 'school', 'college', 'training grounds', 'field', 'bathroom', 'bedroom', 'cabin', 'house']
   
    clothes_results = search_in_all_chapters(chapters, clothes_words, 'Clothes')
    hair_results = search_in_all_chapters(chapters, hair_words, 'Hair')
    appearances_results = search_in_all_chapters(chapters, appearances_words, 'Appearances')
    weather_results = search_in_all_chapters(chapters, weather_words, 'Weather')
    locations_results = search_in_all_chapters(chapters, locations_words, 'Locations')
    other_results = search_in_all_chapters(chapters, other_words, 'Other')

    all_results = clothes_results + hair_results + appearances_results + weather_results + locations_results + other_results

    all_data_df = pd.DataFrame(all_results, columns=['Chapter Number', 'Chapter Title', 'Match', 'Category'])
    all_data_df.sort_values(by=['Chapter Number'], inplace=True)

    os.makedirs(f"{chapters_folder}", exist_ok=True)

    excel_file_path = f"{chapters_folder}/details.xlsx"
    try:
        all_data_df.to_excel(excel_file_path, sheet_name='Details', index=False)
        print(f"–§–∞–π–ª Excel —Å–æ–∑–¥–∞–Ω: {excel_file_path}")
    except Exception as e:
        print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–∏ Excel —Ñ–∞–π–ª–∞: {e}")

# –ü–∞–ø–∫–∞ –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è
chapters_folder = "million_dollar_bride"
docx_file_path = f"{chapters_folder}/fulltext.docx"
main(docx_file_path, chapters_folder)


–ò–∑–≤–ª–µ–∫–∞—é —Ç–µ–∫—Å—Ç –∏–∑ million_dollar_bride/fulltext.docx...
–ü—ã—Ç–∞—é—Å—å –æ—Ç–∫—Ä—ã—Ç—å —Ñ–∞–π–ª: million_dollar_bride/fulltext.docx
–§–∞–π–ª —É—Å–ø–µ—à–Ω–æ –æ—Ç–∫—Ä—ã—Ç.
–¢–µ–∫—Å—Ç —É—Å–ø–µ—à–Ω–æ –∏–∑–≤–ª–µ—á—ë–Ω.
–†–∞–∑–¥–µ–ª—è—é —Ç–µ–∫—Å—Ç –Ω–∞ –≥–ª–∞–≤—ã...
–¢–µ–∫—Å—Ç —É—Å–ø–µ—à–Ω–æ —Ä–∞–∑–¥–µ–ª—ë–Ω –Ω–∞ 93 –≥–ª–∞–≤.
–ò—â—É –∑–∞–¥–∞–Ω–Ω—ã–µ —Å–ª–æ–≤–∞ –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Clothes...
–ù–∞–π–¥–µ–Ω–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π –≤ 277 –≥–ª–∞–≤–∞—Ö –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Clothes.
–ò—â—É –∑–∞–¥–∞–Ω–Ω—ã–µ —Å–ª–æ–≤–∞ –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Hair...
–ù–∞–π–¥–µ–Ω–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π –≤ 178 –≥–ª–∞–≤–∞—Ö –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Hair.
–ò—â—É –∑–∞–¥–∞–Ω–Ω—ã–µ —Å–ª–æ–≤–∞ –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Appearances...
–ù–∞–π–¥–µ–Ω–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π –≤ 129 –≥–ª–∞–≤–∞—Ö –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Appearances.
–ò—â—É –∑–∞–¥–∞–Ω–Ω—ã–µ —Å–ª–æ–≤–∞ –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Weather...
–ù–∞–π–¥–µ–Ω–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π –≤ 306 –≥–ª–∞–≤–∞—Ö –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Weather.
–ò—â—É –∑–

In [15]:
import os
import re
import pandas as pd

def search_words_in_chapter(chapter_text, words):
    """–ò—â–µ—Ç —Å–ª–æ–≤–∞ –≤ —Ç–µ–∫—Å—Ç–µ –≥–ª–∞–≤—ã –∏ –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç –∏—Ö —Å –ø–æ–∑–∏—Ü–∏—è–º–∏."""
    word_pattern = r'\b(' + '|'.join(re.escape(word) for word in words) + r')\b'
    matches = []

    for match in re.finditer(word_pattern, chapter_text, re.IGNORECASE):
        start, end = match.start(), match.end()

        # –°–æ–±–∏—Ä–∞–µ–º –∫–æ–Ω—Ç–µ–∫—Å—Ç—ã –≤–æ–∫—Ä—É–≥ –Ω–∞–π–¥–µ–Ω–Ω–æ–≥–æ —Å–ª–æ–≤–∞
        start_context = chapter_text[:start].split()[-8:]
        end_context = chapter_text[end:].split()[:8]
        found_word = chapter_text[start:end]
        result = ' '.join(start_context + [found_word] + end_context)

        # –°–æ—Ö—Ä–∞–Ω—è–µ–º –ø–æ–∑–∏—Ü–∏—é –∏ —Ä–µ–∑—É–ª—å—Ç–∞—Ç
        matches.append((start, result))

    return matches

def search_in_all_chapters(chapters, words, category):
    """–ò—â–µ—Ç —Å–ª–æ–≤–∞ –≤–æ –≤—Å–µ—Ö –≥–ª–∞–≤–∞—Ö –∏ –ø–æ–¥—Å—á–∏—Ç—ã–≤–∞–µ—Ç –æ–±—â–µ–µ –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π."""
    total_matches = 0
    results = []

    for chapter_number, chapter_title, chapter_text in chapters:
        matches = search_words_in_chapter(chapter_text, words)
        total_matches += len(matches)  # –£–≤–µ–ª–∏—á–∏–≤–∞–µ–º —Å—á–µ—Ç—á–∏–∫ —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π

        for position, match in matches:
            results.append([chapter_number, chapter_title, position, match, category])

    print(f"‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è '{category}': –Ω–∞–π–¥–µ–Ω–æ {total_matches} —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.")
    return results

def main(docx_file_path, chapters_folder):
    """–û—Å–Ω–æ–≤–Ω–∞—è –ª–æ–≥–∏–∫–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∏ –∏ –ø–æ–∏—Å–∫–∞."""
    print(f"üìÇ –ü—Ä–æ–≤–µ—Ä–∫–∞ –Ω–∞–ª–∏—á–∏—è —Ñ–∞–π–ª–∞: {docx_file_path}")
    if not os.path.exists(docx_file_path):
        print(f"‚ùå –§–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω: {docx_file_path}")
        return

    print("üìñ –ò–∑–≤–ª–µ—á–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –∏–∑ —Ñ–∞–π–ª–∞...")
    book_text = extract_text_from_docx(docx_file_path)
    if not book_text:
        print("‚ùå –ù–µ —É–¥–∞–ª–æ—Å—å –∏–∑–≤–ª–µ—á—å —Ç–µ–∫—Å—Ç –∏–∑ —Ñ–∞–π–ª–∞.")
        return

    print("‚úÇÔ∏è –†–∞–∑–¥–µ–ª–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –Ω–∞ –≥–ª–∞–≤—ã...")
    chapters = split_text_into_chapters(book_text)
    if not chapters:
        print("‚ùå –ù–µ —É–¥–∞–ª–æ—Å—å —Ä–∞–∑–¥–µ–ª–∏—Ç—å —Ç–µ–∫—Å—Ç –Ω–∞ –≥–ª–∞–≤—ã.")
        return

    # –°–ø–∏—Å–∫–∏ –∫–ª—é—á–µ–≤—ã—Ö —Å–ª–æ–≤ –¥–ª—è –ø–æ–∏—Å–∫–∞
    clothes_words = ['stilettos', 'sunglasses', 'ring', 'necklace', 'bracelet', 'earrings', 'brooch', 'watch', 'anklet', 'choker', 'pendant', 'cufflinks', 'tie clip', 'nose ring', 'belly button ring', 'toe ring', 'hairpin', 'tiara', 'diadem', 'bangle', 'chain', 'medallion', 'pearl necklace', 'locket', 'armband', 'charm bracelet', 'dress', 'robe', 'suit', 'clothes', 'coat', 'jacket', 'shirt', 'pants', 'skirt', 'jeans', 't-shirt', 'sweater', 'blouse', 'shorts', 'hoodie', 'vest', 'scarf', 'hat', 'gloves', 'boots', 'shoes', 'sneakers', 'socks', 'tie', 'belt', 'gown', 'trench coat', 'blazer', 'cardigan', 'overalls', 'tank top', 'leggings']
    hair_words = ['hair', 'beard', 'ponytail', 'bun', 'braids', 'bob', 'pixie cut', 'long waves', 'curly hair', 'straight hair', 'afro', 'buzz cut', 'french twist', 'dreadlocks', 'fishtail braid', 'half-up half-down', 'side part', 'middle part', 'updo', 'loose curls', 'locks', 'layered cut', 'shag cut', 'crew cut', 'mohawk', 'bangs', 'chignon', 'top knot']
    appearances_words = ['fur', 'black wolf', 'white wolf', 'brown wolf', 'caramel hair', 'blonde', 'brunette', 'redhead', 'white hair', 'red hair', 'auburn hair', 'chestnut hair', 'black hair', 'grey hair', 'dark hair', 'blue eyes', 'blue irises', 'blue eyeballs', 'brown eyes', 'brown irises', 'brown eyeballs', 'black eyes', 'black irises', 'black eyeballs', 'red eyes', 'red irises', 'red eyeballs', 'hazel eyes', 'hazel irises', 'hazel eyeballs', 'green eyes', 'green irises', 'green eyeballs', 'eyes were green', 'eyes were black', 'eyes were brown', 'eyes were hazel', 'eyes were blue', 'eyes were grey', '5 feet', "5'", "6'", "7'", '6 feet', '7 feet', 'feet tall', 'slim', 'thin', 'thick', 'tall', 'dark skin', 'white skin', 'pale skin', 'freckles', 'tattoos', 'tattoo', 'brown skin', 'black skin', 'high cheekbones', 'wrinkles', 'wrinkled', 'full lips', 'small breasts', 'curves', 'big breasts']
    other_words = ['eyes', 'face', 'skin', 'body', 'fur', 'chin', 'cheeks', 'big wolf', 'white wolf', 'beautiful', 'ugly', 'handsome', 'cute', 'gorgeous', 'sharp', 'features', 'arms', 'bicep', 'legs', 'ass', 'breasts', 'waist', 'muscular', 'pale', 'features', 'thin', 'fangs', 'tattoos', 'teeth', 'mouth', 'young', 'old', 'blood', 'bleeding', 'gaze', 'smirk', 'smile', 'lips', 'nose', 'hands', 'jaw']
    weather_words = ['morning', 'afternoon', 'evening', 'night', 'sunrise', 'sunset', 'dawn', 'dusk', 'noon', 'midnight', 'cloudy', 'rain', 'storm', 'wind', 'sun', 'sunny', 'fog', 'foggy', 'snow', 'snowy', 'hail', 'thunder', 'lightning', 'breeze', 'chilly', 'hot', 'warm', 'cold', 'frost', 'blizzard', 'temperature', 'humid', 'dry', 'drizzle', 'pouring', 'downpour', 'mist', 'overcast']
    locations_words = ['forest', 'living room', 'dining room', 'school', 'college', 'training grounds', 'field', 'bathroom', 'bedroom', 'cabin', 'house']
   

    all_results = []
    for words, category in [
        (clothes_words, 'Clothes'), (hair_words, 'Hair'),
        (appearances_words, 'Appearances'), (weather_words, 'Weather'),
        (locations_words, 'Locations'), (other_words, 'Other')
    ]:
        all_results.extend(search_in_all_chapters(chapters, words, category))

    print("üìä –°–æ–∑–¥–∞–Ω–∏–µ DataFrame —Å —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º–∏...")
    all_data_df = pd.DataFrame(all_results, columns=['Chapter Number', 'Chapter Title', 'Position', 'Match', 'Category'])
    
    # –°–æ—Ä—Ç–∏—Ä—É–µ–º –ø–æ –Ω–æ–º–µ—Ä—É –≥–ª–∞–≤—ã –∏ –ø–æ–∑–∏—Ü–∏–∏
    all_data_df.sort_values(by=['Chapter Number', 'Position'], inplace=True)

    print("üíæ –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –≤ Excel...")
    os.makedirs(chapters_folder, exist_ok=True)
    excel_file_path = f"{chapters_folder}/details.xlsx"
    
    # –£–±–∏—Ä–∞–µ–º –∫–æ–ª–æ–Ω–∫—É 'Position' –ø–µ—Ä–µ–¥ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ–º
    try:
        all_data_df.drop(columns=['Position']).to_excel(excel_file_path, sheet_name='Details', index=False)
        print(f"‚úÖ –§–∞–π–ª Excel —Å–æ–∑–¥–∞–Ω: {excel_file_path}")
    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–∏ Excel —Ñ–∞–π–ª–∞: {e}")

# –ó–∞–ø—É—Å–∫ –æ—Å–Ω–æ–≤–Ω–æ–≥–æ –ø—Ä–æ—Ü–µ—Å—Å–∞
chapters_folder = "million_dollar_bride"
docx_file_path = f"{chapters_folder}/fulltext.docx"
main(docx_file_path, chapters_folder)


üìÇ –ü—Ä–æ–≤–µ—Ä–∫–∞ –Ω–∞–ª–∏—á–∏—è —Ñ–∞–π–ª–∞: million_dollar_bride/fulltext.docx
üìñ –ò–∑–≤–ª–µ—á–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –∏–∑ —Ñ–∞–π–ª–∞...
‚úÇÔ∏è –†–∞–∑–¥–µ–ª–µ–Ω–∏–µ —Ç–µ–∫—Å—Ç–∞ –Ω–∞ –≥–ª–∞–≤—ã...
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Clothes': –Ω–∞–π–¥–µ–Ω–æ 277 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Hair': –Ω–∞–π–¥–µ–Ω–æ 178 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Appearances': –Ω–∞–π–¥–µ–Ω–æ 129 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Weather': –Ω–∞–π–¥–µ–Ω–æ 306 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Locations': –Ω–∞–π–¥–µ–Ω–æ 201 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
‚úÖ –ö–∞—Ç–µ–≥–æ—Ä–∏—è 'Other': –Ω–∞–π–¥–µ–Ω–æ 2104 —Å–æ–≤–ø–∞–¥–µ–Ω–∏–π.
üìä –°–æ–∑–¥–∞–Ω–∏–µ DataFrame —Å —Ä–µ–∑—É–ª—å—Ç–∞—Ç–∞–º–∏...
üíæ –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤ –≤ Excel...
‚úÖ –§–∞–π–ª Excel —Å–æ–∑–¥–∞–Ω: million_dollar_bride/details.xlsx
