In [6]:
from docx import Document
import pandas as pd
import json
import os

def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file as a single string.

    The file at ``file_path`` is opened with ``Document`` and the ``text``
    of every entry in its ``paragraphs`` collection is joined with a
    newline, in document order. Only what ``paragraphs`` exposes is
    included in the result.
    """
    document = Document(file_path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)

def combine_specific_docx_files_to_dict(file_paths):
    """Build a ``{filename: text}`` mapping from the given .docx paths.

    Each path that exists is read with ``extract_text_from_docx`` and stored
    under its base name (directory part stripped). If two paths share a base
    name, the later one replaces the earlier entry. Paths that do not exist
    are reported on stdout and skipped.
    """
    texts_by_name = {}
    for path in file_paths:
        # Guard clause: report the missing path and move on to the next one.
        if not os.path.exists(path):
            print(f"File {path} does not exist.")
            continue
        texts_by_name[os.path.basename(path)] = extract_text_from_docx(path)
    return texts_by_name

def save_dict_to_json(data, file_path):
    """Write ``data`` to ``file_path`` as UTF-8 encoded, indented JSON.

    The target file is created or overwritten. Non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes, and nested levels are
    indented by four spaces.
    """
    with open(file_path, mode="w", encoding="utf-8") as out_handle:
        json.dump(data, out_handle, indent=4, ensure_ascii=False)

# Input documents to merge, listed explicitly (paths are relative to the
# current working directory).
# NOTE(review): the first entry contains a space before "_0" and there is no
# "Challenge_4" entry -- confirm both match the actual files on disk.
file_paths = [
    "../data/Challenge _0.docx",
    "../data/Challenge_1.docx",
    "../data/Challenge_2.docx",
    "../data/Challenge_3.docx",
    "../data/Challenge_5.docx",
    "../data/Challenge_6.docx",
    "../data/Challenge_7.docx",
]

# Destination for the merged output.
save_path = "../data/combined_docx_data.json"

# Read every listed document that exists, keyed by file name, then persist
# the resulting mapping as JSON.
combined_dict = combine_specific_docx_files_to_dict(file_paths)
save_dict_to_json(combined_dict, save_path)
