<a href="https://colab.research.google.com/github/mahmoudBidry/Latex-to-word/blob/main/latex_to_word.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the pandoc 3.1.6.1 amd64 .deb from the project's GitHub releases
# and install it with dpkg so the `pandoc` command used later is available.
!wget https://github.com/jgm/pandoc/releases/download/3.1.6.1/pandoc-3.1.6.1-1-amd64.deb
!dpkg -i pandoc-3.1.6.1-1-amd64.deb

--2024-11-08 17:51:34--  https://github.com/jgm/pandoc/releases/download/3.1.6.1/pandoc-3.1.6.1-1-amd64.deb
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/571770/b39f86dc-53a7-4b6b-a501-875f818da938?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241108%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241108T175135Z&X-Amz-Expires=300&X-Amz-Signature=74959c74f30dbdc8d1f4ea6a58cd69688686ee70f09d18345db8a105ef9263cf&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dpandoc-3.1.6.1-1-amd64.deb&response-content-type=application%2Foctet-stream [following]
--2024-11-08 17:51:35--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/571770/b39f86dc-53a7-4b6b-a501-875f818da938?X-Amz-Algorithm=AWS4-HMAC-SHA256&

In [None]:
# Install python-docx if not already installed
!pip install python-docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m235.5/244.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2


In [None]:
from google.colab import drive
import os
import re

# Mount Google Drive under /content/drive
drive.mount('/content/drive')

In [None]:
import re

def count_columns_in_latex_table(latex_code):
    r"""
    Detects the maximum number of columns in a LaTeX table based on rows.

    A line is considered a table row when it contains at least one backslash
    and at least one column separator, i.e. an *unescaped* ``&``. An escaped
    ampersand (``\&``) is a literal "&" inside a cell and is therefore not
    counted. The column count of a row is its number of separators plus one.

    Parameters:
    latex_code (str): LaTeX code for the table.

    Returns:
    int: Number of columns in the widest row, or 0 when no row is found.
    """
    max_columns = 0

    for line in latex_code.splitlines():
        # Same loose row heuristic as before: the line must contain a backslash
        # (row terminator, \hline, any command, ...).
        if "\\" not in line:
            continue

        # Blank out row terminators (`\\`) first so that a terminator directly
        # followed by `&` is not misread as the escape `\&`; afterwards every
        # remaining `\&` really is an escaped ampersand and can be dropped.
        row = line.replace("\\\\", "  ").replace("\\&", "")

        separators = row.count("&")
        if separators:
            max_columns = max(max_columns, separators + 1)

    return max_columns


def modify_table_format(content):
    r"""
    Modifies LaTeX table format declarations to use simple centered columns.
    Removes content before the first `\hline`.

    Every `tabular` environment is processed on its own: the span from
    `\begin{tabular}{...}` to the nearest `\end{tabular}` is matched first and
    the first `\hline` is then looked for *inside that span only*, so a table
    without any `\hline` can never be merged with the table that follows it.

    An environment is returned unchanged when
      * it contains a nested `tabular`,
      * it has no `\hline` followed by a line break, or
      * no columns can be detected in its rows.

    Args:
        content (str): The LaTeX document content
    Returns:
        str: Modified LaTeX content with simplified table column formatting
    """

    # One match per environment: opening, column spec (discarded), body,
    # closing. `[^}]*` stops at the first `}`; any remainder of a spec with
    # nested braces lands at the start of the body, i.e. before the first
    # \hline, and is dropped together with the rest of that prefix.
    pattern = r'(\\begin\{tabular\})\{[^}]*\}(.*?)(\\end\{tabular\})'
    first_rule = re.compile(r'\\hline\s*\n')

    def replacement(match):
        tabular_start, body, tabular_end = match.groups()

        # Nested `tabular`: the non-greedy body ended at the inner
        # \end{tabular}, so rewriting would corrupt the outer table.
        if '\\begin{tabular}' in body:
            return match.group(0)

        rule = first_rule.search(body)
        if rule is None:
            return match.group(0)  # Nothing to anchor the rewrite on

        # Everything after the first \hline, including the closing tag
        table_body = body[rule.end():] + tabular_end

        # Calculate the number of columns using `count_columns_in_latex_table`
        num_columns = count_columns_in_latex_table(table_body)
        if num_columns == 0:
            return match.group(0)  # Would otherwise emit the invalid spec `{|}`

        # Generate a new format with simple centered columns (`|c|`)
        new_format = '|' + 'c|' * num_columns

        # Construct the modified table declaration
        return f'{tabular_start}{{{new_format}}} \n \\hline {table_body}'

    # Apply the replacement to all `tabular` environments in the content
    return re.sub(pattern, replacement, content, flags=re.DOTALL)

In [None]:
# Define directory and paths.
# `latex_dir` is a placeholder: point it at the folder that holds the LaTeX
# project before running. Every other path below is derived from it.
latex_dir = "your_drive_folder_path/"
csl_file = os.path.join(latex_dir, "ieee.csl")  # Citation style passed to pandoc; change 'ieee.csl' if using a different citation style
tex_file = os.path.join(latex_dir, "paper.tex") # Input document; change 'paper.tex' to your LaTeX file name
bib_file = os.path.join(latex_dir, "sample.bib") # Bibliography passed to pandoc; change 'sample.bib' to your bibliography file name
temp_tex_file = os.path.join(latex_dir, "paper_modified.tex")  # Copy of the source with rewritten tables; this is what pandoc converts
output_docx = os.path.join(latex_dir, "output.docx")  # Word file written by pandoc

# Make the project folder the working directory (raises if it does not exist).
# Presumably so relative paths inside the .tex file resolve -- confirm.
os.chdir(latex_dir)

In [None]:
# Read original LaTeX content
with open(tex_file, 'r', encoding='utf-8') as file:
    content = file.read()

# Modify table formats (rewrites the `tabular` column declarations)
modified_content = modify_table_format(content)

# Save modified content to temporary file; the original .tex is only read, never overwritten
with open(temp_tex_file, 'w', encoding='utf-8') as file:
    file.write(modified_content)

In [None]:
# Convert the modified LaTeX file to a Word document with pandoc, processing
# citations (--citeproc) with the given bibliography and CSL style.
# The {name} placeholders are filled in by IPython from the Python variables
# defined in the path-configuration cell.
!pandoc "{temp_tex_file}" --citeproc --bibliography="{bib_file}" --csl="{csl_file}" -o "{output_docx}"

In [None]:
from docx import Document
from docx.oxml.ns import nsdecls
from docx.oxml import parse_xml
from docx.shared import Pt

In [None]:
# Paths for the post-processing step.
# NOTE: `output_docx` is rebound here. Until now it named pandoc's output file
# (the same path `input_docx` holds below); from this point on it names the
# post-processed file, so re-running the pandoc cell after this one would
# write to the new path instead.
input_docx =  os.path.join(latex_dir, "output.docx") # Path to your Word file
output_docx =  os.path.join(latex_dir, "output_with_borders.docx")  # Save path for modified Word file

In [None]:
# Open the document; the following cells modify `doc` in memory before it is saved
doc = Document(input_docx)

In [None]:
# Define a function to set borders for a table
def set_table_borders(table):
    """
    Gives every cell of a python-docx table a thin black single-line border
    on all four sides.

    Any `w:tcBorders` element a cell already has is removed before the new one
    is added, so the function can be called repeatedly (e.g. when the cell is
    re-run) and can visit the same cell more than once within `row.cells`
    without leaving duplicate border elements behind.

    Args:
        table: The python-docx table to modify in place.
    """
    # Local import: `qn` is not imported at module level before this function is first called.
    from docx.oxml.ns import qn

    borders_xml = (
        '<w:tcBorders {}>'
        '<w:top w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:left w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:bottom w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '<w:right w:val="single" w:sz="4" w:space="0" w:color="000000"/>'
        '</w:tcBorders>'
    ).format(nsdecls('w'))

    for row in table.rows:
        for cell in row.cells:
            tcPr = cell._element.get_or_add_tcPr()

            # Drop borders left by an earlier call / earlier visit of this cell
            for stale in tcPr.findall(qn('w:tcBorders')):
                tcPr.remove(stale)

            # Apply border to the cell (a fresh element per cell: an XML
            # element can only live in one parent)
            tcPr.append(parse_xml(borders_xml))

# Define a function to set font size for all text in a table
def set_table_font_size(table, font_size):
    """
    Sets the font size of every run found in the cells of a table.

    Args:
        table: The python-docx table whose text is resized in place.
        font_size: The size in points, converted with `Pt`.
    """
    every_run = (
        text_run
        for table_row in table.rows
        for table_cell in table_row.cells
        for para in table_cell.paragraphs
        for text_run in para.runs
    )
    for text_run in every_run:
        text_run.font.size = Pt(font_size)

In [None]:
# Loop through all tables in the document, apply borders, and set font size.
# Both helpers modify the tables of `doc` in place.
for table in doc.tables:
    set_table_borders(table)
    set_table_font_size(table, 8)  # Set font size to 8 pt

In [None]:
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn

def apply_times_new_roman_font(document=None, font_name='Times New Roman', font_size=12):
    """
    Applies a font (by default 'Times New Roman', 12 pt) to the text runs of a
    Word document, in place.

    Body paragraphs always receive `font_size`. Runs inside tables receive the
    font name too, but keep an explicitly set size (for example one applied
    earlier with `set_table_font_size`); only table runs without an explicit
    size are set to `font_size`.

    Args:
        document: python-docx document to modify. Defaults to the
            notebook-level `doc` when omitted.
        font_name (str): Font family to apply.
        font_size (int | float): Size in points.
    """
    target = doc if document is None else document

    def _restyle(run, keep_explicit_size):
        # Set the font, including the East Asian font slot, then the size.
        run.font.name = font_name
        run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)  # Ensures compatibility for East Asian text
        if keep_explicit_size and run.font.size is not None:
            return
        run.font.size = Pt(font_size)

    # Apply font style to all body paragraphs and runs
    for paragraph in target.paragraphs:
        for run in paragraph.runs:
            _restyle(run, keep_explicit_size=False)

    # Apply font style to tables as well, without undoing table-specific sizes
    for table in target.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        _restyle(run, keep_explicit_size=True)


In [None]:
# Apply the font to the document loaded above (`doc` is modified in place)
apply_times_new_roman_font()

In [None]:
# Save the modified document to the `output_docx` path
doc.save(output_docx)

In [None]:
# Download the modified document (the file just saved at `output_docx`) to verify the changes
from google.colab import files
files.download(output_docx)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>