In [1]:
# Import required libraries
from docx import Document
import pandas as pd

In [2]:
# Function to extract tables from a Word document
def extract_tables_from_docx(docx_file):
    """
    Reads all tables from a Word document and returns them as a list of DataFrames.
    """
    document = Document(docx_file)
    tables = []
    
    for table in document.tables:
        # Extract each table's rows and columns into a DataFrame
        data = []
        for row in table.rows:
            row_data = [cell.text.strip() for cell in row.cells]
            data.append(row_data)
        
        # Convert to pandas DataFrame
        df = pd.DataFrame(data)
        tables.append(df)
    
    return tables

# Load the Word document (replace with your file path)
file_path = "input docx.docx"  # Replace with the correct file path
tables = extract_tables_from_docx(file_path)

# Display all tables
for i, table in enumerate(tables):
    print(f"Table {i+1}:")
    display(table.head())


Table 1:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,Dim,Description,Tol (-),Tol (+),XS,XS,XS,S,S,S
1,Dim,Description,Tol (-),Tol (+),Increment,Sample,Deviation,Increment,Sample,Deviation
2,1X1,Waist Width,-1.00,1.00,46.00,,,50.00,,
3,1F3,Hip Width,-1.00,1.00,36.00,,,38.00,,
4,1F6,Thigh Width,-0.50,0.50,2.50,,,2.50,,


Table 2:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,3B2,Hem Opening Width,-0.5,0.5,9.1,,,9.5,,
1,3B6,Full Length,-1.0,1.0,2.0,,,2.0,,
2,1M4,Crotch Depth,-1.0,1.0,33.0,,,35.0,,
3,1N2,Calf Width,-1.0,1.0,34.0,,,36.0,,
4,3C1,Seat Width,0.0,0.0,2.0,,,2.0,,


Table 3:


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M,M,M,L,L,L,XL,XL,XL
1,Increment,Sample,Deviation,Increment,Sample,Deviation,Increment,Sample,Deviation
2,54.00,,,58.00,,,64.00,,
3,40.00,,,42.00,,,44.00,,
4,2.50,,,2.50,,,2.50,,


Table 4:


Unnamed: 0,0,1,2,3
0,Placement,Composition,Qty,Per rate
1,Main Fabric,Full organic cotton,3,0.25
2,Button,Plastic/Wood,2,0.75
3,Thread,mixed fabric,2,0.25
4,Main Label,Recycled Polyster,1,0.25


In [3]:
# Combine the tables as per requirements
def combine_tables(tables):
    """
    Combines tables as per requirements:
    - Table 1 & Table 2: Combined column-wise
    - Table 3: Appended to the combined table row-wise
    """
    # Step 1: Combine Table 1 and Table 2 column-wise
    table_1_2 = pd.concat([tables[0], tables[1]], axis=1)

    # Step 2: Combine Table 3 with the above combined table row-wise
    combined_table = pd.concat([table_1_2, tables[2]], axis=0, ignore_index=True)
    
    return combined_table

# Combine tables
final_table = combine_tables(tables)

# Clean up and display the final table
final_table = final_table.dropna(how='all')  # Remove empty rows if any
final_table.columns = final_table.iloc[0]  # Set the first row as the header
final_table = final_table[1:]  # Drop the first row (now header)
final_table.reset_index(drop=True, inplace=True)

print("Combined Table:")
display(final_table)


Combined Table:


Unnamed: 0,Dim,Description,Tol (-),Tol (+),XS,XS.1,XS.2,S,S.1,S.2,3B2,Hem Opening Width,-0.50,0.50,9.10,Unnamed: 16,Unnamed: 17,9.50,Unnamed: 19,Unnamed: 20
0,Dim,Description,Tol (-),Tol (+),Increment,Sample,Deviation,Increment,Sample,Deviation,3B6,Full Length,-1.00,1.00,2.00,,,2.00,,
1,1X1,Waist Width,-1.00,1.00,46.00,,,50.00,,,1M4,Crotch Depth,-1.00,1.00,33.00,,,35.00,,
2,1F3,Hip Width,-1.00,1.00,36.00,,,38.00,,,1N2,Calf Width,-1.00,1.00,34.00,,,36.00,,
3,1F6,Thigh Width,-0.50,0.50,2.50,,,2.50,,,3C1,Seat Width,0.00,0.00,2.00,,,2.00,,
4,1G5,Knee Width,-1.00,1.00,37.00,,,39.00,,,,,,,,,,,,
5,1H2,Inseam Length,-0.50,0.50,21.20,,,22.00,,,,,,,,,,,,
6,2A1,Outseam Length,-0.50,0.50,17.00,,,18.00,,,,,,,,,,,,
7,2K1,Front Rise Length,-0.50,0.50,15.00,,,16.00,,,,,,,,,,,,
8,3B1,Back Rise Length,-0.50,0.50,16.20,,,177.00,,,,,,,,,,,,
9,M,M,M,L,L,L,XL,XL,XL,,M,M,M,L,L,L,XL,XL,XL,
