In [None]:
# Copyright 2025 The Contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pkiage/sandbox-gworkspace-msoffice-automation/blob/main/GoogleColab/get_comments-ms_excel.ipynb)

In [None]:
# 0. Install and import libraries in Google Colab
from google.colab import files
import openpyxl
import pandas as pd
!pip install openpyxl --quiet

In [None]:
# 1. Upload Excel file
# The keys of the returned mapping are used as the workbook path below.
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the dialog was dismissed): fail with a clear
    # message instead of a bare StopIteration from next().
    raise ValueError(
        "No file was uploaded; re-run this cell and choose an Excel workbook."
    )
# Only the first uploaded file is processed.
excel_path = next(iter(uploaded))

In [None]:
# 2. Fix mojibake (garbled Unicode)
def fix_mojibake(text):
    """Repair UTF-8 text that was mis-decoded with a single-byte codec.

    The text is re-encoded with the codec that was wrongly used to decode it
    and the resulting bytes are decoded as UTF-8. Latin-1 is tried first,
    then Windows-1252, which covers characters such as the euro sign and
    curly quotes that Latin-1 cannot encode.

    Args:
        text: The possibly garbled string.

    Returns:
        The repaired string, or ``text`` unchanged when neither round trip
        produces valid UTF-8 (i.e. the text was not mojibake) or when
        ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        # Nothing to repair; hand non-string values back untouched.
        return text
    for codec in ('latin1', 'cp1252'):
        try:
            return text.encode(codec).decode('utf-8')
        except UnicodeError:
            # Either the text has characters outside this codec or the bytes
            # are not valid UTF-8; try the next candidate.
            continue
    return text

In [None]:
# 3. Clean comment: remove everything before "Comment:"
def clean_comment(text):
    """Return the comment body with surrounding whitespace removed.

    When the text contains the marker "Comment:", everything up to and
    including its first occurrence is discarded; otherwise the whole text
    is kept. The result is stripped in both cases.
    """
    if "Comment:" not in text:
        return text.strip()
    # partition() splits on the first occurrence only, so later markers
    # remain part of the returned body.
    _, _, remainder = text.partition("Comment:")
    return remainder.strip()

In [None]:
# 4. Load workbook and extract comments
wb = openpyxl.load_workbook(excel_path)
records = []

# Iterate wb.worksheets rather than wb.sheetnames: sheetnames also lists chart
# sheets, which have no iter_rows(), so looking them up by name and iterating
# would raise AttributeError on workbooks that contain one.
for ws in wb.worksheets:
    sheet = ws.title
    for row in ws.iter_rows():
        for cell in row:
            comment = cell.comment
            if not comment:
                continue
            # Drop any preamble before "Comment:", then repair garbled
            # Unicode in what remains.
            cleaned = clean_comment(comment.text)
            fixed = fix_mojibake(cleaned)
            records.append({
                'sheet': sheet,
                'cell': cell.coordinate,
                'author': comment.author,
                'comment': fixed
            })

In [None]:
# 5. Save to CSV
# Name the columns explicitly so the CSV still gets a header row when no
# comments were found; an empty record list would otherwise give a DataFrame
# with no columns. For non-empty records the column order is unchanged.
df = pd.DataFrame(records, columns=['sheet', 'cell', 'author', 'comment'])
df.to_csv('excel_comments_clean.csv', index=False)
print(f"Wrote {len(df)} cleaned comments to excel_comments_clean.csv")

In [None]:
# 6. Download
# Pass the CSV file name to Colab's download helper.
csv_name = 'excel_comments_clean.csv'
files.download(csv_name)