<a href="https://colab.research.google.com/github/lekejo/lekejo/blob/main/GenBriefs_No_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Comment if being run for the second time
!pip install python-docx


Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━[0m [32m194.6/244.3 kB[0m [31m5.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2


In [21]:
# DANGER: deletes the whole "briefs" folder, including every brief generated so far.
# Disabled by default so that "Run all" can never wipe existing output by accident.
# Set CLEAR_BRIEFS = True, run this cell once, then set it back to False.
import shutil

CLEAR_BRIEFS = False

if CLEAR_BRIEFS:
    # ignore_errors=True mirrors `rm -rf`: a missing folder is not an error.
    shutil.rmtree("briefs", ignore_errors=True)

In [22]:
# Copyright CIB - the one behind GenSpecs - You know who.
# Prompts the user to upload an Excel workbook laid out like the current CIB Project dashboard,
# then generates a Word project brief for each New or Ongoing project (all other types are skipped).
import pandas as pd
from google.colab import files
from docx import Document
from datetime import datetime
import logging
import time
import os
import re
import unicodedata
from tqdm.auto import tqdm

# --- Configuration: everything a user might need to tune ---

# Folders, relative to the notebook's working directory.
OUTPUT_FOLDER, LOG_FOLDER = "briefs", "logs"

# Project types, compared in lower case.
ALLOWED_TYPES = ("ongoing", "new")

# strptime pattern used to parse the start/completion date cells.
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Spreadsheet column headers.
PID_COLUMN = "PID"
TYPE_COLUMN = "Type"
PROJECT_NAME_COLUMN = "Project Name"
MINISTRY_DEPT_COLUMN = "Ministry/Dept"
DESCRIPTION_COLUMN = "Description"
CONTRACT_VALUE_COLUMN = "Contract Value"
START_DATE_COLUMN = "Start Date"
COMPLETION_DATE_COLUMN = "Completion Date"
STATUS_COLUMN = "Status"

# --- Function definitions ---

def generate_project_brief(row):
  """Generates a project brief for a single project as a Word document.

  Only projects whose Type is listed in ALLOWED_TYPES get a brief; every other
  row, including rows whose Type cell is blank, is skipped. The brief is saved
  in OUTPUT_FOLDER as "<Type>_<PID>_<Project Name>_Brief.docx" after the name
  has been passed through sanitize_filename.

  Args:
      row: A pandas Series representing a single project from the Excel file.

  Returns:
      True if the brief was generated successfully, False if the project was
      skipped or an error occurred. Errors are logged, never raised.
  """
  # Defined before the try block so the error handler can always name the
  # project, even when reading the name column is what failed.
  project_name = "<unknown>"
  try:
      project_name = str(row[PROJECT_NAME_COLUMN])
      # str() first: a blank Type cell arrives as float NaN, which has no
      # .capitalize() and would otherwise be reported as an error.
      project_type = str(row[TYPE_COLUMN]).capitalize()

      # Filter before doing any other work so skipped rows cost nothing and
      # are not logged as "Generating brief".
      if project_type.lower() not in ALLOWED_TYPES:
          logging.debug(f"Skipping project {project_name} due to type: {project_type}")
          return False

      # Zero-pad the PID to three digits; fall back to a placeholder when the
      # cell is blank (NaN/None), non-numeric, or infinite.
      try:
          pid = str(int(row[PID_COLUMN])).zfill(3)
      except (ValueError, TypeError, OverflowError):
          pid = "XXX"

      logging.debug(f"Generating brief for project: {project_name} (PID: {pid})")

      document = Document()
      document.add_heading(project_name, level=1)

      add_brief_section(document, "Ministry/Department:", row[MINISTRY_DEPT_COLUMN])
      add_brief_section(document, "Project Description", row[DESCRIPTION_COLUMN])
      add_brief_section(document, "Project Value:", row[CONTRACT_VALUE_COLUMN])

      start_date = _format_brief_date(row[START_DATE_COLUMN])
      completion_date = _format_brief_date(row[COMPLETION_DATE_COLUMN])

      add_brief_section(document, "Timeframe:", f"{start_date} to {completion_date}")
      add_brief_section(document, "Status:", row[STATUS_COLUMN])

      # Include formatted PID in filename
      file_name = sanitize_filename(f"{project_type}_{pid}_{project_name}_Brief.docx")
      file_path = os.path.join(OUTPUT_FOLDER, file_name)

      document.save(file_path)
      logging.debug(f"Brief saved for project: {project_name} as {file_name}")
      return True
  except Exception as e:
      # Best effort: one bad row must not stop the remaining briefs.
      logging.error(f"Error generating brief for project {project_name}: {e}", exc_info=True)
      return False


def _format_brief_date(value):
  """Formats a date cell as e.g. '05 March 2024', or 'N/A' if it does not match DATE_FORMAT."""
  try:
      return datetime.strptime(str(value), DATE_FORMAT).strftime('%d %B %Y')
  except ValueError:
      return "N/A"

def add_brief_section(document, header, content):
  """Adds a section with header and content.

  Args:
      document: Object to append to; only its add_paragraph method is used.
      header: Section title, written with the 'Heading 2' style.
      content: Section body. Numbers are written with thousands separators,
          missing values (None or NaN) are written as "N/A", and anything
          else is written via str().
  """
  document.add_paragraph(header, style='Heading 2')

  # NaN is the only float that is not equal to itself; together with None it
  # marks a missing value, which should not appear in the brief as "nan".
  is_missing = content is None or (isinstance(content, float) and content != content)

  if is_missing:
      text = "N/A"
  elif isinstance(content, (int, float)) and not isinstance(content, bool):
      # bool is a subclass of int; excluded so True/False are not printed as 1/0.
      text = f"{content:,}"
  else:
      text = str(content)

  document.add_paragraph(text)

def sanitize_filename(filename):
  """Removes invalid characters and normalizes filename.

  Steps, in order:
    1. Decompose accented characters (NFKD) and drop everything that is not
       ASCII, so "Café" becomes "Cafe".
    2. Replace each of \\ / : * ? " < > | and every ASCII control character
       (such as a newline inside a multi-line spreadsheet cell) with "-".
    3. Strip surrounding whitespace, then leading/trailing dots.

  Args:
      filename: The proposed file name (not a path; separators are replaced).

  Returns:
      The cleaned file name. May be empty if nothing usable remains.
  """
  filename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').decode('ASCII')
  # \x00-\x1f covers tabs, newlines and the other control characters.
  filename = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "-", filename)
  return filename.strip().strip('.')

def main():
  """Uploads a project spreadsheet and writes one brief per New/Ongoing project.

  Creates OUTPUT_FOLDER and LOG_FOLDER if needed, starts a fresh timestamped
  log file, asks the user to upload an Excel file, then calls
  generate_project_brief for every row and prints a summary.

  Raises:
      KeyError: If the uploaded sheet has no TYPE_COLUMN column.
      Exception: Whatever pandas.read_excel raises when the file cannot be
          read; it is logged and then re-raised unchanged.
  """
  os.makedirs(OUTPUT_FOLDER, exist_ok=True)
  os.makedirs(LOG_FOLDER, exist_ok=True)

  timestamp = time.strftime("%Y%m%d-%H%M%S")
  log_file_name = os.path.join(LOG_FOLDER, f"brief_gen_{timestamp}.log")

  # force=True replaces any handlers left on the root logger by an earlier
  # run in the same kernel, so each run writes to its own log file.
  logging.basicConfig(filename=log_file_name, level=logging.DEBUG,
                      force=True,
                      format='%(asctime)s - %(levelname)s - %(message)s')

  uploaded = files.upload()
  if not uploaded:
    # The upload dialog was dismissed without choosing a file.
    logging.error("No file was uploaded; nothing to do.")
    print("No file was uploaded; nothing to do.")
    return
  # Only the first uploaded file is processed.
  filename = next(iter(uploaded))

  try:
    df = pd.read_excel(filename)
    logging.debug("Excel file loaded successfully.")
  except Exception as e:
    logging.error(f"Error loading Excel file: {e}", exc_info=True)
    raise

  if TYPE_COLUMN not in df.columns:
    message = f"Column '{TYPE_COLUMN}' not found in {filename}"
    logging.error(message)
    raise KeyError(message)

  total_projects = len(df)
  # astype(str) keeps the comparison working when the column holds blanks or
  # numbers, for which the .str accessor would otherwise raise.
  type_values = df[TYPE_COLUMN].astype(str).str.lower()
  new_projects = int((type_values == 'new').sum())
  ongoing_projects = int((type_values == 'ongoing').sum())

  print(f"Found {total_projects} projects in the spreadsheet.")
  logging.debug(f"Total projects: {total_projects}")
  logging.debug(f"New projects: {new_projects}")
  logging.debug(f"Ongoing projects: {ongoing_projects}")

  briefs_created = 0
  for _, row in tqdm(df.iterrows(), total=total_projects, desc="Creating Briefs"):
    if generate_project_brief(row):
      briefs_created += 1

  logging.debug("Project brief generation completed.")
  logging.debug(f"Briefs created: {briefs_created}")

  print(f"Out of {total_projects} projects, {new_projects} were New and {ongoing_projects} were Ongoing.")
  print(f"Total briefs created: {briefs_created}")
  print(f"Log file created at: {log_file_name}")

# Run the whole pipeline when this cell is executed (or the file is run as a script);
# importing the code as a module only defines the functions.
if __name__ == "__main__":
  main()

Saving ProjectStatus.xlsx to ProjectStatus (10).xlsx
Found 291 projects in the spreadsheet.


Creating Briefs:   0%|          | 0/291 [00:00<?, ?it/s]

Out of 291 projects, 80 were New and 42 were Ongoing.
Total briefs created: 122
Log file created at: logs/brief_gen_20240921-073255.log


In [17]:
import logging
import os

# Scratch check that file logging works: writes a single DEBUG record to logs/test_log.log.
_log_dir = "logs"
log_file_name = os.path.join(_log_dir, "test_log.log")

# The log folder must exist before a log file can be opened inside it.
if not os.path.exists(_log_dir):
    os.makedirs(_log_dir)

# Point the root logger at the test file, replacing any earlier configuration.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
    filename=log_file_name,
    force=True,
)

logging.debug("This is a test log message.")

print(f"Log file should be created at: {log_file_name}")

Log file should be created at: logs/test_log.log
