From d5911583a6c24088baac01090c798a856f78f623 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:04:49 +0100 Subject: [PATCH 1/4] new stuff --- .gitignore | 1 + scripts/__init__.py | 0 scripts/parse_md_table.py | 61 +++++++++ scripts/shift_calculations.py | 226 +++++++++++++++++++++------------- 4 files changed, 203 insertions(+), 85 deletions(-) create mode 100644 scripts/__init__.py create mode 100644 scripts/parse_md_table.py diff --git a/.gitignore b/.gitignore index 713d8a66..822ea33e 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ instance/ # Sphinx documentation docs/_build/ +doc/_build/ doc_build/ # PyBuilder diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/parse_md_table.py b/scripts/parse_md_table.py new file mode 100644 index 00000000..dbb28555 --- /dev/null +++ b/scripts/parse_md_table.py @@ -0,0 +1,61 @@ +from collections.abc import Sequence +from pathlib import Path + +import pandas as pd + +def parse_file(file_path: Path) -> pd.DataFrame: + """ Parses a markdown file, containing a shift-table, into a pandas dataframe. + + Args: + file_path (Path): Path to the markdown file. + + Returns: + pd.DataFrame: DataFrame containing the shift table. + """ + header, data = get_table_parts(file_path.read_text().split("\n")) + df = pd.DataFrame( + columns=parse_line(header[0]), + data=[parse_line(line) for line in data], + ) + return df + + +def parse_line(line: str): + """Convert a single line of a table into a list of parts. + + Args: + line (str): Line of the table. + + Returns: + List[str]: List of the table row entries. + """ + return [part.strip() for part in line.split("|")][1:-1] + + +def get_table_parts(content: Sequence[str]): + """ Splits a markdown table into header and data. """ + header = [] + data = [] + header_finished = False + + for line in content: + line = line.strip() + + if not line.startswith("|"): + if not data: + continue + else: + break + + if ":---" in line: + header_finished = True + continue + + if not header_finished: + header.append(line) + else: + data.append(line) + + return header, data + + diff --git a/scripts/shift_calculations.py b/scripts/shift_calculations.py index 310d397a..6438bbfb 100644 --- a/scripts/shift_calculations.py +++ b/scripts/shift_calculations.py @@ -4,84 +4,25 @@ import re from datetime import datetime, timedelta from pathlib import Path -from typing import Dict, Sequence, Union import matplotlib as mpl -import pandas as pd from matplotlib import pyplot as plt +from matplotlib.figure import Figure +from scripts.parse_md_table import parse_file -# Parse Markdown File ---------------------------------------------------------- + +# Check dates ------------------------------------------------------------------ COLUMN_START = "Start Date" COLUMN_END = "End Date" COLUMN_SHIFTS = "Shifts" -DATE_FORMAT = "%Y-%m-%d %H:%M" - -def parse_file(file_path: Path) -> pd.DataFrame: - """ Parses a markdown file, containing a shift-table, into a pandas dataframe. - - Args: - file_path (Path): Path to the markdown file. - - Returns: - pd.DataFrame: DataFrame containing the shift table. - """ - header, data = get_table_parts(file_path.read_text().split("\n")) - df = pd.DataFrame( - columns=parse_line(header[0]), - data=[parse_line(line) for line in data], - ) - return df - - -def parse_line(line: str): - """Convert a single line of a table into a list of parts. - - Args: - line (str): Line of the table. - - Returns: - List[str]: List of the table row entries. - """ - return [part.strip() for part in line.split("|")][1:-1] - - -def get_table_parts(content: Sequence[str]): - """ Splits a markdown table into header and data. """ - header = [] - data = [] - header_finished = False - - for line in content: - line = line.strip() +COLUMN_TYPE = "Type" - if not line.startswith("|"): - if not data: - continue - else: - break - - if ":---" in line: - header_finished = True - continue - - if not header_finished: - header.append(line) - else: - data.append(line) - - return header, data - - -def str_to_dt(date_str: str) -> datetime: - return datetime.strptime(date_str, DATE_FORMAT) - - -# Check dates ------------------------------------------------------------------ WORK = "W" WORK_NIGHT = "WN" HOLIDAY = "H" HOLIDAY_NIGHT = "HN" ALL_SHIFTS = (WORK, WORK_NIGHT, HOLIDAY, HOLIDAY_NIGHT) + SHIFT_NAMING = { WORK: "Mo - Fr (Day)", WORK_NIGHT: "Mo - Fr (Night)", @@ -93,7 +34,20 @@ def str_to_dt(date_str: str) -> datetime: WORK_END_TIME = {"hour": 17, "minute": 30, "second": 0} SHIFT_LENGTH = 8 # in hours +DATE_FORMAT = "%Y-%m-%d %H:%M" + CERN_HOLIDAYS = [ + # https://home.cern/official-holidays/ + datetime(2024, 1, 1), + datetime(2024, 3, 29), + datetime(2024, 4, 1), + datetime(2024, 5, 1), + datetime(2024, 5, 9), + datetime(2024, 5, 20), + datetime(2024, 9, 5), + datetime(2024, 12, 24), + datetime(2024, 12, 25), + datetime(2024, 12, 31), # https://home.cern/official-holidays/2023 datetime(2023, 1, 2), datetime(2023, 4, 7), @@ -121,6 +75,10 @@ def str_to_dt(date_str: str) -> datetime: ] +def str_to_dt(date_str: str) -> datetime: + return datetime.strptime(date_str, DATE_FORMAT) + + def same_day(d1: datetime, d2: datetime) -> bool: """ True if the dates are on the same day. """ return (d1.year == d2.year) and (d1.month == d2.month) and (d1.day == d2.day) @@ -131,7 +89,7 @@ def is_holiday(date: datetime): return any(same_day(date, h) for h in CERN_HOLIDAYS) -def calculate_shift_parts(start_time: datetime, end_time: datetime) -> Dict[str, timedelta]: +def calculate_shift_parts(start_time: datetime, end_time: datetime) -> dict[str, timedelta]: """Split the given shift into work hours, holidays/weekends day or night shifts. Args: @@ -188,7 +146,6 @@ def time_delta_to_shifts(time_delta: timedelta): return time_delta_to_hours(time_delta) / SHIFT_LENGTH - # Tests ------------------------------------------------------------------------ EPS = 1e-6 @@ -235,11 +192,12 @@ def test_working_hours_single_day(): # Main -------------------------------------------------------------------------- -def calculate_shifts(file_path: Union[str, Path]): +def calculate_shifts(file_path: str | Path, shift_type: str = None) -> dict[str, timedelta]: """Calculate the shifts from Start/End Date columns of the first markdown table in a given file. Args: - file_path (Union[str, Path]): Path to the markdown file. + file_path (str | Path): Path to the markdown file. + shift_type (str, optional): Regex to filter shift type. Returns: Dict[str, timedelta]: Dictionary of the total time deltas separated by @@ -248,6 +206,9 @@ def calculate_shifts(file_path: Union[str, Path]): file_path = Path(file_path) df = parse_file(file_path) + if shift_type is not None: + df = df.loc[df[COLUMN_TYPE].str.match(shift_type), :] + parts = {shift: timedelta() for shift in ALL_SHIFTS} if not all(c in df.columns for c in [COLUMN_START, COLUMN_END]): @@ -271,12 +232,12 @@ def calculate_shifts(file_path: Union[str, Path]): return parts - -def manual_shifts(file_path: Union[str, Path]): +def manual_shifts(file_path: str | Path, shift_type: str = None) -> dict[str, float]: """Calculate the shifts from Shifts column of the first markdown table in a given file. Args: - file_path (Union[str, Path]): Path to the markdown file. + file_path (str | Path): Path to the markdown file. + shift_type (str): Regex to filter shift-type. Returns: Dict[str, timedelta]: Dictionary of the total time deltas separated by @@ -285,8 +246,10 @@ def manual_shifts(file_path: Union[str, Path]): file_path = Path(file_path) df = parse_file(file_path) - parts = {shift: 0.0 for shift in ALL_SHIFTS} + if shift_type is not None: + df = df.loc[df[COLUMN_TYPE].str.match(shift_type), :] + parts = {shift: 0.0 for shift in ALL_SHIFTS} if COLUMN_SHIFTS not in df.columns: raise ValueError(f"No shift column found in {file_path.name}") @@ -306,23 +269,34 @@ def manual_shifts(file_path: Union[str, Path]): return parts -def plot_results(parts, title: str = "", output_path: Union[str, Path] = None): +def plot_results(parts, title: str = "", output_path: str | Path = None) -> Figure: """Plot the results of a calculation. Args: parts (Dict[str, timedelta]): Dictionary of the total time deltas separated by the type of hours (working hours, outside working hours, holidays or weekends). - output_path (Union[str, Path]): Path to the output file. + output_path (str | Path): Path to the output file. """ fig, ax = plt.subplots() - data = [time_delta_to_shifts(value) if isinstance(value, timedelta) else value for value in parts.values() ] + data = [time_delta_to_shifts(value) if isinstance(value, timedelta) else value for value in parts.values()] labels = [f"{SHIFT_NAMING[k]}: {v:.1f}" for k, v in zip(parts.keys(), data)] - + colors = [f"C{i}" for i, k in enumerate(parts.keys())] # fix colors + explode = [0.1 * (s == WORK) for s in parts.keys()] # explode working hours + + # filter shift-entries that were not present + def filter_by_data(array): + return [a for a, d in zip(array, data) if d] + labels = filter_by_data(labels) + colors = filter_by_data(colors) + explode = filter_by_data(explode) + data = filter_by_data(data) # filter data last! + + # plot ax.pie( - data, labels=labels, + data, labels=labels, colors=colors, autopct='%1.1f%%', - explode=[0.1 * (s == WORK) for s in parts.keys()], + explode=explode, shadow=True, # startangle=90, # rotate if needed # counterclock=False, # go the other way around @@ -338,6 +312,62 @@ def plot_results(parts, title: str = "", output_path: Union[str, Path] = None): return fig +def plot_all_machines_in_year( + year: int, additional: dict[str, float], calculate: bool = False, output_path: str | Path = None + ) -> Figure: + """Do a pychart for all machines of a specific year. + + Args: + year (int): The year to plot. + additional (dict[str, float]): Additional data to plot. + calculate (bool, optional): If True, calculate the shifts from the logbook file. Defaults to False. + output_path (str | Path, optional): Path to the output file. + + Returns: + Figure: Figure of the plot. + """ + color_map = {name: f"C{ii}" for ii, name in enumerate(["lhc", "sps", "ps", "psb", "leir", "ad"])} + + data_map: dict[str, float] = {} + for file_path in logbook_dir.glob(f"{year:4d}_*.md"): + machine = file_path.stem.split("_")[1] + if calculate: + shift = calculate_shifts(file_path) + else: + shift = manual_shifts(file_path) + times = [time_delta_to_shifts(value) if isinstance(value, timedelta) else value for value in shift.values()] + data_map[machine] = sum(times) + + for name, value in additional.items(): + if name in data_map: + data_map[name] += value + else: + data_map[name] = value + + # data to list + data = [d for d in data_map.values() if d] + colors = [color_map[name] for name, d in data_map.items() if d] + labels = [f"{name.upper()}: {d:.1f}" for name, d in data_map.items() if d] + + # plot + fig, ax = plt.subplots() + ax.pie( + data, labels=labels, colors=colors, + autopct='%1.1f%%', + shadow=True, + # startangle=90, # rotate if needed + # counterclock=False, # go the other way around + ) + + title = f"Total OMC Shifts in {year:d}: {sum(data):.1f}" + ax.set_title(title) + ax.axis('equal') + fig.tight_layout() + if output_path: + fig.savefig(output_path) + + + if __name__ == "__main__": # Run Tests ------------------------------------------------ test_timedelta_conversion() @@ -353,19 +383,45 @@ def plot_results(parts, title: str = "", output_path: Union[str, Path] = None): repo_dir = Path(__file__).parent.parent logbook_dir = repo_dir / "docs" / "resources" / "logbook" + # 2023 --------------------------------------------------------------------- + # shift_c = calculate_shifts(logbook_dir / "2023_lhc.md") # plot_results(shift_c, title="OMC Shifts LHC 2023 (from Start/End)") # shift_c = calculate_shifts(logbook_dir / "2023_ps.md") # plot_results(shift_c, title="OMC Shifts PS 2023 (from Start/End)") - shift_m = manual_shifts(logbook_dir / "2023_lhc.md") - plot_results(shift_m, title="OMC Shifts LHC 2023", output_path="lhc_2023_shifts.pdf") + # shift_m = manual_shifts(logbook_dir / "2023_lhc.md") + # plot_results(shift_m, title="OMC Shifts LHC 2023", output_path="lhc_2023_shifts.pdf") - shift_m = manual_shifts(logbook_dir / "2023_ps.md") - plot_results(shift_m, title="OMC Shifts PS 2023", output_path="ps_2023_shifts.pdf") + # shift_m = manual_shifts(logbook_dir / "2023_ps.md") + # plot_results(shift_m, title="OMC Shifts PS 2023", output_path="ps_2023_shifts.pdf") - shift_m = manual_shifts(logbook_dir / "2023_psb.md") - plot_results(shift_m, title="OMC Shifts PSBooster 2023", output_path="psb_2023_shifts.pdf") + # shift_m = manual_shifts(logbook_dir / "2023_psb.md") + # plot_results(shift_m, title="OMC Shifts PSBooster 2023", output_path="psb_2023_shifts.pdf") + + # 2024 --------------------------------------------------------------------- + + # shift_m = manual_shifts(logbook_dir / "2024_lhc.md") + # plot_results(shift_m, title="OMC Shifts LHC 2024", output_path="lhc_2024_shifts.pdf") + + # shift_m = manual_shifts(logbook_dir / "2024_lhc.md", shift_type="Commissioning") + # plot_results(shift_m, title="OMC Shifts LHC 2024 (Commissioning)", output_path="lhc_2024_shifts_commish.pdf") + + # shift_m = manual_shifts(logbook_dir / "2024_lhc.md", shift_type="MD") + # plot_results(shift_m, title="OMC Shifts LHC 2024 (MDs)", output_path="lhc_2024_shifts_md.pdf") + + # shift_m = manual_shifts(logbook_dir / "2024_ps.md") + # plot_results(shift_m, title="OMC Shifts PS 2024", output_path="ps_2024_shifts.pdf") + + # shift_m = manual_shifts(logbook_dir / "2024_psb.md") + # plot_results(shift_m, title="OMC Shifts PSBooster 2024", output_path="psb_2024_shifts.pdf") + + # shift_m = manual_shifts(logbook_dir / "2024_sps.md") + # plot_results(shift_m, title="OMC Shifts SPS 2024", output_path="sps_2024_shifts.pdf") + + plot_all_machines_in_year(2024, {"leir": 19}, output_path="machines_2024.pdf") + plt.show() + From 96f556ed2183db6f24e2aa191ddd66ba633120da Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Tue, 26 Nov 2024 17:37:06 +0100 Subject: [PATCH 2/4] added 2022 plotting --- scripts/shift_calculations.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/scripts/shift_calculations.py b/scripts/shift_calculations.py index 6438bbfb..40cdabb1 100644 --- a/scripts/shift_calculations.py +++ b/scripts/shift_calculations.py @@ -8,7 +8,7 @@ import matplotlib as mpl from matplotlib import pyplot as plt from matplotlib.figure import Figure -from scripts.parse_md_table import parse_file +from parse_md_table import parse_file # Check dates ------------------------------------------------------------------ @@ -383,6 +383,11 @@ def plot_all_machines_in_year( repo_dir = Path(__file__).parent.parent logbook_dir = repo_dir / "docs" / "resources" / "logbook" + # 2022 --------------------------------------------------------------------- + + shift_m = manual_shifts(logbook_dir / "2022_lhc.md") + plot_results(shift_m, title="OMC Shifts LHC 2022", output_path="lhc_2022_shifts.pdf") + # 2023 --------------------------------------------------------------------- # shift_c = calculate_shifts(logbook_dir / "2023_lhc.md") @@ -391,8 +396,8 @@ def plot_all_machines_in_year( # shift_c = calculate_shifts(logbook_dir / "2023_ps.md") # plot_results(shift_c, title="OMC Shifts PS 2023 (from Start/End)") - # shift_m = manual_shifts(logbook_dir / "2023_lhc.md") - # plot_results(shift_m, title="OMC Shifts LHC 2023", output_path="lhc_2023_shifts.pdf") + shift_m = manual_shifts(logbook_dir / "2023_lhc.md") + plot_results(shift_m, title="OMC Shifts LHC 2023", output_path="lhc_2023_shifts.pdf") # shift_m = manual_shifts(logbook_dir / "2023_ps.md") # plot_results(shift_m, title="OMC Shifts PS 2023", output_path="ps_2023_shifts.pdf") @@ -402,8 +407,8 @@ def plot_all_machines_in_year( # 2024 --------------------------------------------------------------------- - # shift_m = manual_shifts(logbook_dir / "2024_lhc.md") - # plot_results(shift_m, title="OMC Shifts LHC 2024", output_path="lhc_2024_shifts.pdf") + shift_m = manual_shifts(logbook_dir / "2024_lhc.md") + plot_results(shift_m, title="OMC Shifts LHC 2024", output_path="lhc_2024_shifts.pdf") # shift_m = manual_shifts(logbook_dir / "2024_lhc.md", shift_type="Commissioning") # plot_results(shift_m, title="OMC Shifts LHC 2024 (Commissioning)", output_path="lhc_2024_shifts_commish.pdf") @@ -420,7 +425,7 @@ def plot_all_machines_in_year( # shift_m = manual_shifts(logbook_dir / "2024_sps.md") # plot_results(shift_m, title="OMC Shifts SPS 2024", output_path="sps_2024_shifts.pdf") - plot_all_machines_in_year(2024, {"leir": 19}, output_path="machines_2024.pdf") + # plot_all_machines_in_year(2024, {"leir": 19}, output_path="machines_2024.pdf") plt.show() From 158b80c20394ee48a67d48f2c30eb73136645c3d Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:26:58 +0100 Subject: [PATCH 3/4] return types --- scripts/shift_calculations.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/shift_calculations.py b/scripts/shift_calculations.py index 40cdabb1..0e049484 100644 --- a/scripts/shift_calculations.py +++ b/scripts/shift_calculations.py @@ -84,7 +84,7 @@ def same_day(d1: datetime, d2: datetime) -> bool: return (d1.year == d2.year) and (d1.month == d2.month) and (d1.day == d2.day) -def is_holiday(date: datetime): +def is_holiday(date: datetime) -> bool: """ True is date is on a known holiday. """ return any(same_day(date, h) for h in CERN_HOLIDAYS) @@ -138,11 +138,11 @@ def calculate_shift_parts(start_time: datetime, end_time: datetime) -> dict[str, return time_split -def time_delta_to_hours(time_delta: timedelta): +def time_delta_to_hours(time_delta: timedelta) -> float: return time_delta.total_seconds() / 3600 -def time_delta_to_shifts(time_delta: timedelta): +def time_delta_to_shifts(time_delta: timedelta) -> float: return time_delta_to_hours(time_delta) / SHIFT_LENGTH @@ -365,6 +365,8 @@ def plot_all_machines_in_year( fig.tight_layout() if output_path: fig.savefig(output_path) + + return fig From 5159a2d543fff3a47fae708e803a1cb6d55f8c78 Mon Sep 17 00:00:00 2001 From: JoschD <26184899+JoschD@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:30:15 +0100 Subject: [PATCH 4/4] added requirements for scripts --- scripts/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 scripts/requirements.txt diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..5aa2d4f5 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,3 @@ +titlecase +pandas +matplotlib \ No newline at end of file