Skip to content

Commit

Permalink
Merge pull request #385 from maxbachmann/patch-1
Browse files (browse the repository at this point in the history)
simplify get_matching_cpu implementation
  • Loading branch information
benoit-cty committed Jan 18, 2024
2 parents 285f72f + 68e626d commit 4a515fd
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ requirements:
- psutil
- pynvml
- py-cpuinfo
- fuzzywuzzy
- prometheus_client
- rapidfuzz

test:
imports:
Expand Down
72 changes: 25 additions & 47 deletions codecarbon/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
import shutil
import subprocess
import sys
import warnings
from typing import Dict, Tuple

import pandas as pd

with warnings.catch_warnings(record=True) as w:
from fuzzywuzzy import fuzz
from rapidfuzz import fuzz, process, utils

from codecarbon.core.rapl import RAPLFile
from codecarbon.core.units import Time
Expand Down Expand Up @@ -278,27 +275,6 @@ def _get_cpu_power_from_registry(self, cpu_model_raw: str) -> int:
return power
return None

@staticmethod
def _get_cpus(cpu_df, cpu_idxs) -> list:
return [cpu_df["Name"][idx] for idx in cpu_idxs]

@staticmethod
def _get_direct_matches(moodel: str, cpu_df: pd.DataFrame) -> list:
model_l = moodel.lower()
return [fuzz.ratio(model_l, cpu.lower()) for cpu in cpu_df["Name"]]

@staticmethod
def _get_token_set_matches(model: str, cpu_df: pd.DataFrame) -> list:
return [fuzz.token_set_ratio(model, cpu) for cpu in cpu_df["Name"]]

@staticmethod
def _get_single_direct_match(
ratios: list, max_ratio: int, cpu_df: pd.DataFrame
) -> str:
idx = ratios.index(max_ratio)
cpu_matched = cpu_df["Name"].iloc[idx]
return cpu_matched

def _get_matching_cpu(
self, model_raw: str, cpu_df: pd.DataFrame, greedy=False
) -> str:
Expand Down Expand Up @@ -332,32 +308,34 @@ def _get_matching_cpu(
THRESHOLD_DIRECT = 100
THRESHOLD_TOKEN_SET = 100

ratios_direct = self._get_direct_matches(model_raw, cpu_df)
ratios_token_set = self._get_token_set_matches(model_raw, cpu_df)
max_ratio_direct = max(ratios_direct)
max_ratio_token_set = max(ratios_token_set)
direct_match = process.extractOne(
model_raw,
cpu_df["Name"],
processor=lambda s: s.lower(),
scorer=fuzz.ratio,
score_cutoff=THRESHOLD_DIRECT,
)

# Check if a direct match exists
if max_ratio_direct >= THRESHOLD_DIRECT:
cpu_matched = self._get_single_direct_match(
ratios_direct, max_ratio_direct, cpu_df
)
return cpu_matched
if direct_match:
return direct_match[0]

# Check if an indirect match exists
if max_ratio_token_set < THRESHOLD_TOKEN_SET:
return None
cpu_idxs = self._get_max_idxs(ratios_token_set, max_ratio_token_set)
cpu_machings = self._get_cpus(cpu_df, cpu_idxs)
indirect_matches = process.extract(
model_raw,
cpu_df["Name"],
processor=utils.default_process,
scorer=fuzz.token_set_ratio,
score_cutoff=THRESHOLD_TOKEN_SET,
)

if (cpu_machings and len(cpu_machings) == 1) or greedy:
cpu_matched = cpu_machings[0]
return cpu_matched
return None
if indirect_matches:
if (
greedy
or len(indirect_matches) == 1
or indirect_matches[0][1] != indirect_matches[1][1]
):
return indirect_matches[0][0]

@staticmethod
def _get_max_idxs(ratios: list, max_ratio: int) -> list:
return [idx for idx, ratio in enumerate(ratios) if ratio == max_ratio]
return None

def _main(self) -> Tuple[str, int]:
"""
Expand Down
2 changes: 1 addition & 1 deletion docs/_sources/installation.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ The following packages are used by the CodeCarbon package, and will be installed
requests
psutil
py-cpuinfo
fuzzywuzzy
click
rapidfuzz
prometheus_client
Please refer to `setup.py <https://github.com/mlco2/codecarbon/blob/347a802a3478a5740f04b3a7b6b5f379b38000a7/setup.py#L6>`_ for the latest list of the packages used.
2 changes: 1 addition & 1 deletion docs/edit/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ The following packages are used by the CodeCarbon package, and will be installed
requests
psutil
py-cpuinfo
fuzzywuzzy
click
rapidfuzz
prometheus_client
Please refer to `setup.py <https://github.com/mlco2/codecarbon/blob/347a802a3478a5740f04b3a7b6b5f379b38000a7/setup.py#L6>`_ for the latest list of the packages used.
2 changes: 1 addition & 1 deletion docs/installation.html
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ <h2>Dependencies<a class="headerlink" href="#dependencies" title="Link to this h
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>arrow
pandas
pynvml
rapidfuzz
requests
psutil
py-cpuinfo
fuzzywuzzy
click
prometheus_client
</pre></div>
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ sphinx-rtd-theme
responses
py-cpuinfo
psutil
fuzzywuzzy
prometheus_client
pre-commit
rapidfuzz
2 changes: 1 addition & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ tox
numpy
psutil
requests-mock
fuzzywuzzy
rapidfuzz
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"requests",
"psutil",
"py-cpuinfo",
"fuzzywuzzy",
"rapidfuzz",
"click",
"prometheus_client",
]
Expand Down

0 comments on commit 4a515fd

Please sign in to comment.