Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simplify get_matching_cpu implementation #385

Merged
merged 1 commit into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ requirements:
- psutil
- pynvml
- py-cpuinfo
- fuzzywuzzy
- prometheus_client
- rapidfuzz

test:
imports:
Expand Down
72 changes: 25 additions & 47 deletions codecarbon/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
import shutil
import subprocess
import sys
import warnings
from typing import Dict, Tuple

import pandas as pd

with warnings.catch_warnings(record=True) as w:
from fuzzywuzzy import fuzz
from rapidfuzz import fuzz, process, utils

from codecarbon.core.rapl import RAPLFile
from codecarbon.core.units import Time
Expand Down Expand Up @@ -278,27 +275,6 @@ def _get_cpu_power_from_registry(self, cpu_model_raw: str) -> int:
return power
return None

@staticmethod
def _get_cpus(cpu_df, cpu_idxs) -> list:
return [cpu_df["Name"][idx] for idx in cpu_idxs]

@staticmethod
def _get_direct_matches(moodel: str, cpu_df: pd.DataFrame) -> list:
model_l = moodel.lower()
return [fuzz.ratio(model_l, cpu.lower()) for cpu in cpu_df["Name"]]

@staticmethod
def _get_token_set_matches(model: str, cpu_df: pd.DataFrame) -> list:
return [fuzz.token_set_ratio(model, cpu) for cpu in cpu_df["Name"]]

@staticmethod
def _get_single_direct_match(
ratios: list, max_ratio: int, cpu_df: pd.DataFrame
) -> str:
idx = ratios.index(max_ratio)
cpu_matched = cpu_df["Name"].iloc[idx]
return cpu_matched

def _get_matching_cpu(
self, model_raw: str, cpu_df: pd.DataFrame, greedy=False
) -> str:
Expand Down Expand Up @@ -332,32 +308,34 @@ def _get_matching_cpu(
THRESHOLD_DIRECT = 100
THRESHOLD_TOKEN_SET = 100

ratios_direct = self._get_direct_matches(model_raw, cpu_df)
ratios_token_set = self._get_token_set_matches(model_raw, cpu_df)
max_ratio_direct = max(ratios_direct)
max_ratio_token_set = max(ratios_token_set)
direct_match = process.extractOne(
model_raw,
cpu_df["Name"],
processor=lambda s: s.lower(),
scorer=fuzz.ratio,
score_cutoff=THRESHOLD_DIRECT,
)

# Check if a direct match exists
if max_ratio_direct >= THRESHOLD_DIRECT:
cpu_matched = self._get_single_direct_match(
ratios_direct, max_ratio_direct, cpu_df
)
return cpu_matched
if direct_match:
return direct_match[0]

# Check if an indirect match exists
if max_ratio_token_set < THRESHOLD_TOKEN_SET:
return None
cpu_idxs = self._get_max_idxs(ratios_token_set, max_ratio_token_set)
cpu_machings = self._get_cpus(cpu_df, cpu_idxs)
indirect_matches = process.extract(
model_raw,
cpu_df["Name"],
processor=utils.default_process,
scorer=fuzz.token_set_ratio,
score_cutoff=THRESHOLD_TOKEN_SET,
)

if (cpu_machings and len(cpu_machings) == 1) or greedy:
cpu_matched = cpu_machings[0]
return cpu_matched
return None
if indirect_matches:
if (
greedy
or len(indirect_matches) == 1
or indirect_matches[0][1] != indirect_matches[1][1]
):
return indirect_matches[0][0]

@staticmethod
def _get_max_idxs(ratios: list, max_ratio: int) -> list:
return [idx for idx, ratio in enumerate(ratios) if ratio == max_ratio]
return None

def _main(self) -> Tuple[str, int]:
"""
Expand Down
2 changes: 1 addition & 1 deletion docs/_sources/installation.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ The following packages are used by the CodeCarbon package, and will be installed
requests
psutil
py-cpuinfo
fuzzywuzzy
click
rapidfuzz
prometheus_client

Please refer to `setup.py <https://github.com/mlco2/codecarbon/blob/347a802a3478a5740f04b3a7b6b5f379b38000a7/setup.py#L6>`_ for the latest list of the packages used.
2 changes: 1 addition & 1 deletion docs/edit/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ The following packages are used by the CodeCarbon package, and will be installed
requests
psutil
py-cpuinfo
fuzzywuzzy
click
rapidfuzz
prometheus_client

Please refer to `setup.py <https://github.com/mlco2/codecarbon/blob/347a802a3478a5740f04b3a7b6b5f379b38000a7/setup.py#L6>`_ for the latest list of the packages used.
2 changes: 1 addition & 1 deletion docs/installation.html
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ <h2>Dependencies<a class="headerlink" href="#dependencies" title="Link to this h
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>arrow
pandas
pynvml
rapidfuzz
requests
psutil
py-cpuinfo
fuzzywuzzy
click
prometheus_client
</pre></div>
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ sphinx-rtd-theme
responses
py-cpuinfo
psutil
fuzzywuzzy
prometheus_client
pre-commit
rapidfuzz
2 changes: 1 addition & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ tox
numpy
psutil
requests-mock
fuzzywuzzy
rapidfuzz
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"requests",
"psutil",
"py-cpuinfo",
"fuzzywuzzy",
"rapidfuzz",
"click",
"prometheus_client",
]
Expand Down
Loading