In [1]:
import pandas as pd
import numpy as np
import time

In [2]:
# Load the shop CPU listing and the benchmark table from the parent directory.
cpu_csv_path = '../part_cpu230921.csv'
bench_csv_path = '../benchmark_cpu.csv'

cpu = pd.read_csv(cpu_csv_path)
cpu_bench = pd.read_csv(bench_csv_path)

In [3]:
# Preview the raw CPU names from the shop listing.
cpu['name']

0         AMD 라이젠5-5세대 7600
1      AMD 라이젠7-5세대 7800X3D
2       인텔 코어i5-13세대 13400F
3        AMD 라이젠5-5세대 7500F
4        AMD 라이젠5-5세대 7600X
               ...         
420             인텔 펜티엄 G870
421             인텔 펜티엄 G850
422             인텔 펜티엄 G840
423            인텔 펜티엄 E2200
424          인텔 코어2듀오 E6600
Name: name, Length: 425, dtype: object

In [4]:
# Show only the shop names that contain the fragment '코어'.
has_core = cpu['name'].str.contains('코어')
cpu.loc[has_core, 'name']

2       인텔 코어i5-13세대 13400F
6       인텔 코어i9-13세대 13900K
7       인텔 코어i5-12세대 12400F
8      인텔 코어i5-13세대 13600KF
9       인텔 코어i7-13세대 13700K
               ...         
412        인텔 코어i3-4세대 4330
413       인텔 코어i3-7세대 7300T
414          인텔 코어2듀오 E8200
415          인텔 코어2듀오 E6750
424          인텔 코어2듀오 E6600
Name: name, Length: 205, dtype: object

In [21]:
# Show the benchmark names that carry a 'GHz' clock suffix.
has_ghz = cpu_bench['name'].str.contains('GHz')
cpu_bench.loc[has_ghz, 'name']

41       Intel Xeon Platinum 8380 @ 2.30GHz
50              Intel Xeon W-3375 @ 2.50GHz
51              Intel Xeon W-3365 @ 2.70GHz
60      Intel Xeon Platinum 8375C @ 2.90GHz
66       Intel Xeon Platinum 8358 @ 2.60GHz
                       ...                 
4404                  Intel Celeron 1.70GHz
4407                  Intel Celeron 1.80GHz
4410                Intel Pentium 4 1.70GHz
4411                Intel Pentium 4 1.50GHz
4412                Intel Pentium 4 1.60GHz
Name: name, Length: 2217, dtype: object

In [6]:
# Gather every distinct name token (split on space or hyphen) that contains '+'.
unique_values = {
    token
    for tokens in cpu['name'].str.split(r' |-')
    for token in tokens
    if '+' in token
}

In [7]:
# Display the collected '+'-containing tokens.
unique_values

{'+'}

In [8]:
# Regex pattern -> replacement tables used to normalize CPU names before
# tokenizing them.
#
# cpu_regex_map rewrites the Korean fragments of the shop names into the
# English tokens used by the benchmark names. Most replacements carry a
# trailing (or leading) space so that fused names such as '라이젠5' come out as
# separate tokens; the surplus spaces are collapsed later by none_regex_map.
# '코어2' is listed before '코어' - presumably so the longer pattern is handled
# first; keep that order.
cpu_regex_map = {
    '인텔': 'Intel ',
    '라이젠': 'Ryzen ',
    '코어2': 'Core2 ',
    '코어': 'Core ',
    '스레드리퍼': 'Threadripper ',
    r'-\d+세대': ' ',          # drop the '-<N>세대' generation suffix
    '펜티엄': 'Pentium ',
    '셀러론': 'Celeron ',
    '골드': 'Gold ',
    'X-시리즈': ' ',
    '어벤져스 에디션': ' ',
    '제온': 'Xeon ',
    '브론즈': 'Bronze ',       # was misspelled 'Beonze ', which matches no benchmark token
    '스케일러블': ' ',
    '실버': 'Silver ',
    '애슬론': 'Athlon',
    '듀오': ' Duo',
    '쿼드 ': ' Quad ',
    '플래티넘': 'Platinum ',
    r'\+ ': ' ',              # raw string: '\+' is an invalid escape in a plain literal
    'V6': ' v6 ',
    'V5': ' v5 ',
    'V2': ' v2 ',
    'V4': ' v4 ',
    'V3': ' v3 ',
}

# Strips '@', '+' and the 'GHz' unit from the benchmark names.
bench_regex_map = {'@': ' ', r'\+': '', 'GHz': ''}

# Collapses runs of spaces into a single space.
none_regex_map = {r' +': ' '}

In [9]:
# Normalize both tables: first apply the table-specific substitutions, then
# collapse the repeated spaces those substitutions leave behind.
cpu_translated = cpu.replace(regex=cpu_regex_map)
cpu_reg = cpu_translated.replace(regex=none_regex_map)

bench_stripped = cpu_bench.replace(regex=bench_regex_map)
ben_reg = bench_stripped.replace(regex=none_regex_map)

In [10]:
# Tokenize the normalized names on spaces and hyphens.
token_separator = r' |-'
c_names = cpu_reg['name'].str.split(token_separator)
bench_names = ben_reg['name'].str.split(token_separator)

In [11]:
# Preview the token lists built from the normalized shop CPU names.
c_names

0           [AMD, Ryzen, 5, 7600]
1        [AMD, Ryzen, 7, 7800X3D]
2       [Intel, Core, i5, 13400F]
3          [AMD, Ryzen, 5, 7500F]
4          [AMD, Ryzen, 5, 7600X]
                  ...            
420        [Intel, Pentium, G870]
421        [Intel, Pentium, G850]
422        [Intel, Pentium, G840]
423       [Intel, Pentium, E2200]
424    [Intel, Core2, Duo, E6600]
Name: name, Length: 425, dtype: object

In [12]:
# Preview the token lists built from the normalized benchmark names.
bench_names

0                             [AMD, EPYC, 9654]
1                            [AMD, EPYC, 9554P]
2                            [AMD, EPYC, 9474F]
3                      [Intel, Xeon, w9, 3495X]
4       [AMD, Ryzen, Threadripper, PRO, 5995WX]
                         ...                   
4412               [Intel, Pentium, 4, 1.60GHz]
4413               [Intel, Pentium, 4, 1400MHz]
4414               [Intel, Pentium, 4, 1500MHz]
4415                       [VIA, Eden, 1000MHz]
4416               [Intel, Pentium, 4, 1300MHz]
Name: name, Length: 4417, dtype: object

In [13]:
# Append each CPU's clock value (memory_clock * 0.001) to its name tokens.
#
# The token lists are rebuilt from cpu_reg instead of being mutated in place,
# so re-running this cell cannot append the clock a second time (which would
# silently change every similarity score computed from c_names).
#
# NOTE(review): the appended value is a float while the benchmark tokens are
# strings produced by str.split, so it can never be part of a set intersection
# with them - confirm whether a formatted string (e.g. '2.30') was intended.
cmc = cpu_reg['memory_clock'].mul(0.001)

c_names = pd.Series(
    [tokens + [clock] for tokens, clock in zip(cpu_reg['name'].str.split(r' |-'), cmc)],
    index=cpu_reg.index,
    name='name',
)

In [14]:
# Match every shop CPU to its most similar benchmark row.
#
# Similarity is the Jaccard index of the two token sets, in percent:
#     len(A & B) / len(A | B) * 100
# Results: name_list[i] and bench_list[i] receive the benchmark 'name' and
# 'count' values selected for the i-th CPU.
name_list = [''] * len(c_names)
bench_list = [0] * len(c_names)

# Build each benchmark token set once, instead of once per (CPU, benchmark) pair.
bench_token_sets = [set(bench_names.loc[j]) for j in range(len(bench_names))]

for i in range(len(c_names)):
    cn_set = set(c_names.loc[i])
    max_val = 0
    best_j = None
    for j, bn_set in enumerate(bench_token_sets):
        val = len(cn_set & bn_set) / float(len(cn_set | bn_set)) * 100
        # '>=' keeps the existing tie-break: the last row with the top score wins
        # (and a row is selected even when the best score is 0).
        if val >= max_val:
            max_val = val
            best_j = j
    if best_j is not None:
        bench_list[i] = cpu_bench.loc[best_j, 'count']
        name_list[i] = cpu_bench.loc[best_j, 'name']
    # Progress refers to the row just finished, so it agrees with the i+1 counter.
    percent = (i + 1) / len(c_names) * 100
    print(f'\rcpu: {i+1}/{len(c_names)} ({percent:.0f}%)', end='')

cpu: 425/425 (100%)

In [15]:
# Attach the matched benchmark values to the shop table as 'bench_mark'.
cpu = cpu.assign(bench_mark=bench_list)

In [16]:
# Write the enriched table next to the inputs, stamped with today's date (yymmdd).
today = time.strftime('%y%m%d')
# cpu is already a DataFrame, so no re-wrapping is needed; index=False is the
# documented way to omit the row index from the CSV.
cpu.to_csv(f'../part_cpu_wb{today}.csv', index=False)

In [17]:
# Spot check: which benchmark row has a 'count' of 25588?
count_matches = cpu_bench['count'].eq(25588)
cpu_bench.loc[count_matches, 'name']

293    Intel Core i5-13400F
Name: name, dtype: object

In [18]:
# Print one line per CPU: matched value, matched benchmark name, original shop name.
report_rows = zip(cpu['name'], name_list, bench_list)
for c_name, name, bench in report_rows:
    print(f':{bench:5d}::{name:35s}::{c_name}:')

:27651::AMD Ryzen 5 7600                   ::AMD 라이젠5-5세대 7600:
:34740::AMD Ryzen 7 7800X3D                ::AMD 라이젠7-5세대 7800X3D:
:25588::Intel Core i5-13400F               ::인텔 코어i5-13세대 13400F:
:25578::AMD Ryzen 5 7500F                  ::AMD 라이젠5-5세대 7500F:
:28830::AMD Ryzen 5 7600X                  ::AMD 라이젠5-5세대 7600X:
:19922::AMD Ryzen 5 5600G                  ::AMD 라이젠5-4세대 5600G:
:59769::Intel Core i9-13900K               ::인텔 코어i9-13세대 13900K:
:19639::Intel Core i5-12400F               ::인텔 코어i5-12세대 12400F:
:38230::Intel Core i5-13600KF              ::인텔 코어i5-13세대 13600KF:
:46838::Intel Core i7-13700K               ::인텔 코어i7-13세대 13700K:
:21936::AMD Ryzen 5 5600X                  ::AMD 라이젠5-4세대 5600X:
:21618::AMD Ryzen 5 5600                   ::AMD 라이젠5-4세대 5600:
:19483::Intel Core i5-12400                ::인텔 코어i5-12세대 12400:
:32496::Intel Core i5-13500                ::인텔 코어i5-13세대 13500:
:16267::AMD Ryzen 5 PRO 4650G              ::AMD 라이젠5 PRO 4650G:
:36365::AMD Ryzen 7