In [1]:
import pandas as pd
import numpy as np
import random
import csv
import re

# Disable row truncation so displayed DataFrames/Series show every row.
# NOTE(review): with large tables this floods the cell output; consider
# .head()/.sample() or a scoped pd.option_context instead.
pd.set_option("display.max_rows", None)

In [2]:
# Load the CPU and GPU reference tables; the first CSV column becomes the
# DataFrame index (index_col=0).
# NOTE(review): paths are relative to the notebook's working directory, and
# the index is presumably a saved 0..n-1 counter - confirm against the files.
cpu_df = pd.read_csv('cpu_mark.csv', index_col = 0)
gpu_df = pd.read_csv('gpu_mark.csv', index_col = 0)

In [10]:
# Lower-cased lookup lists, positionally aligned with the rows of cpu_df /
# gpu_df (entry i corresponds to row i). 'ghz' is stripped from CPU names
# only. Iterating the column directly avoids a label lookup per element and
# keeps working even if the index labels are not unique.
cpu_name_list = [name.lower().replace('ghz', '') for name in cpu_df['CPU Name']]
gpu_name_list = [name.lower() for name in gpu_df['GPU Name']]

In [7]:
from Levenshtein import ratio
"""
def LCS(s, t):
    n = len(s)
    m = len(t)
    s=' '+s
    t=' '+t

    f = [[0 for i in range(m + 1)] for j in range(n+1)]
     
    for i in range(1,n + 1):
        for j in range(1,m + 1):
            if(s[i - 1] == t[j - 1]):
                f[i][j] = f[i - 1][j - 1] + 1
            else:
                f[i][j] = max(f[i-1][j], f[i][j-1])
    return (f[n][m]+n+m)/(n+m)
"""
def find_relative_string(s, slist):
    """Find the entry of ``slist`` that scores highest against ``s``.

    Each candidate is scored with ``ratio(s, candidate)`` and the candidate
    with the largest score is kept.

    Args:
        s: Query string.
        slist: Sequence of candidate strings (a list in this notebook).

    Returns:
        A two-element list ``[best_score, best_position]``. On ties the
        earliest candidate wins. If nothing scores above -1 (for example
        when ``slist`` is empty) the result is ``[-1, -1]``.
    """
    best_score, best_pos = -1, -1
    for pos, candidate in enumerate(slist):
        score = ratio(s, candidate)
        # Strict comparison: a later candidate must beat, not equal, the leader.
        if score > best_score:
            best_score, best_pos = score, pos
    return [best_score, best_pos]

def transform_cpu(row, min_ratio=0.5):
    """Attach the best-matching reference CPU to a row.

    The row's ``'CPU'`` text is lower-cased and stripped of ``'ghz'`` (the
    same normalisation applied when ``cpu_name_list`` is built), then matched
    against ``cpu_name_list`` with ``find_relative_string``.

    Args:
        row: Mapping/Series with a ``'CPU'`` entry; modified in place.
        min_ratio: The match is accepted only when its score is strictly
            greater than this value (default 0.5).

    Returns:
        The same ``row``. When ``'CPU'`` is present, ``'CPU Name'`` and
        ``'CPU Mark'`` are set to the matched reference values, or to NaN if
        the best score does not exceed ``min_ratio``. When ``'CPU'`` is
        missing (NaN/None) the row is returned untouched.
    """
    cpu = row['CPU']
    if pd.notna(cpu):
        query = cpu.lower().replace('ghz', '')
        acc, pos = find_relative_string(query, cpu_name_list)
        if acc > min_ratio:
            # `pos` is a position in cpu_name_list, i.e. a row position in
            # cpu_df, so use .iloc; plain [] would treat it as an index label.
            row["CPU Name"] = cpu_df['CPU Name'].iloc[pos]
            # NOTE(review): "CPU Mark" is filled from the 'CPU Rank' column -
            # confirm that this is the intended source column.
            row["CPU Mark"] = cpu_df['CPU Rank'].iloc[pos]
        else:
            row["CPU Name"] = np.nan
            row["CPU Mark"] = np.nan
    return row

def transform_gpu(row, min_ratio=0.5):
    """Attach the best-matching reference GPU to a row.

    The row's ``'GPU'`` text is lower-cased and matched against
    ``gpu_name_list`` with ``find_relative_string``.

    Args:
        row: Mapping/Series with a ``'GPU'`` entry; modified in place.
        min_ratio: The match is accepted only when its score is strictly
            greater than this value (default 0.5).

    Returns:
        The same ``row``. When ``'GPU'`` is present, ``'GPU Name'`` and
        ``'GPU Mark'`` are set to the matched reference values, or to NaN if
        the best score does not exceed ``min_ratio``. When ``'GPU'`` is
        missing (NaN/None) the row is returned untouched.
    """
    gpu = row['GPU']
    if pd.notna(gpu):
        query = gpu.lower()
        acc, pos = find_relative_string(query, gpu_name_list)
        if acc > min_ratio:
            # `pos` is a position in gpu_name_list, i.e. a row position in
            # gpu_df, so use .iloc; plain [] would treat it as an index label.
            row["GPU Name"] = gpu_df['GPU Name'].iloc[pos]
            # NOTE(review): "GPU Mark" is filled from the 'GPU Rank' column -
            # confirm that this is the intended source column.
            row["GPU Mark"] = gpu_df['GPU Rank'].iloc[pos]
        else:
            row["GPU Name"] = np.nan
            row["GPU Mark"] = np.nan
    return row


In [4]:
# CER

def character_error_rate(ref_string, hyp_string):
    """Compute the character error rate of ``hyp_string`` against ``ref_string``.

    The error count is the Levenshtein distance (insertions, deletions and
    substitutions, each costing 1) between the two sequences; it is divided
    by the length of the reference.

    Args:
        ref_string: Reference (ground-truth) sequence.
        hyp_string: Hypothesis sequence to evaluate.

    Returns:
        ``errors / len(ref_string)`` as a float, or ``0`` when the reference
        is empty.
    """
    ref_len = len(ref_string)
    hyp_len = len(hyp_string)

    # Rolling-row edit distance: `prev` holds the distances for the previous
    # reference prefix, `curr` those for the prefix ending at `ref_ch`.
    prev = list(range(hyp_len + 1))
    for r, ref_ch in enumerate(ref_string, start=1):
        curr = [r]
        for h, hyp_ch in enumerate(hyp_string, start=1):
            if ref_ch == hyp_ch:
                curr.append(prev[h - 1])
            else:
                curr.append(1 + min(prev[h - 1], prev[h], curr[h - 1]))
        prev = curr

    edit_errors = prev[hyp_len]
    if ref_len == 0:
        return 0
    return edit_errors / ref_len

In [16]:
# NOTE(review): this repeats the find_relative_string definition from the
# earlier cell that imports `ratio`; consider keeping a single copy.
def find_relative_string(s, slist):
    """Find the entry of ``slist`` that scores highest against ``s``.

    Each candidate is scored with ``ratio(s, candidate)`` and the candidate
    with the largest score is kept.

    Args:
        s: Query string.
        slist: Sequence of candidate strings (a list in this notebook).

    Returns:
        A two-element list ``[best_score, best_position]``. On ties the
        earliest candidate wins. If nothing scores above -1 (for example
        when ``slist`` is empty) the result is ``[-1, -1]``.
    """
    best_score, best_pos = -1, -1
    for pos, candidate in enumerate(slist):
        score = ratio(s, candidate)
        # Strict comparison: a later candidate must beat, not equal, the leader.
        if score > best_score:
            best_score, best_pos = score, pos
    return [best_score, best_pos]


def transform_cpu_test(name, min_ratio=0.5):
    """Return the reference CPU name that best matches ``name``.

    ``name`` is lower-cased and stripped of ``'ghz'`` before being matched
    against ``cpu_name_list``. The best score and its position are printed
    for inspection.

    Args:
        name: Free-form CPU description.
        min_ratio: The match is accepted only when its score is strictly
            greater than this value (default 0.5).

    Returns:
        The matched ``'CPU Name'`` from ``cpu_df``, or ``''`` when the best
        score does not exceed ``min_ratio``.
    """
    name_low = name.lower().replace('ghz', '')
    acc, pos = find_relative_string(name_low, cpu_name_list)
    print(acc, pos)
    if acc > min_ratio:
        # `pos` is a row position (it indexes cpu_name_list), not an index
        # label, so the lookup must be positional.
        return cpu_df['CPU Name'].iloc[pos]
    return ''

def transform_gpu_test(name, min_ratio=0.5):
    """Return the reference GPU name that best matches ``name``.

    ``name`` is lower-cased before being matched against ``gpu_name_list``.
    The best score and its position are printed for inspection.

    Args:
        name: Free-form GPU description.
        min_ratio: The match is accepted only when its score is strictly
            greater than this value (default 0.5).

    Returns:
        The matched ``'GPU Name'`` from ``gpu_df``, or ``''`` when the best
        score does not exceed ``min_ratio``.
    """
    name_low = name.lower()
    acc, pos = find_relative_string(name_low, gpu_name_list)
    print(acc, pos)
    if acc > min_ratio:
        # `pos` is a row position (it indexes gpu_name_list), not an index
        # label, so the lookup must be positional.
        return gpu_df['GPU Name'].iloc[pos]
    return ''

Intel Core i5-1335U (13th Gen)      87

Intel Core i7-1355U (13th Gen)      77

Intel Core i7-1360P (13th Gen)   

In [38]:
# Spot-check the CPU matcher on a listing-style name; transform_cpu_test
# prints the best score and position, then the matched name is printed.
cpu_name = 'Intel Core i5 13th Gen 13420H (2.1)'
print(transform_cpu_test(cpu_name))

0.7272727272727273 2422
Intel Core i5 13420H


In [34]:
# Spot-check the GPU matcher on a short query, then print the raw ratio
# between two similar strings for reference.
# NOTE(review): the recorded output maps 'rtx 4050' to 'RTX A4000' with a
# score of about 0.82, so clearing the 0.5 threshold does not by itself
# guarantee the right model was picked.
gpu_name = 'rtx 4050'
print(transform_gpu_test(gpu_name))
print(ratio('intel iris xe graphics', 'intel iris xe'))

0.8235294117647058 2306
RTX A4000
0.7428571428571429
