# Extract openbenchmarking.org results

### Import libraries

In [38]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import os

# Step 1 : list the different urls of openbenchmarking.org

1 url = 1 test profile

In [13]:
# Parse the locally saved listing page of the test profiles.  The file is read
# inside a context manager so the handle is closed as soon as it is consumed.
with open("./data/pages/test_profile.html") as page:
    parser = BeautifulSoup(page.read(), 'html.parser')

# Each candidate test profile is looked up in a <div class="row"> element.
listing = parser.find_all('div', class_='row')

# One (name, description, type, url) tuple per row; fields that cannot be
# found stay None so that the row is discarded by dropna() below.
lines = []

for row in listing:

    name = None
    url = None
    type_pts = None
    description = None

    # Direct "col-sm-6" children: the first one carries the profile link,
    # the second one the <h4> holding the profile type.
    content = row.find_all('div', class_='col-sm-6', recursive=False)

    if content:
        links = content[0].find_all("a")
        if links:
            name, url = links[0].get_text(), links[0].get("href")

    # Guarded separately: a row with a single column must not raise IndexError.
    if len(content) > 1:
        headings = content[1].find_all("h4")
        if headings:
            type_pts = headings[0].get_text()

    # The description is the first <span> of the first direct "col-sm-12" child.
    content12 = row.find_all('div', class_='col-sm-12', recursive=False)

    if content12:
        spans = content12[0].find_all('span')
        if spans:
            # Commas and newlines are stripped from the description text.
            description = spans[0].get_text().replace(",", "").replace("\n", "")

    lines.append((name, description, type_pts, url))

# Naming the columns at construction also works when no row was found;
# dropna() removes every row with at least one missing field.
df = pd.DataFrame(lines, columns=["name", "description", "type", "url"]).dropna()
df.to_csv("./data/list_urls.csv")
df

Unnamed: 0,name,description,type,url
2,AI Benchmark Alpha,AI Benchmark Alpha is a Python library for eva...,System,https://openbenchmarking.org/test/pts/ai-bench...
3,Aircrack-ng,Aircrack-ng is a tool for assessing WiFi/WLAN ...,Processor,https://openbenchmarking.org/test/pts/aircrack-ng
4,Algebraic Multi-Grid Benchmark,AMG is a parallel algebraic multigrid solver f...,Processor,https://openbenchmarking.org/test/pts/amg
5,AOBench,AOBench is a lightweight ambient occlusion ren...,Processor,https://openbenchmarking.org/test/pts/aobench
6,AOM AV1,This is a test of the AOMedia AV1 encoder (lib...,Processor,https://openbenchmarking.org/test/pts/aom-av1
...,...,...,...,...
466,Xsbench,XSBench is a mini-app representing a key compu...,System,https://openbenchmarking.org/test/pts/xsbench
467,Xsbench OpenCL,Xsbench benchmark in OpenCL via GPUOpen.,System,https://openbenchmarking.org/test/pts/xsbench-cl
468,Y-Cruncher,Y-Cruncher is a multi-threaded Pi benchmark.,Processor,https://openbenchmarking.org/test/pts/y-cruncher
469,YafaRay,YafaRay is an open-source physically based mon...,Processor,https://openbenchmarking.org/test/pts/yafaray


# Step 2 : Extract the content of test profiles

Input : 
- url of the webpage

Output : 
- dataset of performances related to the system under test

In [33]:
class ExtractorTP():
    """Extract the result table of an openbenchmarking.org test profile page.

    input : the url of an openbenchmarking.org webpage
    output : a dataframe (see extract_data) built from the page's
             'div_table_row' elements
    """

    def __init__(self, url, timeout=30):
        """Download and parse the page.

        url : address of the test profile page
        timeout : seconds passed to requests.get so that a stalled server
                  cannot block the caller forever
        """
        self.url = url
        html_text = requests.get(self.url, timeout=timeout).text
        self.parser = BeautifulSoup(html_text, 'html.parser')
        # to remove the "Low-Tier"-like lines
        self.banwords = ["Low-Tier", "Median", "Mid-Tier"]

    def extract_data(self):
        """Return a dataframe with one line per kept table row.

        Columns are idproc, descproc, percentile, nbproc and perf.  Rows for
        which extract_row returns an empty list are removed by dropna().
        Assigning the column names raises ValueError when the table does not
        yield exactly five columns (e.g. no row was found).
        """
        row_text = self.parser.find_all('div', class_='div_table_row')
        lines = [self.extract_row(rt) for rt in row_text]
        df = pd.DataFrame(lines).dropna()
        df.columns = ["idproc", "descproc", "percentile", "nbproc", "perf"]
        return df

    def extract_row(self, row_text):
        """Turn one table row into [href of first cell, text of each cell...].

        An empty list is returned for rows without any 'div_table_cell' and
        for rows whose first cell is one of the banwords.  When the first
        cell has no link, the href entry is the empty string.
        """
        content_line = []
        cells = row_text.find_all('div', class_='div_table_cell')

        # A row without cells has nothing to extract.
        if not cells:
            return content_line

        # Surrounding whitespace must not let a banned label slip through.
        if cells[0].get_text().strip() in self.banwords:
            return content_line

        link = cells[0].find("a")
        content_line.append(link.get("href") if link is not None else "")
        content_line.extend(c.get_text() for c in cells)
        return content_line

### Test with x264

In [11]:
# Smoke test: download the x264 test profile page and display its result table.
ext = ExtractorTP('https://openbenchmarking.org/test/pts/x264')
ext.extract_data()

Unnamed: 0,idproc,descproc,percentile,nbproc,perf
0,/s/AMD+EPYC+7763+64-Core,AMD EPYC 7763 64-Core,100th,6,236 +/- 3
1,/s/AMD+EPYC+75F3+32-Core,AMD EPYC 75F3 32-Core,100th,8,229 +/- 11
2,/s/AMD+EPYC+7713+64-Core,AMD EPYC 7713 64-Core,99th,6,225 +/- 2
3,/s/2+x+AMD+EPYC+75F3+32-Core,2 x AMD EPYC 75F3 32-Core,99th,13,223 +/- 28
4,/s/2+x+AMD+EPYC+7713+64-Core,2 x AMD EPYC 7713 64-Core,99th,7,222 +/- 3
...,...,...,...,...,...
176,/s/Intel+Pentium+Dual+E2220,Intel Pentium Dual E2220,6th,3,5
177,/s/ARMv8+Cortex-A55+4-Core,ARMv8 Cortex-A55 4-Core,5th,3,4
178,/s/Intel+Pentium+Dual+T2310,Intel Pentium Dual T2310,4th,7,3
179,/s/ARMv7+Cortex-A53+4-Core,ARMv7 Cortex-A53 4-Core,4th,3,2


### Test with x265

In [12]:
# Smoke test: download the x265 test profile page and display its result table.
ext = ExtractorTP('https://openbenchmarking.org/test/pts/x265')
ext.extract_data()

Unnamed: 0,idproc,descproc,percentile,nbproc,perf
0,/s/AMD+EPYC+75F3+32-Core,AMD EPYC 75F3 32-Core,100th,9,31.6 +/- 0.3
1,/s/Intel+Xeon+Platinum+8380,Intel Xeon Platinum 8380,100th,6,31.2
2,/s/AMD+EPYC+7643+48-Core,AMD EPYC 7643 48-Core,100th,3,30.1 +/- 0.3
3,/s/AMD+EPYC+7763+64-Core,AMD EPYC 7763 64-Core,99th,14,30.0 +/- 0.7
4,/s/AMD+EPYC+74F3+24-Core,AMD EPYC 74F3 24-Core,98th,6,29.3 +/- 0.1
...,...,...,...,...,...
154,/s/Intel+Core+i3-4130,Intel Core i3-4130,7th,3,2.8
155,/s/AMD+A10-7850K+APU,AMD A10-7850K APU,7th,6,2.6 +/- 0.1
156,/s/AMD+Ryzen+3+3200U,AMD Ryzen 3 3200U,6th,3,2.6
157,/s/POWER9+4-Core,POWER9 4-Core,4th,7,1.3


# Step 3 : Extract the description of processors

In [65]:
class ExtractorPROC():
    """Extract the raw <pre> blocks of an openbenchmarking.org component page.

    input : the url of an openbenchmarking.org webpage listing the technical
            details of a processor
    output : the list of the texts of every <pre> element of the page
             (see extract_data)
    """

    def __init__(self, url, timeout=30):
        """Download and parse the page.

        url : address of the component page
        timeout : seconds passed to requests.get so that a stalled server
                  cannot block the caller forever
        """
        self.url = url
        html_text = requests.get(self.url, timeout=timeout).text
        self.parser = BeautifulSoup(html_text, 'html.parser')

    def extract_data(self):
        """Return the text of each <pre> element, in document order.

        The list is empty when the page has no <pre> element; callers that
        unpack the result into a fixed number of names must handle that case.
        """
        row_text = self.parser.find_all('pre')
        lines = [rt.get_text() for rt in row_text]
        return lines

### Test with one processor

In [66]:
# Smoke test on a single processor page.  The unpacking below requires the page
# to hold exactly two <pre> blocks (otherwise it raises ValueError).
# NOTE(review): judging by the names, the first block is presumably the
# /proc/cpuinfo dump and the second the lscpu output - confirm.
ext = ExtractorPROC('https://openbenchmarking.org/s/AMD+EPYC+74F3+24-Core')
catproc, lscpu = ext.extract_data()
print(catproc)

processor	: 0
vendor_id	: AuthenticAMD
cpu family	: 25
model		: 1
model name	: AMD EPYC 74F3 24-Core Processor
stepping	: 1
microcode	: 0xa001119
cpu MHz		: 3200.000
cache size	: 512 KB
physical id	: 0
siblings	: 48
core id		: 0
cpu cores	: 24
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 16
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1

# Step 4 :  Use the list of systems of Step 1 and the code of Step 2 to extract all the systems

Goal : get a dataframe for each system (e.g. the one for x264 displayed in Step 2)

Uncomment the following cell if you want to download the data.


It requires Step 1 to have been run first (Step 1 defines `df`, the list of test profile urls).

# Download the result table of every test profile listed in Step 1 and store
# each of them as ./data/systems/<name>.csv.
urls = df["url"]

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("./data/systems/", exist_ok=True)

for u in urls:
    try:
        # The file name is built from the 5th and 6th "/"-separated parts of
        # the url, e.g. ".../test/pts/x264" gives "pts-x264".
        split_url = u.split("/")
        name = split_url[4] + "-" + split_url[5]
        # The download happens in the constructor, so it is kept inside the
        # try: one unreachable page must not abort the whole loop.
        e = ExtractorTP(u)
        e.extract_data().to_csv("./data/systems/"+name+".csv")
    except Exception as err:
        # Best effort: report the failing url with its cause and carry on.
        # Unlike a bare except, this still lets Ctrl-C interrupt the loop.
        print(u, "->", repr(err))

# Step 5 : Use the list of processors tested on the systems in Step 4 and extract all their information

Goal : get metadata for each processor (e.g. the one for AMD EPYC 74F3 24-Core displayed in Step 3)

In [57]:
# Collect, in order of first appearance, every distinct "idproc" value found
# in the csv files written during Step 4.
prof_dir = "./data/systems/"

procs = []
# Set used for O(1) membership tests; procs keeps the insertion order.
seen_procs = set()

for f in os.listdir(prof_dir):
    # Ignore anything that is not one of the csv files written in Step 4.
    if not f.endswith(".csv"):
        continue
    df = pd.read_table(prof_dir+f, sep=',', index_col = 0)
    # Rows stored with an empty idproc are read back as NaN: skip them, they
    # cannot be turned into a url afterwards.
    for d in df["idproc"].dropna():
        if d not in seen_procs:
            seen_procs.add(d)
            procs.append(d)

['/s/AMD+Ryzen+5+5600X+6-Core',
 '/s/Intel+Core+i9-11900K',
 '/s/AMD+Ryzen+9+5950X+16-Core',
 '/s/AMD+Ryzen+7+5800X+8-Core',
 '/s/AMD+Ryzen+9+5900X+12-Core']

Extract all metadata

In [70]:
# For every id collected in procs, download its openbenchmarking.org page and
# store the two <pre> blocks it contains under ./data/procs/<name>/.
prefix_urls = "https://openbenchmarking.org"

# urls for which the two text blocks could not be retrieved or stored
no_info_urls = []

for p in procs:

    proc_url = prefix_urls+p
    # Folder name: the id without its "/s/" prefix and without "+" signs.
    proc_name = p.replace("/s/", "").replace("+", "")
    proc_dir = "./data/procs/" + proc_name + "/"

    try:
        ext = ExtractorPROC(proc_url)
        # Raises ValueError unless the page holds exactly two <pre> blocks.
        catproc, lscpu = ext.extract_data()

        # makedirs also creates ./data/procs when it is missing; a plain mkdir
        # would fail there and the page would be reported as having no
        # information even though the download succeeded.
        os.makedirs(proc_dir, exist_ok=True)

        with open(proc_dir+"catproc.txt", "w") as t:
            t.write(catproc)

        with open(proc_dir+"lscpu.txt", "w") as t:
            t.write(lscpu)

        print("Done with :", proc_url)

    except Exception:
        # Best effort: record the url and continue with the next one.
        # Unlike a bare except, this still lets Ctrl-C interrupt the loop.
        no_info_urls.append(proc_url)
        print("No information with : ", proc_url)

Done with : https://openbenchmarking.org/s/AMD+Ryzen+5+5600X+6-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i9-11900K
Done with : https://openbenchmarking.org/s/AMD+Ryzen+9+5950X+16-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+7+5800X+8-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+9+5900X+12-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i5-11600K
Done with : https://openbenchmarking.org/s/Intel+Core+i9-10900K
Done with : https://openbenchmarking.org/s/Intel+Core+i9-9900K
Done with : https://openbenchmarking.org/s/Intel+Core+i5-10600K
Done with : https://openbenchmarking.org/s/AMD+Ryzen+9+3900XT+12-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+5+5600G
Done with : https://openbenchmarking.org/s/AMD+Ryzen+9+3950X+16-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+3+3300X+4-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+Threadripper+3970X+32-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+7+5700

Done with : https://openbenchmarking.org/s/2+x+AMD+EPYC+7343+16-Core
Done with : https://openbenchmarking.org/s/AMD+EPYC+7543+32-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+Threadripper+2990WX+32-Core
Done with : https://openbenchmarking.org/s/Intel+Xeon+Platinum+8380
Done with : https://openbenchmarking.org/s/AMD+EPYC+7453+28-Core
Done with : https://openbenchmarking.org/s/AMD+EPYC+74F3+24-Core
Done with : https://openbenchmarking.org/s/AMD+EPYC+7443+24-Core
Done with : https://openbenchmarking.org/s/AMD+EPYC+7551+32-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+Threadripper+2970WX+24-Core
Done with : https://openbenchmarking.org/s/Intel+Xeon+Gold+6258R
Done with : https://openbenchmarking.org/s/Intel+Xeon+Platinum+8280
Done with : https://openbenchmarking.org/s/AMD+EPYC+73F3+16-Core
Done with : https://openbenchmarking.org/s/AMD+EPYC+7343+16-Core
Done with : https://openbenchmarking.org/s/AMD+Ryzen+Threadripper+2950X+16-Core
Done with : https://openbenchmark

Done with : https://openbenchmarking.org/s/Intel+Core+i7-2600K
Done with : https://openbenchmarking.org/s/Intel+Core+i7-4710MQ
Done with : https://openbenchmarking.org/s/POWER9+4-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i5-7500
Done with : https://openbenchmarking.org/s/Intel+Core+i5-4690
Done with : https://openbenchmarking.org/s/Intel+Core+i5-6600
Done with : https://openbenchmarking.org/s/ARMv8+rev+0+8-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i5-10210U
Done with : https://openbenchmarking.org/s/Intel+Core+i5-7400
Done with : https://openbenchmarking.org/s/Intel+Core+i7-4710HQ
Done with : https://openbenchmarking.org/s/Intel+Core+i7-3615QM
No information with :  https://openbenchmarking.org/s/Intel+Atom+C3808
Done with : https://openbenchmarking.org/s/ARMv8+Neoverse-N1+4-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i5-6300HQ
Done with : https://openbenchmarking.org/s/AMD+Athlon+3000G
Done with : https://openbenchmarking.org/s/AMD+FX-6

Done with : https://openbenchmarking.org/s/Intel+Core+i7+930
Done with : https://openbenchmarking.org/s/AMD+FX-8350+Eight-Core
Done with : https://openbenchmarking.org/s/Intel+Xeon+E5-2620+0
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2620+0
Done with : https://openbenchmarking.org/s/AMD+Phenom+II+X4+B55
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5520
Done with : https://openbenchmarking.org/s/AMD+Opteron+6380
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5420
Done with : https://openbenchmarking.org/s/Intel+Core+2+Quad+Q9300
Done with : https://openbenchmarking.org/s/Intel+Core+i3-3217U
Done with : https://openbenchmarking.org/s/AMD+Phenom+II+X4+945
Done with : https://openbenchmarking.org/s/Intel+Atom+D525
Done with : https://openbenchmarking.org/s/AMD+Phenom+8650+Triple-Core
Done with : https://openbenchmarking.org/s/Intel+Pentium+Dual+T2370
No information with :  https://openbenchmarking.org/s/SPARC-T5+sparcv9
Done with : https://openbe

Done with : https://openbenchmarking.org/s/Intel+Pentium+G3260
Done with : https://openbenchmarking.org/s/Intel+Core+i3-3120M
Done with : https://openbenchmarking.org/s/AMD+A10-6700+APU
Done with : https://openbenchmarking.org/s/AMD+Phenom+II+X2+560
Done with : https://openbenchmarking.org/s/Intel+Core+i5-3210M
No information with :  https://openbenchmarking.org/s/Intel+Pentium+J2900
Done with : https://openbenchmarking.org/s/Intel+Core+i3+370M
Done with : https://openbenchmarking.org/s/AMD+Athlon+II+X2+250
Done with : https://openbenchmarking.org/s/AMD+Phenom+9500
Done with : https://openbenchmarking.org/s/AMD+Athlon+II+X2+260
Done with : https://openbenchmarking.org/s/Intel+Celeron+J1900
Done with : https://openbenchmarking.org/s/Intel+Pentium+G645
Done with : https://openbenchmarking.org/s/AMD+Athlon+II+X2+245
Done with : https://openbenchmarking.org/s/AMD+A4-6300+APU
Done with : https://openbenchmarking.org/s/AMD+A4-5000+APU
Done with : https://openbenchmarking.org/s/AMD+A6-5400K+A

Done with : https://openbenchmarking.org/s/2+x+AMD+EPYC+7371+16-Core
Done with : https://openbenchmarking.org/s/Intel+Core+i5-3380M
Done with : https://openbenchmarking.org/s/Intel+Xeon+Gold+6210U
No information with :  https://openbenchmarking.org/s/AMD+EPYC+3255+8-Core+Temp
Done with : https://openbenchmarking.org/s/Intel+Celeron+J3455
Done with : https://openbenchmarking.org/s/2+x+Intel+%240000%25
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2696+v3
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2620+v4
Done with : https://openbenchmarking.org/s/Intel+Xeon+Gold+6212U
Done with : https://openbenchmarking.org/s/Intel+Xeon+D-1527
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+X5670
Done with : https://openbenchmarking.org/s/Intel+Xeon+X3470
Done with : https://openbenchmarking.org/s/Intel+Xeon+X5550
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2697+v3
Done with : https://openbenchmarking.org/s/Intel+Xeon+W-2155
Done with : https:

Done with : https://openbenchmarking.org/s/NVIDIA+GeForce+RTX+2080
No information with :  https://openbenchmarking.org/s/eVGA+NVIDIA+GeForce+GTX+980
Done with : https://openbenchmarking.org/s/MSI+AMD+POLARIS10
Done with : https://openbenchmarking.org/s/XFX+AMD+Radeon+HD+7900
No information with :  https://openbenchmarking.org/s/AMD+Radeon+R9+Fury
Done with : https://openbenchmarking.org/s/AMD+Radeon+RX+580
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+R9+FURY
Done with : https://openbenchmarking.org/s/AMD+Radeon+RX+470
Done with : https://openbenchmarking.org/s/AMD+Radeon+RX+480
Done with : https://openbenchmarking.org/s/AMD+Radeon+RX+460
Done with : https://openbenchmarking.org/s/Intel+Xeon+E-2286M
Done with : https://openbenchmarking.org/s/Intel+Core+i9-8950HK
Done with : https://openbenchmarking.org/s/Intel+Core+i9-7900X
Done with : https://openbenchmarking.org/s/Intel+Core+i9-9880H
Done with : https://openbenchmarking.org/s/Intel+Core+i7-8809G
Done with : https://o

No information with :  https://openbenchmarking.org/s/250GB+Western+Digital+WDS250G1B0A-
No information with :  https://openbenchmarking.org/s/3+x+2000GB+Western+Digital+WD20EARS-00M
No information with :  https://openbenchmarking.org/s/CIE+M8+T405
No information with :  https://openbenchmarking.org/s/120GB+ADATA+SP550
No information with :  https://openbenchmarking.org/s/63GB+SanDisk+SDSSDP06
No information with :  https://openbenchmarking.org/s/2+x+960GB+SAMSUNG+MZQLB960HAJR-00007
No information with :  https://openbenchmarking.org/s/240GB+TOSHIBA-RC100
No information with :  https://openbenchmarking.org/s/3+x+3001GB+Seagate+ST3000DM008-2DM1
No information with :  https://openbenchmarking.org/s/60GB+DREVO+X1+SSD
No information with :  https://openbenchmarking.org/s/128GB+INTEL+SSDPEKKW128G8
No information with :  https://openbenchmarking.org/s/180GB+INTEL+SSDSC2KW18
No information with :  https://openbenchmarking.org/s/320GB+TOSHIBA+MK3255GS
No information with :  https://openbenchma

No information with :  https://openbenchmarking.org/s/1000GB+Western+Digital+WD10JPCX-24U
No information with :  https://openbenchmarking.org/s/1000GB+DefaultValue0
No information with :  https://openbenchmarking.org/s/6+x+240GB+SAMSUNG+MZ7WD240
No information with :  https://openbenchmarking.org/s/1198GB+PERC+H730+Mini
No information with :  https://openbenchmarking.org/s/3+x+320GB+Western+Digital+WD3200AAKX-0
No information with :  https://openbenchmarking.org/s/1199GB+PERC+H730+Adp
No information with :  https://openbenchmarking.org/s/500GB+Hitachi+HCC54755
No information with :  https://openbenchmarking.org/s/2+x+300GB+INTEL+SSDSC2BB30
No information with :  https://openbenchmarking.org/s/3357GB+PERC+H740P+Mini
No information with :  https://openbenchmarking.org/s/299GB+PERC+H330+Mini
No information with :  https://openbenchmarking.org/s/12+x+54GB+FlashArray
No information with :  https://openbenchmarking.org/s/1000GB+Seagate+ST91000640NS
No information with :  https://openbenchmar

No information with :  https://openbenchmarking.org/s/32GB+OCZ+SOLID_SSD
No information with :  https://openbenchmarking.org/s/230GB+M.2
No information with :  https://openbenchmarking.org/s/2+x+40GB+INTEL+SSDSA2CT04
No information with :  https://openbenchmarking.org/s/62GB+Transcend
No information with :  https://openbenchmarking.org/s/64GB+00000
No information with :  https://openbenchmarking.org/s/1000GB+HGST+HTE541010A9
No information with :  https://openbenchmarking.org/s/320GB+Seagate+ST3320413CS
No information with :  https://openbenchmarking.org/s/1000GB+Seagate+STM31000528AS
No information with :  https://openbenchmarking.org/s/1000GB+PERC+5
No information with :  https://openbenchmarking.org/s/64GB+Fordisk+S860+64G
No information with :  https://openbenchmarking.org/s/73GB+PERC+5
No information with :  https://openbenchmarking.org/s/120GB+OCZ-AGILITY2
No information with :  https://openbenchmarking.org/s/2+x+500GB+Western+Digital+WD5003ABYX-0
No information with :  https://o

Done with : https://openbenchmarking.org/s/Intel+Core+i5-2450M
No information with :  https://openbenchmarking.org/s/4+x+Intel+Xeon+X5355
No information with :  https://openbenchmarking.org/s/Intel+Xeon+X5355
Done with : https://openbenchmarking.org/s/Intel+Core+i5-4250U
Done with : https://openbenchmarking.org/s/Intel+Core+2+Quad+Q8300
Done with : https://openbenchmarking.org/s/AMD+FX-4100
No information with :  https://openbenchmarking.org/s/Intel+Core+i7+Q+720
Done with : https://openbenchmarking.org/s/Intel+Pentium+1403+v2
Done with : https://openbenchmarking.org/s/AMD+Opteron+2374+HE
No information with :  https://openbenchmarking.org/s/Intel+Core+i5+M+460
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+RX+6800
Done with : https://openbenchmarking.org/s/MSI+NVIDIA+GeForce+RTX+2080+Ti
Done with : https://openbenchmarking.org/s/ASUS+NVIDIA+GeForce+GTX+1080
Done with : https://openbenchmarking.org/s/eVGA+NVIDIA+GeForce+GTX+1070+Ti
Done with : https://openbenchmarking.o

No information with :  https://openbenchmarking.org/s/AMD+Radeon+Pro+WX+4100
No information with :  https://openbenchmarking.org/s/MSI+NVIDIA+GeForce+GTX+560+Ti
No information with :  https://openbenchmarking.org/s/Gigabyte+NVIDIA+GeForce+GTX+560+Ti
No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+950M
No information with :  https://openbenchmarking.org/s/ASUS+NVIDIA+GeForce+GTX+560+Ti
No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce+MX150
No information with :  https://openbenchmarking.org/s/Gigabyte+AMD+Radeon+R9+270X
No information with :  https://openbenchmarking.org/s/MSI+AMD+Radeon+R9+270
No information with :  https://openbenchmarking.org/s/MSI+AMD+Radeon+Vega+8+Mobile
No information with :  https://openbenchmarking.org/s/Sapphire+AMD+Radeon+HD+6900
No information with :  https://openbenchmarking.org/s/ASUS+AMD+Radeon+HD+7700
Done with : https://openbenchmarking.org/s/ASUS+NVIDIA+GeForce+GTX+950M
No information with :  https://openbenc

Done with : https://openbenchmarking.org/s/eVGA+NVIDIA+GeForce+GTX+1060+6GB
Done with : https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+770
No information with :  https://openbenchmarking.org/s/Gigabyte+NVIDIA+GeForce+GTX+750+Ti
No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+560+Ti
No information with :  https://openbenchmarking.org/s/AMD+Radeon+HD+7800
Done with : https://openbenchmarking.org/s/MSI+NVIDIA+GeForce+GTX+750+Ti
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+R7+370
No information with :  https://openbenchmarking.org/s/ASUS+NVIDIA+GeForce+GTX+960M
No information with :  https://openbenchmarking.org/s/Gigabyte+NVIDIA+GeForce+GTX+560
Done with : https://openbenchmarking.org/s/AMD+Radeon+RX+590
Done with : https://openbenchmarking.org/s/Gigabyte+AMD+Radeon+RX+460
No information with :  https://openbenchmarking.org/s/AMD+Radeon+Vega+3
No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+470
No information with :  h

No information with :  https://openbenchmarking.org/s/320GB+Western+Digital+WD3200BPVT-1
No information with :  https://openbenchmarking.org/s/500GB+Western+Digital+WD5000LPVX-7
No information with :  https://openbenchmarking.org/s/1000GB+Western+Digital+WD10EZEX-00B
Done with : https://openbenchmarking.org/s/AMD+POLARIS10
Done with : https://openbenchmarking.org/s/Zotac+NVIDIA+GeForce+GTX+1060
No information with :  https://openbenchmarking.org/s/ASUS+AMD+Radeon+HD+7900
No information with :  https://openbenchmarking.org/s/ASUS+AMD+Radeon+HD+7970
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+R9+Fury
Done with : https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+970
No information with :  https://openbenchmarking.org/s/AMD+Polaris12
Done with : https://openbenchmarking.org/s/HIS+AMD+Radeon+HD+7950
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+HD+5870
Done with : https://openbenchmarking.org/s/XFX+AMD+Radeon+RX+460
Done with : https://openbenchmarking.or

No information with :  https://openbenchmarking.org/s/2+x+8192+MB+1600MHz+Micron+16KTF1G64AZ-1G6E1
No information with :  https://openbenchmarking.org/s/4+x+8192+MB+2133MHz+HMA81GU7CJR8N-VK
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+2400MHz
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+2667MHz+Kingston+KHX2666C15D4
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+1600MHz+Kingston+KHX1600C9D3
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+1600MHz+Undefined
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+1600MHz+CMY8GX3M2B2133C9
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+PSD34G16002
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+1333MHz+PSD34G16002
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+1600MHz+Elpida
No information with :  https://openbenchmarking.org/s/2+x+4096+MB+Elpida+EBJ40UG8EFU0-GN-F
No information with :  https://ope

No information with :  https://openbenchmarking.org/s/11th+Gen+Intel+Core+i7-1185G7
Done with : https://openbenchmarking.org/s/AMD+Ryzen+5+PRO+3500U
Done with : https://openbenchmarking.org/s/Intel+Core+2+Duo+P9xxx
No information with :  https://openbenchmarking.org/s/Sapphire+NVIDIA+GeForce+GTX+750+Ti
Done with : https://openbenchmarking.org/s/Gigabyte+NVIDIA+GeForce+GTX+1060
Done with : https://openbenchmarking.org/s/AMD+POLARIS11
Done with : https://openbenchmarking.org/s/ASUS+Intel+Iris+Pro+6200
No information with :  https://openbenchmarking.org/s/Intel+Core+i5-9400T
Done with : https://openbenchmarking.org/s/Intel+Pentium+Silver+J5040
Done with : https://openbenchmarking.org/s/AMD+A9-9425+RADEON+R5+5+COMPUTE+CORES+2C
Done with : https://openbenchmarking.org/s/ASUS+Intel+UHD+630+CFL+GT2
No information with :  https://openbenchmarking.org/s/Gigabyte+Intel+HD+530+SKL+GT2
No information with :  https://openbenchmarking.org/s/Intel+HD+530+SKL+GT2
Done with : https://openbenchmarking.o

No information with :  https://openbenchmarking.org/s/16+x+4096+MB+DDR3-1600MHz+Samsung
No information with :  https://openbenchmarking.org/s/2+x+8192+MB+DDR4-2133MT
No information with :  https://openbenchmarking.org/s/4+x+16384+MB+2133MHz
No information with :  https://openbenchmarking.org/s/2+x+8192+MB+DDR3-2133MHz
No information with :  https://openbenchmarking.org/s/15.75+x+32768+MB+2133MHz
No information with :  https://openbenchmarking.org/s/8+x+16384+MB+DDR3-1333MHz+Samsung
No information with :  https://openbenchmarking.org/s/7.875+x+65536+MB+DDR4-2666MHz+Samsung
No information with :  https://openbenchmarking.org/s/1+x+7908+MB+RAM
No information with :  https://openbenchmarking.org/s/8+x+16384+MB+DDR3-1600MHz
No information with :  https://openbenchmarking.org/s/12+x+8192+MB+DDR3-1333MHz
No information with :  https://openbenchmarking.org/s/2+x+16384+MB+DDR4-2133MT
No information with :  https://openbenchmarking.org/s/8+x+16384+MB+2133MHz
No information with :  https://openbe

No information with :  https://openbenchmarking.org/s/4+x+32+GB+DDR4-2933MT
No information with :  https://openbenchmarking.org/s/6+x+16384+MB+DDR4-2400MHz+M393A2K40BB2-CTD
No information with :  https://openbenchmarking.org/s/8+x+32+GB+DDR4-2400MHz+HMA84GR7AFR4N-VK
No information with :  https://openbenchmarking.org/s/8+x+8192+MB+DDR4-2134MT
No information with :  https://openbenchmarking.org/s/4+x+32+GB+DDR4-2400MHz+M393A4K40CB2-CTD
No information with :  https://openbenchmarking.org/s/8+x+32+GB+DDR4-2400MHz+M393A4K40CB2-CTD
No information with :  https://openbenchmarking.org/s/4+x+8+GB+DDR4-3600MT
No information with :  https://openbenchmarking.org/s/4+x+32+GB+DDR4-2400MHz+Micron+36ASF4G72PZ-2G6D1
No information with :  https://openbenchmarking.org/s/6+x+16384+MB+DDR4-2400MHz+HMA82GR7AFR8N-VK
No information with :  https://openbenchmarking.org/s/16+x+16384+MB+DDR3-1333MHz+Micron+36KSF2G72PZ-1G6N1
No information with :  https://openbenchmarking.org/s/16+x+16384+MB+DDR3-1600MHz+Samsun

No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce+GTX+1080+with+Max-Q+Design
Done with : https://openbenchmarking.org/s/MSI+NVIDIA+GeForce+GTX+1660+Ti
Done with : https://openbenchmarking.org/s/Sapphire+AMD+Radeon+RX+56
No information with :  https://openbenchmarking.org/s/MSI+AMD+Radeon+RX+56
Done with : https://openbenchmarking.org/s/Gigabyte+NVIDIA+GeForce+GTX+1660+Ti
No information with :  https://openbenchmarking.org/s/Zotac+NVIDIA+GeForce+GTX+1660+Ti
No information with :  https://openbenchmarking.org/s/Sapphire+NVIDIA+GeForce+GTX+1660+Ti
Done with : https://openbenchmarking.org/s/MSI+NVIDIA+GeForce+RTX+2060
No information with :  https://openbenchmarking.org/s/NVIDIA+Quadro+RTX+3000
Done with : https://openbenchmarking.org/s/MSI+AMD+Radeon+RX+5500
No information with :  https://openbenchmarking.org/s/Sapphire+AMD+Radeon+RX+5500+XT
Done with : https://openbenchmarking.org/s/eVGA+NVIDIA+GeForce+GTX+1060
No information with :  https://openbenchmarking.org/s/XFX+A

No information with :  https://openbenchmarking.org/s/750GB+Seagate+ST3750640NS
No information with :  https://openbenchmarking.org/s/1022GB+SMC2208
No information with :  https://openbenchmarking.org/s/128GB+TOSHIBA+THNSNH12
No information with :  https://openbenchmarking.org/s/512GB+INTEL+SSDSC2KW51
No information with :  https://openbenchmarking.org/s/1275GB+SMC2208
No information with :  https://openbenchmarking.org/s/499GB+SMC2108
No information with :  https://openbenchmarking.org/s/512GB+PLEXTOR+PX-AG512
No information with :  https://openbenchmarking.org/s/240GB+9750-4i+DISK
No information with :  https://openbenchmarking.org/s/1000GB+Western+Digital+WD10JPVX-22J
No information with :  https://openbenchmarking.org/s/1024GB+Samsung+SSD+960+PRO+1TB
No information with :  https://openbenchmarking.org/s/Toshiba+KBG40ZNS256G+NVMe
No information with :  https://openbenchmarking.org/s/512GB+SAMSUNG+MZHPV512
No information with :  https://openbenchmarking.org/s/2+x+512GB+SATA+SSD
No in

No information with :  https://openbenchmarking.org/s/128GB+MTFDDAK128MAM-1J
No information with :  https://openbenchmarking.org/s/36+x+6001GB+HGST+HUS726060AL
No information with :  https://openbenchmarking.org/s/19+x+1000GB+Western+Digital+WD1003FBYZ-0
No information with :  https://openbenchmarking.org/s/32GB+SSDSA2SH032G1GN
No information with :  https://openbenchmarking.org/s/4+x+160GB+Seagate+ST3160813AS
No information with :  https://openbenchmarking.org/s/2+x+147GB+Seagate+ST9146803SS
No information with :  https://openbenchmarking.org/s/18+x+1000GB+Western+Digital+WD1003FBYZ-0
No information with :  https://openbenchmarking.org/s/120GB+GeIL+Zenith+A3+1
No information with :  https://openbenchmarking.org/s/320GB+Western+Digital+WDC+WD3200AAKX-0
No information with :  https://openbenchmarking.org/s/146GB+SMC2208
No information with :  https://openbenchmarking.org/s/1497GB+PERC+6
No information with :  https://openbenchmarking.org/s/240GB+Toshiba
No information with :  https://op

No information with :  https://openbenchmarking.org/s/Tesla+M6
Done with : https://openbenchmarking.org/s/NVIDIA+GeForce+MX230
Done with : https://openbenchmarking.org/s/Intel+Core+i5-4570S
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2637+v3
Done with : https://openbenchmarking.org/s/Intel+Xeon+E5-1620+0
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2695+v3
Done with : https://openbenchmarking.org/s/Intel+Xeon+E5-1603+v3
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2683+v3
Done with : https://openbenchmarking.org/s/Intel+Xeon+X5677
Done with : https://openbenchmarking.org/s/4+x+Intel+Xeon+E5-4620+v2
Done with : https://openbenchmarking.org/s/Intel+Core+i7-2670QM
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5440
Done with : https://openbenchmarking.org/s/Intel+Core+2+Quad
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+5160
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5504
No information with :  https:

No information with :  https://openbenchmarking.org/s/233GB+Samsung+SSD+970+EVO
Done with : https://openbenchmarking.org/s/Intel+Core+i7-7820HQ
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2643+v3
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+Silver+4215
Done with : https://openbenchmarking.org/s/POWER8
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2660+v4
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+Silver+4108
Done with : https://openbenchmarking.org/s/Intel+Xeon+Silver+4110
Done with : https://openbenchmarking.org/s/AMD+A6-6400K+APU
Done with : https://openbenchmarking.org/s/Intel+Core+2+Duo+E6750
Done with : https://openbenchmarking.org/s/Intel+Core+2+Duo+P8400
No information with :  https://openbenchmarking.org/s/Intel+Family+6+Model+45+Stepping+7+Intel
Done with : https://openbenchmarking.org/s/Intel+Xeon+E5-2450L+0
Done with : https://openbenchmarking.org/s/Intel+Celeron+N2920
Done with : https://openbenchmarking.org/s/AMD+

No information with :  https://openbenchmarking.org/s/59GB+SanDisk+SDSSDP064G
No information with :  https://openbenchmarking.org/s/149GB+FUJITSU+MHZ2160BH+G2
Done with : https://openbenchmarking.org/s/Intel+Pentium+G3240
No information with :  https://openbenchmarking.org/s/4+x+Intel+Core+i7-4770HQ
Done with : https://openbenchmarking.org/s/Intel+Xeon+E3-1230+v3
Done with : https://openbenchmarking.org/s/Intel+Pentium+G630
Done with : https://openbenchmarking.org/s/Intel+Core+i5-3330
Done with : https://openbenchmarking.org/s/Intel+Xeon+X3450
Done with : https://openbenchmarking.org/s/Intel+Core+i7-3612QM
Done with : https://openbenchmarking.org/s/Intel+Core+2+Duo+T9600
Done with : https://openbenchmarking.org/s/Intel+Pentium+E5400
Done with : https://openbenchmarking.org/s/AMD+A6-3670+APU
Done with : https://openbenchmarking.org/s/Intel+Core+2+Duo+T6600
No information with :  https://openbenchmarking.org/s/Intel+Core+i5+M+450
Done with : https://openbenchmarking.org/s/Intel+Core+i3+3

No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce4+MX+4000
No information with :  https://openbenchmarking.org/s/nv17
No information with :  https://openbenchmarking.org/s/AMD+Radeon+9550
No information with :  https://openbenchmarking.org/s/AMD+FirePro+2260
No information with :  https://openbenchmarking.org/s/S3+ProSavage+KM133
No information with :  https://openbenchmarking.org/s/Intel+Mobile+945GME+IGP
No information with :  https://openbenchmarking.org/s/VIA+VX900
No information with :  https://openbenchmarking.org/s/EMGD+on+PowerVR+SGX535
No information with :  https://openbenchmarking.org/s/S3+Chrome+500
Done with : https://openbenchmarking.org/s/Matrox+MGA+G200eW+WPCM450
No information with :  https://openbenchmarking.org/s/NVIDIA+GeForce2+Go
No information with :  https://openbenchmarking.org/s/Intel+82810E+DC-133
Done with : https://openbenchmarking.org/s/AMD+GX-424CC+SOC
Done with : https://openbenchmarking.org/s/2+x+Intel+Xeon+E5-2680+v4
Done with : https

In [72]:
# Number of urls recorded in no_info_urls by the metadata extraction loop.
len(no_info_urls)

1330

In [73]:
# Number of distinct ids collected in procs.
len(procs)

2786

#### export neo4j

In [78]:
# Stack the csv files of ./data/systems/ into a single dataframe, adding a
# "sys" column that holds the file name without its ".csv" extension.
sys_dir = "./data/systems/"

# Frames are gathered first and concatenated once: concatenating inside the
# loop copies the accumulated data again at every iteration.
frames = []

for sys_name in os.listdir(sys_dir):
    # Ignore anything that is not a csv file; the [:-4] below assumes ".csv".
    if not sys_name.endswith(".csv"):
        continue
    df = pd.read_table(sys_dir+sys_name, sep=',', index_col = 0)
    df["sys"] = sys_name[:-4]
    frames.append(df)

# pd.concat refuses an empty list, so fall back to an empty dataframe.
df_final = pd.concat(frames) if frames else pd.DataFrame()

df_final

Unnamed: 0,idproc,descproc,percentile,nbproc,perf,sys
0,/s/AMD+Ryzen+5+5600X+6-Core,AMD Ryzen 5 5600X 6-Core,98th,6,39,pts-git
1,/s/Intel+Core+i9-11900K,Intel Core i9-11900K,91st,10,40 +/- 2,pts-git
2,/s/AMD+Ryzen+9+5950X+16-Core,AMD Ryzen 9 5950X 16-Core,91st,34,40 +/- 4,pts-git
3,/s/AMD+Ryzen+7+5800X+8-Core,AMD Ryzen 7 5800X 8-Core,90th,15,40 +/- 4,pts-git
4,/s/AMD+Ryzen+9+5900X+12-Core,AMD Ryzen 9 5900X 12-Core,90th,27,41 +/- 5,pts-git
...,...,...,...,...,...,...
22,/s/Intel+Core+i7-5775C,Intel Core i7-5775C,27th,3,31.2 +/- 1.2,pts-smhasher
24,/s/Intel+Core+i7-4790K,Intel Core i7-4790K,17th,4,33.4 +/- 0.1,pts-smhasher
25,/s/Intel+Core+i5-3470,Intel Core i5-3470,14th,3,33.6,pts-smhasher
26,/s/Intel+Core+i3-8100,Intel Core i3-8100,14th,3,33.9,pts-smhasher


In [79]:
# Write the stacked dataframe to final_neo4j.csv in the working directory.
df_final.to_csv("final_neo4j.csv")

In [87]:
# Write the distinct "descproc" values of df_final, one per row, to procs.csv.
pd.DataFrame(df_final["descproc"].unique()).to_csv("procs.csv")

In [88]:
# Write the distinct "sys" values of df_final, one per row, to system.csv.
pd.DataFrame(df_final["sys"].unique()).to_csv("system.csv")

In [5]:
import numpy as np
import os
import itertools

def isfloat(value):
    """Return True if *value* can be converted with ``float()``, else False.

    Anything ``float()`` rejects yields False: malformed strings
    (``ValueError``) as well as unsupported types such as ``None``
    (``TypeError``).
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True

# Expected unit at each position of the per-page `units` list built by the
# scraping loop: positions 0-4 are the header fields (model, frequency, L2,
# L3, memory); the remaining positions are the benchmark rows in page order.
check_units = (
    ["", "GHz", "KB", "KB", "MB"]
    + ["GB/sec", "GB/sec", "MB/sec", "GB/sec", "MB/sec", "GB/sec"]
    + ["MB/sec"] * 6
    + ["Mpixels/sec"] * 10
    + ["MB/sec"] * 2
    + ["Mpairs/sec"] * 2
    + ["Mnodes/sec"] * 2
    + ["Gflops"] * 14
    + ["Mpairs/sec"] * 2
    + ["Mpixels/sec"] * 2
    + ["GB/sec"] * 8
)

# One row per benchmark result:
# model #, freq, L2, L3, memory, bench, single/multicore, score, perf
geek_data = []

path = '../cpu_selection-master/'
files = os.listdir(path)

# Ordered (expected prefix, found prefix, factor) rules. The first rule whose
# prefixes occur in the expected unit and in the unit found on the page wins:
# the value is multiplied by `factor` and the found prefix is rewritten to
# the expected one.
_UNIT_RESCALES = (
    ('G', 'M', 0.001),
    ('M', 'G', 1000),
    ('M', 'K', 0.001),
    ('G', 'K', 0.000001),
    ('G', 'T', 1000),
)


def _table_rows(table):
    """Return the non-empty cell texts of each <tr> in *table*'s <tbody>.

    <td> cells are used when a row has any, otherwise its <th> cells. Cell
    text is stripped, newlines become spaces and double spaces are collapsed
    once.
    """
    parsed = []
    for row in table.find('tbody').find_all('tr'):
        cells = row.find_all('td') or row.find_all('th')
        texts = [cell.text.strip() for cell in cells]
        parsed.append([text.replace('\n', ' ').replace('  ', ' ') for text in texts if text])
    return parsed


for file in files:
    # Only files whose name before the first dot parses as a number are pages.
    if not isfloat(file.split('.')[0]):
        continue

    # Close the page file as soon as it is parsed (it was previously leaked).
    # NOTE(review): no parser is passed, so BeautifulSoup picks whichever is
    # installed; pin one once the pages are confirmed to parse identically.
    with open(os.path.join(path, file)) as page:
        soup = BeautifulSoup(page)

    # data[0] holds the rows of the second "system-information" table;
    # data[1:] hold the rows of each "section-performance" table.
    tables = soup.find_all('table', attrs={'class':'table table-striped geekbench2-show system-information'})
    data = [_table_rows(tables[1])]
    tables = soup.find_all('table', attrs={'class':'table table-striped geekbench2-show section-performance'})
    data.extend(_table_rows(table) for table in tables)

    # head: model, frequency, L2, L3, memory. units mirrors it and then grows
    # by one entry per benchmark row. 0 marks a field that was not filled in.
    head = [0] * 5
    units = [0] * 5

    for d in data[0]:
        if d[0] == 'Processor':
            # Text before '@' is the model; after it come a number and a unit.
            model_and_clock = d[1].split('@')
            head[0] = model_and_clock[0].strip(' ')
            clock = model_and_clock[1].strip(' ').split(' ')
            head[1] = float(clock[0])

            units[0] = ''
            units[1] = clock[1]
            if 'M' in units[1]:
                # Unit contains 'M': scale by 0.001 and relabel 'M' -> 'G'.
                head[1] = 0.001 * float(clock[0])
                units[1] = clock[1].replace('M', 'G')

        elif d[0] == 'L2 Cache':
            # Kept as the raw text; only the unit is split off.
            head[2] = d[1]
            units[2] = d[1].split(' ')[1]
        elif d[0] == 'L3 Cache':
            # Only KB values are recorded; anything else stays 0 and is
            # reported by the unit check below.
            if 'KB' in d[1]:
                head[3] = float(d[1].split(' ')[0])
                units[3] = d[1].split(' ')[1]

        elif d[0] == 'Memory':
            # Only MB values are recorded; anything else stays 0.
            if 'MB' in d[1]:
                head[4] = float(d[1].split(' ')[0])
                units[4] = d[1].split(' ')[1]

    for bench_rows in data[1:]:
        for d in bench_rows:
            data_t = list(head)

            # Benchmark name: the label minus its last word.
            label = d[0].split(' ')
            if len(label) == 3:
                data_t.append(label[0] + ' ' + label[1])
            elif len(label) == 2:
                data_t.append(label[0])
            else:
                print(d)

            # Last word of the label selects single-core (0) or multi-core (1).
            if 'Single' in label[-1]:
                data_t.append(0)
            elif 'Mult' in label[-1]:
                data_t.append(1)

            # The value cell is split on spaces: score, perf, ..., unit.
            value = d[1].split(' ')
            data_t.append(value[0])

            found_unit = value[-1]
            position = len(units)
            # Rows beyond the expected layout get no rescaling instead of an
            # IndexError; the unit check below reports them.
            expected_unit = check_units[position] if position < len(check_units) else ''
            for want, got, factor in _UNIT_RESCALES:
                if want in expected_unit and got in found_unit:
                    data_t.append(factor * float(value[1]))
                    units.append(found_unit.replace(got, want))
                    break
            else:
                # No rescaling needed: perf is stored as the raw text.
                data_t.append(value[1])
                units.append(found_unit)
            geek_data.append(data_t)

    # check units: report every position whose unit differs from the expected
    # one. zip_longest pads the shorter list with None, so a page with missing
    # or extra rows is reported rather than raising IndexError.
    for i, (expected, found) in enumerate(itertools.zip_longest(check_units, units)):
        if expected != found:
            print(file, i, expected, found)

import json
# Serialise every collected benchmark row to geek_data.json.
with open('geek_data.json', mode='w') as outfile:
    json.dump(geek_data, fp=outfile)