# Importing Libraries

In [1]:
import os
import re
import chardet
import pandas as pd
import multiprocessing

# Getting information about your machine

To measure CPU efficiency, we assume that the parallel code uses all of the machine's processors. If that is not the case, change the value of the `num_processors` variable below.

In [2]:
# Number of CPUs reported for the machine running this notebook.
# initialize_dataset() divides the CPU speedup by this value to obtain the
# CPU efficiency, so override it by hand if the parallel runs used fewer workers.
num_processors = multiprocessing.cpu_count()

# Show the value that will be used
num_processors

16

# Getting the data and calculating metrics

In [19]:
# Fallback values for the benchmark parameters; a `_key=value` pair found in a
# result file's name overrides the matching entry.
default_parameters = {"difficulty": 4, "size": 0, "capacity": 10, "numBlocks": 5}

# Folder, relative to this notebook, that holds the result files
results_path = '../results'

In [24]:
# Column order of the DataFrame returned by initialize_dataset(); fixing it here
# keeps the schema stable even when no result file could be loaded.
_RESULT_COLUMNS = [
    'name', 'difficulty', 'size', 'capacity', 'numBlocks',
    'sequential_time', 'cpu_time', 'gpu_time',
    'cpu_speedup', 'gpu_speedup', 'cpu_efficiency',
]


def _parse_total_seconds(content):
    """Return the floats found on the "TotalSeconds" lines of ``content``.

    Returns None as soon as one of those lines has no ``:`` separator or a
    value that float() cannot parse, so the caller can reject the whole file.
    """
    values = []
    for line in content.split('\n'):
        if 'TotalSeconds' not in line:
            continue
        fields = line.split(':')
        if len(fields) < 2:
            return None
        try:
            # Timings may be written with a decimal comma ("10,699629").
            values.append(float(fields[1].strip().replace(',', '.')))
        except ValueError:
            return None
    return values


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def initialize_dataset(results_folder, default_parameters, processors=None):
    """Build a DataFrame of timings and derived metrics from the result files.

    Every ``.txt`` file in ``results_folder`` is expected to contain exactly
    three "TotalSeconds" lines, read in order as the sequential, CPU and GPU
    times. Files that do not match (wrong count or unparsable value) are
    reported with a printed message and skipped.

    Parameters
    ----------
    results_folder : str
        Directory containing the result files.
    default_parameters : dict
        Values for 'difficulty', 'size', 'capacity' and 'numBlocks' used when
        the file name carries no ``_key=value`` override. Not modified.
    processors : int, optional
        Divisor used for the CPU efficiency. Defaults to the notebook-level
        ``num_processors`` variable.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file, ordered by file name, with the columns
        listed in ``_RESULT_COLUMNS``. A speedup whose denominator is zero is
        NaN (and so is the efficiency derived from it). The columns are present
        even when no file was accepted.

    Raises
    ------
    KeyError
        If one of the four expected parameters is neither in
        ``default_parameters`` nor in the file name.
    """
    if processors is None:
        processors = num_processors

    rows = []

    # Sorted so the row order does not depend on the file system's listing order.
    for filename in sorted(os.listdir(results_folder)):
        if not filename.endswith('.txt'):
            continue

        file_path = os.path.join(results_folder, filename)
        with open(file_path, 'rb') as file:
            rawdata = file.read()

        # chardet reports None when it cannot guess (e.g. an empty file).
        file_encoding = chardet.detect(rawdata)['encoding'] or 'utf-8'
        content = rawdata.decode(file_encoding, errors='ignore')
        # Same newline normalisation a text-mode read would have applied.
        content = content.replace('\r\n', '\n').replace('\r', '\n')

        total_seconds_values = _parse_total_seconds(content)
        if total_seconds_values is None or len(total_seconds_values) != 3:
            print(f"Time values not found in expected format in {filename}")
            continue
        sequential_time, cpu_time, gpu_time = total_seconds_values

        # Parameters encoded in the file name override the defaults.
        parameters = default_parameters.copy()
        for key, value in re.findall(r'_(\w+)=(\d+)', filename):
            parameters[key] = int(value)

        cpu_speedup = _safe_ratio(sequential_time, cpu_time)
        gpu_speedup = _safe_ratio(sequential_time, gpu_time)
        cpu_efficiency = (cpu_speedup / processors) * 100

        rows.append({
            'name': filename[:-4],  # drop the ".txt" suffix
            'difficulty': parameters['difficulty'],
            'size': parameters['size'],
            'capacity': parameters['capacity'],
            'numBlocks': parameters['numBlocks'],
            'sequential_time': sequential_time,
            'cpu_time': cpu_time,
            'gpu_time': gpu_time,
            'cpu_speedup': cpu_speedup,
            'gpu_speedup': gpu_speedup,
            'cpu_efficiency': cpu_efficiency,
        })

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)


In [25]:
# Build the results table from the .txt files found in results_path
results_df = initialize_dataset(results_path, default_parameters)

# Display the DataFrame
results_df

Unnamed: 0,name,difficulty,size,capacity,numBlocks,sequential_time,cpu_time,gpu_time,cpu_speedup,gpu_speedup,cpu_efficiency
0,10-08-2023_04-34-58_difficulty=5,5,0,10,5,10.699629,10.95683,16.428109,0.976526,0.6513,6.103287
1,14-08-2023_15-56-29_difficulty=6,6,0,10,5,88.978497,109.663564,16.150781,0.811377,5.509238,5.071106
2,14-08-2023_17-05-05_difficulty=4_numBlocks=10,4,0,10,10,1.063676,1.494003,16.340305,0.711964,0.065095,4.449774
3,14-08-2023_17-06-18_difficulty=4_numBlocks=10_...,4,4,5,10,0.962474,1.347858,16.221627,0.714077,0.059333,4.462979
