# Extract Data

This notebook extracts data from the simulation results folder and creates a CSV database (`output/simulation_data.csv`).

## Packages

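Open a terminal and run the command below. Only `pandas` needs to be installed; `os` and `re` are part of the Python standard library and cannot be installed with `pip`.
```bash
pip install pandas
```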

In [7]:
import os
import pandas as pd
import re

## Initialization

In [8]:
# Project root; the synthesis solutions are looked up in a sub-folder of the
# same name.
project_folder = '/mnt/c/Users/guitr/Documents/vivado/matrix_multiplication'
base_folder = os.path.join(project_folder, 'matrix_multiplication')
print(base_folder)

# Parameter grid: one report is looked up per
# (algorithm, optimization, matrix_size) combination.
algorithms = [0]
optimizations = list(range(4))  # 0, 1, 2, 3
matrix_sizes = [2 ** exponent for exponent in range(3, 9)]  # 8, 16, ..., 256

/mnt/c/Users/guitr/Documents/vivado/matrix_multiplication/matrix_multiplication


In [9]:
def get_pattern(text: str, pattern: str) -> str | None:
    """
    Return an extract of a text based on a pattern.

    Args:
        text (str): text to search.
        pattern (str): pattern to match.

    Returns:
        str: extracted text or an empty string if no match is found.
    """
    match = re.search(pattern, text)

    if match:
        # Return the extracted number (first capture group)
        return match.group(1)
    else:
        return "No match found"


def get_ap_clk(report: str) -> float | None:
    """
    Return the ap_clk value from a report.

    The pattern only matches an ap_clk row whose first numeric column is
    exactly 10.00; the decimal number in the following column is returned.

    Args:
        report (str): string representation of report.

    Returns:
        float | None: the extracted value, or None if the report contains
        no matching ap_clk row.
    """
    # NOTE(review): 10.00 is hard-coded; presumably the target clock period.
    # Reports produced with a different value will not match - confirm.
    pattern = r"    \|ap_clk  \|  10\.00\|\s+(\d+\.\d+)+\|"

    extracted = get_pattern(report, pattern)

    # A failed lookup must yield None instead of crashing the whole
    # extraction: float() raises TypeError for None and ValueError for a
    # non-numeric placeholder string.
    try:
        return float(extracted)
    except (TypeError, ValueError):
        return None


def get_instructions(report: str, instruction: str) -> int | None:
    """
    Return maximum latency value from report file.

    Args:
        report (str): string representation of report.
        instruction (str): instruction to be returned.
    """
    pattern = re.compile(
        r"\|\s*(?P<latency_min>\d+)\s*\|\s*(?P<latency_max>\d+)\s*\|\s*(?P<interval_min>\d+)\s*\|\s*(?P<interval_max>\d+)\s*\|"
    )

    match = pattern.search(report)

    if match:
        return int(match.group(instruction))
    else:
        return None


def get_latency_max(report: str) -> int | None:
    """
    Look up the 'latency_max' field of a report.

    Args:
        report (str): string representation of report.

    Returns:
        int | None: whatever get_instructions yields for 'latency_max'.
    """
    latency_max = get_instructions(report, 'latency_max')
    return latency_max


def get_hardware_usage(report: str, hardware: str) -> list[int] | None:
    """
    Return hardware usage from report file.

    Args:
        report (str): string representation of report.
        hardware (str): hardware to be returned.
    """
    pattern = re.compile(
        r"\|Total            \|\s+(?P<BRAM_18K>\d+)\|\s+(?P<CSP48E>\d+)\|\s+(?P<FF>\d+)\|\s+(?P<LUT>\d+)\|"
    )

    match = pattern.search(report)

    if match:
        return int(match.group(hardware))
    else:
        return None


def get_BRAM_18K(report: str) -> int | None:
    """
    Return BRAM_18K from report file.

    Args:
        report (str): string representation of report.

    Returns:
        int | None: the 'BRAM_18K' value reported by get_hardware_usage, or
        None if it could not be found.
    """
    usage = get_hardware_usage(report, 'BRAM_18K')

    # int(None) would raise TypeError; pass a failed lookup through as None.
    return None if usage is None else int(usage)


def get_CSP48E(report: str) -> int | None:
    """
    Return CSP48E from report file.

    Args:
        report (str): string representation of report.

    Returns:
        int | None: the 'CSP48E' value reported by get_hardware_usage, or
        None if it could not be found.
    """
    usage = get_hardware_usage(report, 'CSP48E')

    # int(None) would raise TypeError; pass a failed lookup through as None.
    return None if usage is None else int(usage)


def get_FF(report: str) -> int | None:
    """
    Return FF from report file.

    Args:
        report (str): string representation of report.

    Returns:
        int | None: the 'FF' value reported by get_hardware_usage, or None
        if it could not be found.
    """
    usage = get_hardware_usage(report, 'FF')

    # int(None) would raise TypeError; pass a failed lookup through as None.
    return None if usage is None else int(usage)


def get_LUT(report: str) -> int | None:
    """
    Return LUT from report file.

    Args:
        report (str): string representation of report.

    Returns:
        int | None: the 'LUT' value reported by get_hardware_usage, or None
        if it could not be found.
    """
    usage = get_hardware_usage(report, 'LUT')

    # int(None) would raise TypeError; pass a failed lookup through as None.
    return None if usage is None else int(usage)


# One row per report file found, in the column order used for the CSV export.
database = []

for algorithm in algorithms:
    for optimization in optimizations:
        for matrix_size in matrix_sizes:
            solution_folder = f'solution_{algorithm}_{optimization}_{matrix_size}'
            report_name = f'matrix_mult_{algorithm}_{optimization}_csynth.rpt'
            file_path = os.path.join(base_folder, solution_folder, 'syn', 'report', report_name)

            # Combinations without a report file are skipped.
            if not os.path.isfile(file_path):
                continue

            with open(file_path, 'r') as file:
                content = file.read()

            # The fourth column is the fixed string 'int' for every row.
            file_data = [
                algorithm,
                optimization,
                matrix_size,
                'int',
                get_ap_clk(content),
                get_latency_max(content),
                get_BRAM_18K(content),
                get_CSP48E(content),
                get_FF(content),
                get_LUT(content),
            ]

            database.append(file_data)


# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing.
output_folder = os.path.join(os.getcwd(), 'output')
os.makedirs(output_folder, exist_ok=True)

output_csv = os.path.join(output_folder, 'simulation_data.csv')
headers = ['algorithm', 'optimization', 'matrix_size', 'matrix_datatype', 'ap_clk_estimated', 'latency_max', 'BRAM_18K', 'CSP48E', 'FF', 'LUT']

# One CSV line per collected report; the DataFrame index is not written.
pd.DataFrame(database, columns=headers).to_csv(output_csv, index=False)