# Modelagem Empírica do Comportamento de Caches de Diferentes Arquiteturas de Processadores

## Resumo:
Este trabalho tem como proposta extrair modelos matemáticos de modo a rastrear os parâmetros de mapeamento de cache em diferentes níveis com base em dados empíricos de razão de acerto. Esta pesquisa foi inspirada pelo Apêndice 4A do livro do Stallings, que menciona artigos contendo dados reais de razão de acerto para diferentes tamanhos de cache.

In [None]:
# Importing convenient libraries for analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
!pip install cache-simulator

In [None]:
# Load the empirical hit-ratio measurements into a DataFrame.
# 'cache.csv' is a relative path, so it is resolved against the current working directory.
data = pd.read_csv('cache.csv')

# Expected CSV columns: cache_size, associativity, replacement_policy, hit_ratio
# (these are the column names the regression step later in this file reads).

In [3]:
# Class Cache represents a cache memory for sample tests
class Cache:
    """Set-associative cache model with LRU replacement, used for sample tests.

    The cache holds ``size // block_size`` block slots grouped into sets of
    ``associativity`` ways. A memory block is mapped to a set by its block
    number modulo the number of sets; inside a set, the least recently used
    way is replaced when a new block has to be stored.

    Attributes:
        size: Total cache capacity, in the same unit as ``block_size``.
        associativity: Requested number of ways per set.
        block_size: Capacity of a single block.
        blocks: Number of block slots (``size // block_size``).
        data: One payload entry per block slot (``None`` while empty).
    """

    def __init__(self, size, associativity, block_size):
        """Create an empty cache.

        Raises:
            ValueError: If any parameter is not positive, or if ``size`` is
                too small to hold a single block.
        """
        if size <= 0 or associativity <= 0 or block_size <= 0:
            raise ValueError("size, associativity and block_size must be positive")
        if size < block_size:
            raise ValueError("size must be at least block_size")

        self.size = size
        self.associativity = associativity
        self.block_size = block_size
        self.blocks = size // block_size
        self.data = [None] * self.blocks  # Each block is initialized as empty

        # A set cannot have more ways than there are slots in the cache.
        self._ways = min(associativity, self.blocks)
        # When blocks is not a multiple of the way count, the leftover
        # trailing slots are simply never used.
        self._num_sets = self.blocks // self._ways
        self._tags = [None] * self.blocks  # Block number held by each slot
        self._last_used = [0] * self.blocks  # 0 means "never used"
        self._clock = 0  # Monotonic counter that orders accesses for LRU

    def _locate(self, address):
        """Return the block number of ``address`` and the slots of its set."""
        block_number = address // self.block_size
        first_slot = (block_number % self._num_sets) * self._ways
        return block_number, range(first_slot, first_slot + self._ways)

    def _touch(self, slot):
        """Mark ``slot`` as the most recently used one."""
        self._clock += 1
        self._last_used[slot] = self._clock

    def read(self, address):
        """Look up ``address``; return True on a hit and False on a miss.

        A hit refreshes the block's recency. A miss does not load the block;
        the caller is expected to call ``write`` to fill it.
        """
        block_number, slots = self._locate(address)
        for slot in slots:
            if self._tags[slot] == block_number:
                self._touch(slot)
                return True
        return False

    def write(self, address, data):
        """Store ``data`` for the block containing ``address``.

        If the block is already cached its payload is overwritten. Otherwise
        the least recently used way of the set is replaced; never-used ways
        have the lowest recency, so they are filled first.
        """
        block_number, slots = self._locate(address)
        slot = next((s for s in slots if self._tags[s] == block_number), None)
        if slot is None:
            slot = min(slots, key=self._last_used.__getitem__)
        self._tags[slot] = block_number
        self.data[slot] = data
        self._touch(slot)

# Class CacheSimulator is responsible for simulating cache behavior
class CacheSimulator:
    """Replay address traces against a cache and record the resulting hit rates.

    Attributes:
        cache: Object providing ``read(address)`` and ``write(address, data)``.
        hit_rates: Hit rate of each ``simulate`` call, in call order.
        memory_sizes: The ``memory_size`` label passed to each ``simulate`` call.
    """

    def __init__(self, cache):
        self.cache = cache
        self.hit_rates = []  # List to store hit rates
        self.memory_sizes = []  # List to store simulated memory sizes

    def simulate(self, program, memory_size):
        """Run ``program`` through the cache and record its hit rate.

        Args:
            program: Iterable of addresses, accessed in order.
            memory_size: Value stored next to the hit rate in
                ``memory_sizes``; it does not influence the simulation.

        Returns:
            The hit rate of this run (``0.0`` for an empty program).
        """
        # Imported here so the class does not depend on the file's
        # module-level imports, which do not include ``random``.
        import random

        hits = 0
        misses = 0

        for address in program:
            if self.cache.read(address):
                hits += 1
            else:
                # On a miss the block is filled with fictitious data.
                self.cache.write(address, random.randint(1, 1000))
                misses += 1

        accesses = hits + misses
        # An empty program has no accesses; report 0.0 instead of dividing by zero.
        hit_rate = hits / accesses if accesses else 0.0

        self.hit_rates.append(hit_rate)
        self.memory_sizes.append(memory_size)
        return hit_rate

In [None]:
# Splitting the data into training and testing sets
# NOTE(review): replacement_policy is presumably a text label (e.g. a policy
# name) -- confirm against cache.csv. LinearRegression only accepts numeric
# features, so non-numeric columns are one-hot encoded here. get_dummies
# leaves numeric columns untouched, so an all-numeric CSV is unaffected.
X = pd.get_dummies(
    data[['cache_size', 'associativity', 'replacement_policy']],
    dtype=float,
)
y = data['hit_ratio']
# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Prediction on the held-out test set
y_pred = model.predict(X_test)

# Evaluating the model
mean_squared_error_value = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mean_squared_error_value}')

In [2]:
# Function to plot comparative graphs
def plot_graphs(memory_sizes, avg_memory_costs, efficiency_access, hit_rates_vs_memory_sizes):
    """Draw the four comparison figures and display them.

    Figures, in order:
        1. Average memory cost against relative memory size.
        2. Hit rate against access efficiency.
        3. Hit rate against relative memory size.
        4. The same curve as figure 3, drawn again in its own figure.

    Args:
        memory_sizes: X values for figures 1, 3 and 4.
        avg_memory_costs: Y values for figure 1.
        efficiency_access: X values for figure 2.
        hit_rates_vs_memory_sizes: Y values for figures 2, 3 and 4.
    """
    size_label = 'Relative Memory Size'
    hit_label = 'Hit Rate (T1)'
    hit_vs_size_title = 'Hit Rate x Relative Memory Size'

    # One entry per figure: (x values, y values, x label, y label, title).
    figure_specs = [
        (memory_sizes, avg_memory_costs,
         size_label, 'Average Memory Cost',
         'Average Memory Cost x Relative Memory Size'),
        (efficiency_access, hit_rates_vs_memory_sizes,
         'Access Efficiency (r = T2/T1)', hit_label,
         'Access Efficiency x Hit Rate'),
        (memory_sizes, hit_rates_vs_memory_sizes,
         size_label, hit_label, hit_vs_size_title),
        (memory_sizes, hit_rates_vs_memory_sizes,
         size_label, hit_label, hit_vs_size_title),
    ]

    for number, (xs, ys, x_text, y_text, heading) in enumerate(figure_specs, start=1):
        plt.figure(number)
        plt.plot(xs, ys, marker='o', linestyle='-')
        plt.xlabel(x_text)
        plt.ylabel(y_text)
        plt.title(heading)

    # Show all figures at once, after every one has been populated.
    plt.show()

if __name__ == "__main__":
    # Demo driver: simulate the same access trace once per memory size and
    # plot the derived metrics.
    cache_size = 8192  # Cache size
    associativity = 4  # Cache associativity
    block_size = 64  # Block size
    program = [0x100, 0x200, 0x300, 0x400, 0x100, 0x500, 0x200, 0x600]  # Memory access program
    memory_sizes = [1024, 2048, 4096, 8192, 16384]  # Memory sizes for simulation

    avg_memory_costs = []  # List to store average memory costs
    efficiency_access = []  # List to store access efficiency
    hit_rates_vs_memory_sizes = []  # List to store hit rates

    for memory_size in memory_sizes:
        # A fresh cache and simulator per memory size, so runs share no state.
        cache = Cache(cache_size, associativity, block_size)
        simulator = CacheSimulator(cache)

        # In this example, we are generating fictitious data for cache writes
        # This should be replaced with real data
        simulator.simulate(program, memory_size)

        avg_memory_costs.append(cache_size / memory_size)

        hit_rate = simulator.hit_rates[-1]

        # Efficiency is the ratio of this run's hit rate to the previous
        # run's. The previous value comes from the list accumulated across
        # iterations: the simulator is rebuilt every time, so its own history
        # only ever holds the current run.
        previous_hit_rate = hit_rates_vs_memory_sizes[-1] if hit_rates_vs_memory_sizes else None
        if previous_hit_rate:
            efficiency_access.append(hit_rate / previous_hit_rate)
        else:
            # First run, or a previous hit rate of zero: the ratio is
            # undefined, so fall back to the neutral value 1.
            efficiency_access.append(1)
        hit_rates_vs_memory_sizes.append(hit_rate)

    # Call the function to plot the graphs
    plot_graphs(memory_sizes, avg_memory_costs, efficiency_access, hit_rates_vs_memory_sizes)

NameError: ignored

## Perspectivas:
Este trabalho tem o potencial de apresentar uma abordagem para modelar matematicamente os parâmetros de mapeamento de cache com base em dados empíricos de razão de acerto. Utilizaremos técnicas de regressão linear para relacionar o tamanho da cache, a associatividade e a política de substituição com a razão de acerto e, possivelmente, implementaremos um simulador de cache para diferentes arquiteturas de conjunto de instruções.