In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

In [None]:
# Read the dataset from the CSV file in the working directory.
df = pd.read_csv("rocket_chip_ppa.csv")

# Split the frame: the three target columns go to y, every remaining column to X.
y = df[["area_mm2", "power_mW", "perf_IPC"]]
X = df.drop(columns=y.columns)

In [3]:
# Partition the feature columns by dtype: boolean columns are treated as
# categorical, every other feature column as numeric.
# NOTE: dtypes are read from `df`, so this has to run before the cell that
# casts columns to int; the cast columns no longer report a bool dtype after it.
categorical_cols = [
    name for name, dtype in df.dtypes.loc[X.columns].items() if dtype == "bool"
]
numeric_cols = X.columns[~X.columns.isin(categorical_cols)].tolist()

In [4]:
# Cast the listed configuration columns to integer dtype, replacing each
# column on `df` one at a time (same columns and order as before).
# NOTE(review): most of these look like on/off flags, while 'nSets/ICache'
# shows values such as 64/128/256 in the preview -- confirm it belongs here.
for flag_col in (
    "mulDiv/mul",
    "divSqrt/div",
    "nSets/ICache",
    "updateD0BOnOrder/BTB",
    "rocc/Core",
    "useAtomics/Core",
    "divSqrt/sqrt",
):
    df[flag_col] = df[flag_col].astype("int")

In [5]:
# Preview the first five rows of the frame after the integer casts.
df.head(n=5)

Unnamed: 0,mulDiv/mul,divSqrt/div,divSqrt/sqrt,updateD0BOnOrder/BTB,rocc/Core,useAtomics/Core,nSets/ICache,nSets/DCache,nTLBEntries/ITLB,nTLBEntries/DCache,...,commitWidth/Core,iqEntries/Core,fpIqEntries/Core,intRfEntries/Core,fpRfEntries/Core,loadQueueEntries/Core,storeQueueEntries/Core,area_mm2,power_mW,perf_IPC
0,1,0,1,0,0,1,256,128,8,4,...,2,32,16,32,128,16,16,4.27,78.79,0.242
1,0,1,0,0,1,1,64,64,8,8,...,3,16,32,128,128,16,8,1.81,28.93,0.248
2,1,1,0,0,1,1,64,64,4,8,...,1,32,32,32,32,8,16,1.86,29.42,0.281
3,1,1,0,0,1,1,256,64,8,4,...,2,8,16,32,128,32,8,3.83,64.59,0.252
4,1,1,0,1,0,0,128,128,4,8,...,3,8,16,128,64,8,16,2.93,54.41,0.351


In [None]:
targets = ['area_mm2', 'power_mW', 'perf_IPC']

# Dictionary to store results: target name -> {'model': fitted estimator,
# 'r2_score': R² on the held-out split}.
results = {}

# The feature matrix does not depend on which target is being predicted, so it
# is built once instead of on every loop iteration. All target columns are
# dropped so that no target is used as an input feature.
X = df.drop(columns=targets)
y = df[targets]

# Split a single time, carrying all target columns along. Every model below is
# therefore trained and scored on exactly the same rows.
X_train, X_test, y_train_all, y_test_all = train_test_split(
    X, y, test_size=0.2, random_state=42
)

for target in targets:
    print(f"\n=== Predicting {target} ===")

    # Select this target's column from the shared split
    y_train = y_train_all[target]
    y_test = y_test_all[target]

    # Initialize and train one independent model per target
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Predict on the held-out rows
    y_pred = model.predict(X_test)

    # Evaluation
    r2 = r2_score(y_test, y_pred)

    print(f"R² Score: {r2:.4f}")

    results[target] = {
        'model': model,
        'r2_score': r2
    }



=== Predicting area_mm2 ===
R² Score: 0.7954

=== Predicting power_mW ===
R² Score: 0.7866

=== Predicting perf_IPC ===
R² Score: 0.6939
