In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


# Fixed seed so the train/test split, and therefore the reported errors, are
# identical on every Restart Kernel -> Run All.
RANDOM_SEED = 42

# Columns read from each per-task CSV: model inputs and the regression target.
FEATURE_COLUMNS = ['batch_size', 'epoch', 'average_memory_utilization', 'average_gpu_utilization']
TARGET_COLUMN = 'training_time'

for task in ["mnist_classification", "cifar10_classification", "cifar10_deep_classification"]:
    print("=" * 40)
    print(task)

    # Load this task's measurements into a pandas dataframe
    df = pd.read_csv(f"training_data/{task}.csv")

    # Split the data into features (X) and target (y)
    X = df[FEATURE_COLUMNS]
    y = df[TARGET_COLUMN]

    # Hold out 20% of the rows for evaluation; seeded for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_SEED
    )

    # Train the linear regression model
    reg = LinearRegression().fit(X_train, y_train)

    # Make predictions on the held-out data
    y_pred = reg.predict(X_test)

    # Evaluate once. The split, the model and the predictions are fixed at this
    # point, so re-computing the metric in a loop and tracking a "minimum" can
    # only ever return this same value (and a finite sentinel such as 10000
    # would silently cap the reported MSE if the true error were larger).
    # NOTE(review): if the goal is to gauge split-to-split variability, repeat
    # the split/fit/predict steps (or use cross-validation) and report the
    # mean/spread -- not the minimum, which is optimistically biased.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print("\nMSE:", mse)
    print("RMSE:", rmse)

mnist_classification

MSE: 15.068411514542921
RMSE: 3.8818051876083275
cifar10_classification

MSE: 20.318021346529406
RMSE: 4.507551591111233
cifar10_deep_classification

MSE: 20.589759251161194
RMSE: 4.537593993644781
