In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

for task in ["mnist_classification", "cifar10_classification", "cifar10_deep_classification"]:
    print("=" * 40)
    print(task)

    # Load the data into a pandas dataframe
    df = pd.read_csv(f"training_data/{task}.csv")

    # Normalize the data
    scaler = MinMaxScaler()
    df[['batch_size', 'epoch', 'average_memory_utilization', 'average_gpu_utilization']] = scaler.fit_transform(df[['batch_size', 'epoch', 'average_memory_utilization', 'average_gpu_utilization']])

    # Create the weights for the independent variables
    weights = np.array([0.75, 1, 0.25, 0.5])

    # Multiply the independent variables by their corresponding weights
    df['batch_size'] *= weights[0]
    df['epoch'] *= weights[1]
    df['average_memory_utilization'] *= weights[2]
    df['average_gpu_utilization'] *= weights[3]

    # Split the data into independent and dependent variables
    X = df.drop(columns=['training_time'])
    y = df['training_time']
    
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Create a linear regression model
    model = LinearRegression()

    # Fit the model to the data
    model.fit(X_train, y_train)

    # Predict the values on the test data
    y_pred = model.predict(X_test)

    # Calculate the mean squared error
    mse = mean_squared_error(y_test, y_pred)
    print(f"RMSE: {np.sqrt(mse)}")

mnist_classification
RMSE: 6.299366661325611
cifar10_classification
RMSE: 3.9622236874940806
cifar10_deep_classification
RMSE: 3.7328166465166017
