In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import re
import os
from pathlib import Path
import shutil
import sys
import argparse
import csv

import seaborn as sns 
import matplotlib.pyplot as plt 
from sklearn import preprocessing, svm 
from sklearn.model_selection import train_test_split as tts
# from sklearn import linear_model
import random
from sklearn.model_selection import cross_val_score
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.tree import DecisionTreeRegressor
import cudf
from sklearn.metrics import r2_score

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [2]:
kelvin_offset = 273.15

def getDimensions(file, layer):
    columns = pd.read_csv(file, nrows=1)
    nrows = 0
    ncols = 0
    while f'V(NODE{layer}_{nrows}_0)' in columns:
        nrows += 1
    while f'V(NODE{layer}_0_{ncols})' in columns:
        ncols += 1
    return nrows, ncols


def readFormatInput(file, n_rows, n_cols, layer):
    column = [f'V(NODE{layer}_{row}_{col})' for row in range(n_rows) for col in range(n_cols)]
    df_l = pd.read_csv(file, usecols=column)
    return df_l

def getTemp(file):
    grid_rows, grid_cols = getDimensions(file, 1)
    df_l = readFormatInput(file, grid_rows, grid_cols, 1)
    df_l -= kelvin_offset
    return df_l

In [3]:
os.chdir('outputs')
outputs = os.listdir()
temp_files_paths = []

for file in outputs:
    path = os.getcwd()
    temp_files_paths.append(f"{path}/{file}")
os.chdir('../')

In [4]:
ptrace_file_path =  "/usr4/spclpgm/zhoua25/PACT/ML_Dataset/ptrace_files/tier1_ptrace.csv"
ptrace_file =  pd.read_csv(ptrace_file_path, header = 0)
power = ptrace_file['Power'].sum()

In [5]:
df = pd.DataFrame()
df['Powerusage'] = [power]* len(temp_files_paths)
df['Temperature_files'] = temp_files_paths

"""
The order of the power files will likely not be the same as in the other regression, this can be fixed later if the two regressions are combined



"""

In [6]:
def powerPerNode(powerusage, nodes):
    nodes *= nodes
    pPN = [powerusage/nodes]*nodes
    return pd.DataFrame(pPN, columns = ['PowerPerNode'])

In [7]:
df1 = df.to_numpy()
df1[0]

array([19,
       '/usr4/spclpgm/zhoua25/PACT/ML_Dataset/outputs/output121.cir.csv'],
      dtype=object)

In [7]:
def getData(num, df):
    df1 = df.to_numpy()
    test = powerPerNode(df1[num][0], 100)
    test1= getTemp(df1[num][1])
    cols = test1.columns
    # test1.columns = [None] * len(test1.columns)
    test1 = test1.transpose().reset_index()
    test['NodeTemp'] = test1[0]
    test['NodeName'] = cols
    test['index1'] = test.index
    return test

In [8]:
# getData(1, df)

In [8]:
def get_random(num):
    list = []
    for i in range(num):
        list.append(random.randint(0,10000))
    return list

In [10]:
def generate_data(randNodes, df): 
    x = pd.DataFrame()
    for i in range(len(df)):
        rand = getData(i, df)
        rand['TempReadings']= np.nan
        for num in randNodes:
            rand['TempReadings'][num] = rand.iloc[num,1]
        min = rand['TempReadings'].min()
        rand['TempReadings'] = rand['TempReadings'].fillna(min)
        data = [x,rand]
        x = pd.concat(data)
    return x

In [41]:
# def impute_data(x):
#     # if x.shape[1] != 3:
#     #     return x
#     temp = x
#     imputer = IterativeImputer(random_state = 0)
#     imputed = imputer.fit_transform(temp[['TempReadings','index1','PowerPerNode']])
#     df_imputed = pd.DataFrame(imputed, columns = ['TempReadings','index1','PowerPerNode'])
#     return df_imputed

In [10]:
def get_regression(data):
    # df_imputed = impute_data(data)
    # if df_imputed.shape[1] != 3:
    #     X = df_imputed[['PowerPerNode', 'index1']]
    # else:
    X = df_imputed[['PowerPerNode', 'index1', 'TempReadings']]
    y = data['NodeTemp']
    X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.25)
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    return regr, X_train, X_test, y_train, y_test
    

In [11]:
def get_score(regr, X_train, X_test, y_train, y_test):
    scores = cross_val_score(regr, X_train, y_train, scoring='r2', cv=5)
    return regr.score(X_test, y_test), scores
    

In [15]:
data = generate_data([], df)

In [14]:
data = pd.read_csv("data.csv")

In [15]:
regr, X_train, X_test, y_train, y_test = get_regression(data)

In [16]:
print(get_score(regr, X_train, X_test, y_train, y_test))

(0.345040465224977, array([0.34461719, 0.34638406, 0.3440031 , 0.34413182, 0.34457075]))


In [19]:
print(get_random(10))

[6682, 8551, 2222, 901, 1499, 8364, 1863, 1916, 2592, 4357]


In [11]:
randNodes = [6682, 8551, 2222, 901, 1499, 8364, 1863, 1916, 2592, 4357]

In [None]:
data_10 = generate_data(rand_Nodes, df)

0        NaN
1        NaN
2        NaN
3        NaN
4        NaN
          ..
359995   NaN
359996   NaN
359997   NaN
359998   NaN
359999   NaN
Name: TempReadings, Length: 360000, dtype: float64

In [42]:
data_imputed = impute_data(new_data)


In [43]:
data_imputed

Unnamed: 0,TempReadings,index1,PowerPerNode
0,49.475362,0.0,0.00219
1,49.475269,1.0,0.00219
2,49.475177,2.0,0.00219
3,49.475084,3.0,0.00219
4,49.474992,4.0,0.00219
...,...,...,...
359995,48.550657,9995.0,0.00409
359996,48.550565,9996.0,0.00409
359997,48.550472,9997.0,0.00409
359998,48.550380,9998.0,0.00409


In [44]:
regr_10, X_train, X_test, y_train, y_test = get_regression(new_data)

In [45]:
print(get_score(regr_10, X_train, X_test, y_train, y_test))

(0.9738819167957765, array([0.96539709, 0.96127825, 0.96483882, 0.95930909, 0.96432744]))


In [47]:
d = data_10[['PowerPerNode','index1','TempReadings']].iloc[7190000:7200000]
d_new = impute_data(d)

In [50]:
d_new[['PowerPerNode', 'index1', 'TempReadings']]

Unnamed: 0,PowerPerNode,index1,TempReadings
0,0.0019,0.0,48.231463
1,0.0019,1.0,48.231297
2,0.0019,2.0,48.231131
3,0.0019,3.0,48.230966
4,0.0019,4.0,48.230800
...,...,...,...
9995,0.0019,9995.0,46.573615
9996,0.0019,9996.0,46.573449
9997,0.0019,9997.0,46.573284
9998,0.0019,9998.0,46.573118


In [51]:
r2_score(regr_10.predict(d_new[['PowerPerNode', 'index1', 'TempReadings']]), data_10['NodeTemp'].iloc[7190000:7200000])

-0.9729400212677375