In [3]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import re
import os
from pathlib import Path
import shutil
import sys
import argparse
import csv
import random

import seaborn as sns 
import matplotlib.pyplot as plt 
from sklearn import preprocessing, svm 
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

import cudf
import cuml
from cuml.model_selection import train_test_split
#from cuml.linear_model.linear_regression import LinearRegression
from cuml.ensemble import RandomForestRegressor

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



In [4]:
# Offset subtracted from every node value in getTemp
# (273.15 -- presumably converts Kelvin to degrees Celsius; confirm units of the CSV).
kelvin_offset = 273.15

def getDimensions(file, layer):
    """Count the rows and columns of a layer's node grid from a CSV header.

    Reads a single data row of ``file`` and, starting from index 0, counts
    how many consecutive ``V(NODE{layer}_{r}_0)`` names (rows) and
    ``V(NODE{layer}_0_{c})`` names (columns) are present.

    Returns:
        tuple: ``(n_rows, n_cols)``; ``(0, 0)`` when the layer has no columns.
    """
    header = cudf.read_csv(file, nrows=1)

    def _count_consecutive(make_name):
        # Advance until the first index whose column name is missing.
        count = 0
        while make_name(count) in header:
            count += 1
        return count

    n_rows = _count_consecutive(lambda r: f'V(NODE{layer}_{r}_0)')
    n_cols = _count_consecutive(lambda c: f'V(NODE{layer}_0_{c})')
    return n_rows, n_cols


def readFormatInput(file, n_rows, n_cols, layer):
    """Read only the node columns of one layer from ``file``.

    The requested column names are ``V(NODE{layer}_{row}_{col})`` for every
    ``row`` in ``range(n_rows)`` and ``col`` in ``range(n_cols)``, generated
    row by row.

    Returns:
        The frame returned by ``cudf.read_csv`` restricted to those columns.
    """
    wanted = []
    for row in range(n_rows):
        for col in range(n_cols):
            wanted.append(f'V(NODE{layer}_{row}_{col})')
    return cudf.read_csv(file, usecols=wanted)

def getTemp(file):
    """Load the layer-4 node values of ``file`` with ``kelvin_offset`` subtracted.

    The grid size is discovered with ``getDimensions`` and the matching
    columns are read with ``readFormatInput``; the layer number 4 is
    hard-coded in both calls.
    """
    n_rows, n_cols = getDimensions(file, 4)
    temps = readFormatInput(file, n_rows, n_cols, 4)
    temps -= kelvin_offset
    return temps

In [5]:
# Collect the paths of the scaled power-trace files and of the temperature
# outputs WITHOUT changing the working directory.  Walking the tree with
# os.chdir is fragile: stepping back up only when a "scaled" file is found
# leaves the notebook in the wrong directory whenever a folder holds zero
# or several such files.
base_dir = os.getcwd()

ptrace_root = os.path.join(base_dir, 'different_ptraces')
folders = os.listdir(ptrace_root)

ptrace_files_path = []

for folder in folders:
    folder_path = os.path.join(ptrace_root, folder)
    if not os.path.isdir(folder_path):
        # Stray files next to the per-trace folders cannot be listed.
        continue
    for file in os.listdir(folder_path):
        if "scaled" in file:
            ptrace_files_path.append(f"{folder_path}/{file}")

# NOTE(review): os.listdir returns entries in arbitrary order, so the order
# of ptrace_files_path is filesystem-dependent -- confirm it lines up with
# the numbering of the files in 'outputs'.
outputs_dir = os.path.join(base_dir, 'outputs')
outputs = os.listdir(outputs_dir)
temp_files_paths = [f"{outputs_dir}/{file}" for file in outputs]

In [6]:
# Total power of each trace: the sum of the 'Power' column of its CSV.
powers = [
    cudf.read_csv(trace_path, header = 0)['Power'].sum()
    for trace_path in ptrace_files_path
]

In [7]:
# Order the temperature outputs by run number: for each i in
# 0..len(ptrace_files_path), keep every path containing "output{i}.cir.csv".
# NOTE(review): this assumes entry k of ptrace_files_path belongs to the k-th
# output found here -- verify against how the files were generated.
fixed_list = [
    temp_path
    for run_no in range(len(ptrace_files_path) + 1)
    for temp_path in temp_files_paths
    if f"output{run_no}.cir.csv" in temp_path
]

In [8]:
# One row per simulation run: total power and the matching temperature file.
df = cudf.DataFrame({
    'Powerusage': powers,
    'Temperature_files': fixed_list,
})

In [9]:
def powerPerNode(powerusage, nodes):
    """Spread ``powerusage`` evenly over a ``nodes`` x ``nodes`` grid.

    Returns:
        A one-column frame named ``PowerPerNode`` with ``nodes * nodes`` rows,
        each holding ``powerusage / (nodes * nodes)``.
    """
    total_nodes = nodes * nodes
    share = powerusage / total_nodes
    return cudf.DataFrame([share] * total_nodes, columns = ['PowerPerNode'])

In [26]:
def getData(num, df):
    """Build the per-node table for simulation run ``num``.

    Args:
        num: Row position in ``df`` of the run to load.
        df: Frame whose first column is the run's total power and whose
            second column is the path of its temperature CSV
            (``Powerusage`` / ``Temperature_files``).

    Returns:
        A frame with one row per node and the columns ``PowerPerNode``,
        ``NodeTemp``, ``NodeName`` and ``index1`` (the row position).
    """
    records = df.to_numpy()
    # Index by row first: records[num] is run `num`; column 0 is its power and
    # column 1 its temperature file.  Reading the power as records[0][num]
    # is only correct for num == 0.
    power, temp_file = records[num][0], records[num][1]

    # 100 -> a 100 x 100 node grid is assumed (hard-coded).
    test = powerPerNode(power, 100)
    temps = getTemp(temp_file)
    node_names = temps.columns
    # After the transpose each node is a row; column 0 holds the values of
    # the first row of the temperature CSV.
    temps = temps.transpose().reset_index()
    test['NodeTemp'] = temps[0]
    test['NodeName'] = node_names
    test['index1'] = test.index
    return test

In [28]:
# Preview the per-node table for the first run (row 0 of df).
p = getData(0, df)
p

Unnamed: 0,PowerPerNode,NodeTemp,NodeName,index1
0,0.00219,47.69,V(NODE4_0_0),0
1,0.00219,47.69,V(NODE4_0_1),1
2,0.00219,47.69,V(NODE4_0_2),2
3,0.00219,47.69,V(NODE4_0_3),3
4,0.00219,47.70,V(NODE4_0_4),4
...,...,...,...,...
9995,0.00219,46.04,V(NODE4_99_95),9995
9996,0.00219,46.04,V(NODE4_99_96),9996
9997,0.00219,46.04,V(NODE4_99_97),9997
9998,0.00219,46.04,V(NODE4_99_98),9998


In [11]:
def get_random(num, n_nodes=10000):
    """Draw ``num`` random node indices in ``[0, n_nodes)``.

    Indices are drawn independently, so the result may contain duplicates.

    Args:
        num: How many indices to draw.
        n_nodes: Number of nodes; valid indices are ``0 .. n_nodes - 1``.
            Defaults to 10000.

    Returns:
        list[int]: ``num`` indices, each a valid row position.

    Raises:
        ValueError: If ``n_nodes`` is not positive and ``num`` > 0.
    """
    # randrange excludes the upper bound; random.randint(0, 10000) could
    # return 10000, which is one past the last node.
    return [random.randrange(n_nodes) for _ in range(num)]

In [30]:
#Regression with no sensor readings given to model
# Stack the per-node tables of every run into one frame.  getData needs the
# run table as its second argument, and concatenating once avoids re-copying
# the accumulated frame on every iteration.
frames = [getData(i, df) for i in range(len(df))]
x = cudf.concat(frames) if frames else cudf.DataFrame()


In [123]:
# Fit a decision tree on (PowerPerNode, index1) -> NodeTemp and print its
# R^2 on a random 20% hold-out split.
# No random_state is passed to the split or the tree, so the score varies between runs.
X = x[['PowerPerNode', 'index1']].to_pandas()
y = x['NodeTemp'].to_pandas()
X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.2)
regr = DecisionTreeRegressor()
regr.fit(X_train, y_train)
print(regr.score(X_test, y_test)) 

0.947914692044515


In [28]:
# 5-fold cross-validated R^2 of the same estimator on the training split.
# NOTE(review): this cell's recorded execution count (In [28]) is far from the
# fit cell's (In [123]), so the stored output may come from a different kernel
# state -- re-run both cells in order before comparing the numbers.
scores = cross_val_score(regr, X_train, y_train, scoring='r2', cv=5)
scores

array([0.40344731, 0.40333178, 0.40242806, 0.40439912, 0.40398463])

In [244]:
### Start of the regression where the model is given 10 temperature readings

In [12]:
# Ten hard-coded node row positions whose temperatures are exposed as sensor readings.
randNodes = [5868, 124, 1367, 5158, 5342, 3671, 8696, 7527, 1687, 6938]

In [29]:
def generate_data(randNodes, df):
    """Build the training table with sensor readings at the given nodes.

    For every run in ``df`` the per-node table from ``getData`` gets a
    ``TempReadings`` column that equals ``NodeTemp`` at the row positions in
    ``randNodes`` and NaN everywhere else.  The tables of all runs are
    stacked in run order.

    Args:
        randNodes: Row positions of the nodes that act as sensors.
        df: Run table passed through to ``getData``.

    Returns:
        The concatenated frame (an empty frame when ``df`` has no rows).

    Raises:
        IndexError: If a position in ``randNodes`` is outside the node table.
    """
    sensor_idx = np.asarray(list(randNodes), dtype=int)
    frames = []
    for i in range(len(df)):
        rand = getData(i, df)
        # Fill the column on the host in one vectorised step instead of the
        # chained `rand['TempReadings'][num] = ...`, which writes through a
        # temporary Series and costs one scalar transfer per sensor.
        node_temp = rand['NodeTemp'].to_numpy(na_value=np.nan)
        readings = np.full(len(rand), np.nan)
        readings[sensor_idx] = node_temp[sensor_idx]
        rand['TempReadings'] = readings
        frames.append(rand)
    # A single concat avoids re-copying the accumulated frame every iteration.
    return cudf.concat(frames) if frames else cudf.DataFrame()

In [14]:
# x2 = pd.DataFrame()
# for i in range(len(df)):
#     rand = getData(i)
#     rand['TempReadings'] = np.nan
#     for num in randNodes:
#         rand['TempReadings'][num] = rand.iloc[num,1]
#     data = [x,rand]
#     x2 = pd.concat(data)

In [15]:
def impute_data(x):
    """Fill the missing ``TempReadings`` values with ``IterativeImputer``.

    The frame is moved to pandas, the imputer (``random_state=0``) is fitted
    on the columns ``TempReadings``, ``index1`` and ``PowerPerNode``, and the
    transformed values are returned as a new cudf frame with those three
    columns in that order.
    """
    feature_cols = ['TempReadings', 'index1', 'PowerPerNode']
    host_frame = x.to_pandas()
    filled = IterativeImputer(random_state = 0).fit_transform(host_frame[feature_cols])
    return cudf.DataFrame(filled, columns = feature_cols)

In [89]:
# Build and impute the 10-sensor data set here so the cell runs on a fresh
# kernel: df_imputed is otherwise only a local inside impute_data, and the
# target must come from the same frame as the features.
data_10 = generate_data(randNodes, df)
df_imputed = impute_data(data_10)

X_10 = df_imputed[['PowerPerNode', 'index1', 'TempReadings']].to_pandas()
# Convert the target to pandas as well so scikit-learn sees host data for
# both X and y (same as get_regression).
y_10 = data_10['NodeTemp'].to_pandas()

X10_train, X10_test, y10_train, y10_test = tts(X_10, y_10, test_size = 0.25)

regr_10 = DecisionTreeRegressor()
regr_10.fit(X10_train, y10_train)



In [145]:
# Degree-4 polynomial regression on the 10-sensor features.
poly = PolynomialFeatures(degree = 4)
X10_poly = poly.fit_transform(X_10)

# Use dedicated names for the polynomial split: reusing X10_train / X10_test
# would replace the 3-feature split that regr_10 is scored on in later cells.
X10_poly_train, X10_poly_test, y10_poly_train, y10_poly_test = tts(
    X10_poly, y_10, test_size = 0.25
)
# `poly` is already fitted by fit_transform above; fitting it again on the
# expanded matrix would only change the input width it expects.

lin = LinearRegression()
lin.fit(X10_poly_train, y10_poly_train)
poly_reg_y_predicted = lin.predict(X10_poly_test)
poly_score = r2_score(y10_poly_test, poly_reg_y_predicted)

print(poly_score)

0.5576834928214964


In [117]:
# R^2 of the 10-sensor decision tree on the hold-out split.
# Requires X10_test / y10_test to still be the 3-feature split regr_10 was fitted with.
print(regr_10.score(X10_test, y10_test))

0.9354886131110398


In [118]:
# 5-fold cross-validated R^2 of regr_10 on the training split.
# Requires X10_train / y10_train to still be the 3-feature split regr_10 was fitted with.
scores = cross_val_score(regr_10, X10_train, y10_train, scoring='r2', cv=5)
scores

array([0.93245627, 0.9328622 , 0.93202003, 0.93444115, 0.93078416])

#

Create regressions that are all given the same random temperature nodes.
Give the model increasingly more temperature nodes.
Create a plot showing how giving the model more temperature sensors affects accuracy.

Have the model write out a file containing the temperature of all the sensors.


"""

In [107]:
# Draw 20 random node indices and print them (a new draw on every run -- no seed is set).
print(get_random(20))

[9035, 2870, 9581, 8999, 4607, 7505, 8702, 7223, 7851, 9410, 67, 1410, 7282, 1899, 2286, 5565, 2377, 9144, 4325, 8171]


In [108]:
# The 20 node indices from the recorded get_random(20) output, hard-coded so the sensor set stays fixed.
randNodes = [9035, 2870, 9581, 8999, 4607, 7505, 8702, 7223, 7851, 9410, 67, 1410, 7282, 1899, 2286, 5565, 2377, 9144, 4325, 8171]

In [109]:
# Build the training table using the current randNodes as sensor positions.
data = generate_data(randNodes, df)


In [124]:
# Second name for the same frame object (plain assignment, not a copy).
data_20 = data

In [16]:
def get_regression(data):
    """Impute the sensor readings, split the data and fit a decision tree.

    Features are the imputed ``PowerPerNode``, ``index1`` and
    ``TempReadings`` columns; the target is ``data['NodeTemp']``.  A random
    25% of the rows is held out for testing.

    Returns:
        tuple: ``(fitted_regressor, X_train, X_test, y_train, y_test)``.
    """
    features = impute_data(data)[['PowerPerNode', 'index1', 'TempReadings']].to_pandas()
    target = data['NodeTemp'].to_pandas()
    X_train, X_test, y_train, y_test = tts(features, target, test_size = 0.25)
    model = DecisionTreeRegressor()
    model.fit(X_train, y_train)
    return model, X_train, X_test, y_train, y_test
    

In [23]:
def get_score(regr, X_train, X_test, y_train, y_test):
    """Evaluate a fitted regressor.

    Returns:
        tuple: ``(holdout_score, cv_scores)`` where ``holdout_score`` is
        ``regr.score`` on the test split and ``cv_scores`` are the 5-fold
        cross-validated R^2 values on the training split.
    """
    cv_scores = cross_val_score(regr, X_train, y_train, scoring='r2', cv=5)
    holdout_score = regr.score(X_test, y_test)
    return holdout_score, cv_scores
    

In [125]:
# Fit on the 20-sensor data and print (hold-out R^2, 5-fold CV R^2 scores).
# get_regression_score is not defined in this notebook; the same result is
# obtained from get_regression followed by get_score.
regr_20, X20_train, X20_test, y20_train, y20_test = get_regression(data_20)
print(get_score(regr_20, X20_train, X20_test, y20_train, y20_test))

(0.9353393294677939, array([0.93227856, 0.93343519, 0.93332925, 0.93358784, 0.93266497]))


In [19]:
# Draw 30 random node indices and print them (a new draw on every run -- no seed is set).
print(get_random(30))

[2756, 649, 3239, 1978, 6306, 3772, 3116, 5284, 4502, 6744, 3721, 5753, 5530, 8880, 339, 9520, 3090, 7441, 8539, 1111, 4989, 5747, 2887, 8753, 8652, 1962, 5975, 8343, 5044, 9489]


In [20]:
# The 30 node indices from the recorded get_random(30) output, hard-coded so the sensor set stays fixed.
randNodes = [2756, 649, 3239, 1978, 6306, 3772, 3116, 5284, 4502, 6744, 3721, 5753, 5530, 8880, 339, 9520, 3090, 7441, 8539, 1111, 4989, 5747, 2887, 8753, 8652, 1962, 5975, 8343, 5044, 9489]

In [21]:
# Build the training table using the current randNodes as sensor positions.
data_30 = generate_data(randNodes, df)

In [22]:
# Impute, split and fit on data_30.
# Note: this rebinds the notebook-level X_train / X_test / y_train / y_test.
regr_30, X_train, X_test, y_train, y_test = get_regression(data_30)

In [24]:
# Print (hold-out R^2, 5-fold CV R^2 scores) for regr_30.
print(get_score(regr_30, X_train, X_test, y_train, y_test))

(0.9365862033720366, array([0.92945295, 0.93153788, 0.93303299, 0.93305278, 0.93248637]))
