In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

# Load the dataset
df = pd.read_csv('cpu_gpu_code.csv')

# Drop incomplete rows before labelling. A missing running time would make the
# comparison below evaluate to False and silently label the row as 1 (GPU), and
# a missing code cell would turn the concatenated text into NaN, which is not
# valid text input for the tokenizer.
# Note: the 'CPU assembly code ' column name really does end with a space.
required_columns = ['CPU runing time', 'GPU runing time', 'CPU assembly code ', 'GPU ptx code']
df = df.dropna(subset=required_columns).copy()
if df.empty:
    raise ValueError('cpu_gpu_code.csv has no complete rows to train on')

# Create target labels: 0 when the CPU run is strictly faster, 1 otherwise
# (a tie is therefore labelled 1)
df['target'] = np.where(df['CPU runing time'] < df['GPU runing time'], 0, 1)

# Combine CPU assembly and GPU PTX code into a single text per row
df['code'] = df['CPU assembly code '] + ' ' + df['GPU ptx code']

# Tokenize the code (default Tokenizer settings) and map each text to a list of
# integer token ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['code'])
sequences = tokenizer.texts_to_sequences(df['code'])

# Set the parameters
# +1 leaves room for index 0, which is the value used for padding below
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 128
# Pad every sample up to the longest tokenized sample in the dataset
max_length = max(len(seq) for seq in sequences)

# Pad the sequences (zeros appended at the end of shorter samples)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Split the dataset: 80% train / 20% validation, fixed seed for a repeatable split
x_train, x_val, y_train, y_val = train_test_split(padded_sequences, df['target'].values, test_size=0.2, random_state=42)

# Build the model: token embedding -> two stacked bidirectional LSTMs ->
# dense hidden layer -> single sigmoid unit (binary output)
layer_stack = [
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    # First recurrent layer returns the full sequence so the second one can consume it
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
]
model = Sequential(layer_stack)

# Compile the model for binary classification
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model, reporting validation metrics after every epoch
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_val, y_val),
)

# Persist both artifacts needed at prediction time: the trained network and
# the fitted tokenizer that maps code text to token ids
import pickle

model.save('cpu_gpu_predictor.h5')

with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)


Epoch 1/10


In [4]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# Restore the trained network and the fitted tokenizer from disk
model = load_model('cpu_gpu_predictor.h5')

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Input: new CPU assembly code and GPU ptx code
# Both snippets are plain strings; they are only concatenated and tokenized
# further down, never assembled or executed. Everything between the triple
# quotes (including the ';' remarks) is part of the text fed to the tokenizer.

# CPU side: x86 assembly text for a routine that adds one to every array element.
cpu_code = '''
; Increment each element of an array by one in x86 assembly
; void increment_array(int* array, int size);

section .text
global increment_array
increment_array:
    ; Function prologue
    push ebp
    mov ebp, esp

    ; Load parameters
    mov eax, [ebp+8] ; array
    mov ecx, [ebp+12] ; size

    ; Loop
    xor edx, edx ; i = 0
.loop:
    cmp edx, ecx
    jge .end_loop

    ; Increment array element by one
    add dword [eax + edx*4], 1

    ; Increment loop counter
    inc edx
    jmp .loop

.end_loop:
    ; Function epilogue
    pop ebp
    ret
'''

# GPU side: PTX-style text for the same increment operation.
# NOTE(review): this does not look like strictly valid PTX (';' used for
# remarks, `atomic.add` mnemonic) -- harmless for tokenization, but confirm it
# matches the format of the 'GPU ptx code' samples the model was trained on.
gpu_code = '''
.version 6.4
.target sm_30
.address_size 64

.entry increment_array_gpu (
    .param .u64 increment_array_gpu_param_0, ; int* array
    .param .u32 increment_array_gpu_param_1  ; int size
)
{
    .reg .b32 %r<3>;
    .reg .b64 %rd<3>;

    ; Load parameters
    ld.param.u64 %rd1, increment_array_gpu_param_0;
    ld.param.u32 %r1, increment_array_gpu_param_1;

    ; Get the thread index
    mov.u32 %r2, %tid.x;

    ; Check if the thread index is within the array size
    setp.ge.u32 %p1, %r2, %r1;
    @%p1 exit;

    ; Increment array element by one
    atomic.add.s32 [%rd1 + %r2 * 4], 1;

    exit:;
    ret;
}
'''

# Preprocess the input data
# Same recipe as training: CPU text, a single space, then GPU text, encoded
# with the tokenizer that was fitted on the training data.
input_data = cpu_code + ' ' + gpu_code
input_sequence = tokenizer.texts_to_sequences([input_data])

# Take the padded length from the loaded model rather than from a `max_length`
# variable left behind by the training cell, so this cell also runs on a fresh
# kernel. If the model reports no fixed length (None), pad_sequences pads to
# the longest sequence in the batch, i.e. leaves this single input as is.
model_input_length = model.input_shape[1]

# pad_sequences truncates from the front by default, so make it visible when
# the leading part of the input is about to be discarded.
token_count = len(input_sequence[0])
if model_input_length is not None and token_count > model_input_length:
    print(f"Warning: input has {token_count} tokens; only the last {model_input_length} are used")

padded_input_sequence = pad_sequences(input_sequence, maxlen=model_input_length, padding='post')

# Make predictions using the loaded model
# The result has one row per input and a single sigmoid output per row.
prediction = model.predict(padded_input_sequence)

# Choose the best option (CPU or GPU) based on the prediction
# Training labels were 0 = CPU faster, 1 = otherwise, so scores below 0.5 mean CPU.
if prediction[0][0] < 0.5:
    print("Choose CPU code")
else:
    print("Choose GPU code")


Choose CPU code
