<a href="https://colab.research.google.com/github/patrickpato/MechConnect-ML/blob/main/Prompt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive (Colab-only); the paths under
# /content/drive used by this notebook are only reachable after this call.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Imports
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [3]:
# Read the prompts dataset from Drive and discard every row that has a missing value
df = pd.read_csv('/content/drive/MyDrive/MechConnectData/prompts.csv').dropna()

In [4]:
# Feature columns: everything in the dataset except the 'Issue' label column
X = df.drop('Issue', axis=1)

In [5]:
# Target labels: the 'Issue' column that the classifier is trained to predict
y = df['Issue']

In [6]:
# Vocabulary size: passed to one_hot() as the size of the word-index space and
# to the Embedding layer as its input dimension
voc_size = 5000

In [7]:
# Take a copy of the features with a fresh 0..n-1 index. Rows removed by
# dropna() may leave gaps in the original index, and the preprocessing step
# walks the prompts in row order. The old index is kept as an 'index' column.
prompt = X.reset_index()

In [8]:
# Data preprocessing: clean every prompt, then map its words to integer indices
nltk.download('stopwords')
ps = PorterStemmer()

# Build the stop-word set once. Calling stopwords.words('english') inside the
# comprehension repeats the lookup and scans a list for every single token.
english_stopwords = set(stopwords.words('english'))

corpus = []
for raw_prompt in prompt['prompt']:
    # Keep letters only, lower-case, and tokenise on whitespace
    tokens = re.sub('[^a-zA-Z]', ' ', raw_prompt).lower().split()
    # Drop stop words and reduce the remaining words to their Porter stem
    stems = [ps.stem(word) for word in tokens if word not in english_stopwords]
    corpus.append(' '.join(stems))

# NOTE(review): by default one_hot() hashes words with Python's built-in hash(),
# which Keras documents as not stable across interpreter runs. Indices computed
# here may not match those computed in a later session that reloads the saved
# model -- confirm before deploying (hashing_trick(..., hash_function='md5') is
# the stable alternative, but it must then be used at prediction time as well).
onehot_repr = [one_hot(words, voc_size) for words in corpus]


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [9]:
# Padding sequences: bring every encoded prompt to exactly sent_length indices,
# adding the padding at the front (padding='pre')
sent_length = 20
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)


In [10]:
# Creating model: Embedding -> LSTM -> softmax classifier with one output unit
# per distinct value of y
embedding_vector_features = 150
num_classes = len(np.unique(y))

model = Sequential([
    Embedding(voc_size, embedding_vector_features, input_length=sent_length),
    # Dropout(0.3) is left disabled at this position
    LSTM(300, return_sequences=False),
    # Dropout(0.3) is left disabled at this position
    Dense(num_classes, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [11]:
# Prepare data: feature matrix, one-hot encoded labels, and a 70/30 split
X_final = np.array(embedded_docs)

# Integer-encode the issue names, then expand them to one-hot rows to match
# the categorical_crossentropy loss
label_encoder = LabelEncoder()
y_encoded = to_categorical(label_encoder.fit_transform(y))

X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_encoded,
    test_size=0.3,
    random_state=45,
)


In [None]:
# Model Training
# NOTE(review): the held-out split (X_test, y_test) is also passed as
# validation_data, so the validation metrics are not an independent test score.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=50)
# TODO: revisit the number of epochs

Epoch 1/50

In [None]:
 # function to test the model on custom sentences
def predict_label(sentence):
    """Predict the issue label for a single free-text prompt.

    The sentence goes through the same steps as the training prompts:
    non-letters are replaced by spaces, the text is lower-cased and split on
    whitespace, English stop words are dropped, the remaining words are
    stemmed, and the result is encoded with ``one_hot`` and padded at the
    front to ``sent_length``.

    Relies on the notebook globals ``ps``, ``voc_size``, ``sent_length``,
    ``model`` and ``label_encoder``.

    Args:
        sentence: Raw prompt text.

    Returns:
        The entry of ``label_encoder.classes_`` at the index of the largest
        value in the model's prediction.
    """
    # Build the stop-word set once per call instead of re-evaluating
    # stopwords.words('english') and scanning a list for every token.
    english_stopwords = set(stopwords.words('english'))

    # Preprocess the input sentence
    tokens = re.sub('[^a-zA-Z]', ' ', sentence).lower().split()
    stems = [ps.stem(word) for word in tokens if word not in english_stopwords]
    cleaned = ' '.join(stems)

    # One-hot encode and pad the input sentence (a batch containing one row)
    sentence_onehot = [one_hot(cleaned, voc_size)]
    sentence_padded = pad_sequences(sentence_onehot, padding='pre', maxlen=sent_length)

    # Make a prediction and convert the winning class index back to its label
    prediction = model.predict(sentence_padded)
    return label_encoder.classes_[np.argmax(prediction)]


In [None]:
# Example usage of the predictor function: classify one hand-written prompt
# and print the predicted issue name
input_sentence = "My Ac does not go to the desired temprature."
predicted_label = predict_label(input_sentence)
print(f"Predicted Issue: {predicted_label}")

In [None]:
# Second example prompt, run through the same predictor
input_sentence = 'Sound in my gear box.'
predicted_label = predict_label(input_sentence)
print(f'Predicted Issue: {predicted_label}')

In [None]:
# Save the model as an .h5 file in the current working directory
model.save('prompt_model.h5')

In [None]:
import pickle

# Save the prompt classification model as a pickle file
# NOTE(review): the model is also written with model.save() elsewhere in this
# notebook; confirm this pickled copy is actually needed. Only unpickle files
# from a trusted source.
model_pkl_file = "prompt_model.h5.pkl"

with open(model_pkl_file, 'wb') as file:
    pickle.dump(model, file)


In [None]:
# Download the locally saved prompt_model.h5 through the browser (Colab-only)
from google.colab import files

files.download('prompt_model.h5')


In [None]:
# Save the model for future use: write it to the mounted Google Drive folder
model.save('/content/drive/MyDrive/ML/prompt_model.h5')

In [None]:
# Restore the trained weights from the Drive copy into the existing `model` object.
# NOTE(review): load_weights() only restores weights, so `model` must already be
# built with the same architecture in this session; to rebuild the model from
# the file in a fresh session, tf.keras.models.load_model() is the usual call.
model.load_weights('/content/drive/MyDrive/ML/prompt_model.h5')