In [1]:
# Installing Required Library
!pip install pandas
!pip install scikit-learn



In [2]:
#Importing Required Library
import numpy as np
import pandas as pd
import json
import pickle
import requests
import zipfile
import os

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.initializers import Constant

In [3]:
#Loading the Dataset
df = pd.read_csv("/content/Dataset_SB.csv")

In [4]:
df

Unnamed: 0,Participant,Compliments,Questions,Feedback,Summary/Takeaway
0,Alice Williams,Your project's creativity and originality are ...,How did you come up with the concept for your ...,Excellent work on project execution. Consider ...,Impressive project with creative elements. Pot...
1,Bob Davis,"The documentation is clear and concise, making...",What challenges did you face during the implem...,Great attention to detail in the documentation...,Clear documentation and attention to detail. E...
2,Catherine Miller,Your project's simplicity and user-friendlines...,How did you decide on the user interface desig...,Positive feedback on the project's user experi...,User-friendly design with clear visuals. Explo...
3,David Anderson,The choice of algorithms in your project refle...,What inspired you to work on this particular p...,Impressive work on project execution. Consider...,Thorough understanding of algorithms and pract...
4,Emma Taylor,The project's innovative approach to problem-s...,How did you validate the performance of your M...,Constructive feedback on the project's impact ...,Innovative problem-solving approach with engag...
...,...,...,...,...,...
294,Oliver Davis,"The user interface is intuitive, contributing ...",How did you handle imbalanced datasets in your...,Positive feedback on the overall project impac...,Consider providing insights into potential col...
295,Madison Taylor,Your project's simplicity makes it easy for us...,How did you decide on the user interface desig...,Impressive work on project execution. Consider...,Consider providing more insights into potentia...
296,Olivia Davis,The project's innovative solutions to complex ...,How did you come up with the concept for your ...,Great attention to detail in the documentation...,Consider discussing potential directions for f...
297,Mia Taylor,The project's simplicity makes it accessible t...,How did you decide on the user interface desig...,Positive feedback on the project's user experi...,Consider exploring potential ethical considera...


In [5]:
df.columns

Index(['Participant', 'Compliments', 'Questions', 'Feedback',
       'Summary/Takeaway'],
      dtype='object')

In [6]:
# Preprocess your dataset
X = df[['Compliments', 'Questions', 'Feedback']]
y = df["Summary/Takeaway"]


In [7]:
df.columns

Index(['Participant', 'Compliments', 'Questions', 'Feedback',
       'Summary/Takeaway'],
      dtype='object')

In [8]:
X.head()

Unnamed: 0,Compliments,Questions,Feedback
0,Your project's creativity and originality are ...,How did you come up with the concept for your ...,Excellent work on project execution. Consider ...
1,"The documentation is clear and concise, making...",What challenges did you face during the implem...,Great attention to detail in the documentation...
2,Your project's simplicity and user-friendlines...,How did you decide on the user interface desig...,Positive feedback on the project's user experi...
3,The choice of algorithms in your project refle...,What inspired you to work on this particular p...,Impressive work on project execution. Consider...
4,The project's innovative approach to problem-s...,How did you validate the performance of your M...,Constructive feedback on the project's impact ...


In [9]:
y

Unnamed: 0,Summary/Takeaway
0,Impressive project with creative elements. Pot...
1,Clear documentation and attention to detail. E...
2,User-friendly design with clear visuals. Explo...
3,Thorough understanding of algorithms and pract...
4,Innovative problem-solving approach with engag...
...,...
294,Consider providing insights into potential col...
295,Consider providing more insights into potentia...
296,Consider discussing potential directions for f...
297,Consider exploring potential ethical considera...


In [10]:
# Concatenate 'Compliments', 'Questions', and 'Feedback' columns into a single text
x = X['Compliments'] + ' ' + X['Questions'] + ' ' + X['Feedback']


In [11]:
df1 = pd.DataFrame({'Response': x, 'Summary/Takeaway': y})

In [12]:
df1

Unnamed: 0,Response,Summary/Takeaway
0,Your project's creativity and originality are ...,Impressive project with creative elements. Pot...
1,"The documentation is clear and concise, making...",Clear documentation and attention to detail. E...
2,Your project's simplicity and user-friendlines...,User-friendly design with clear visuals. Explo...
3,The choice of algorithms in your project refle...,Thorough understanding of algorithms and pract...
4,The project's innovative approach to problem-s...,Innovative problem-solving approach with engag...
...,...,...
294,"The user interface is intuitive, contributing ...",Consider providing insights into potential col...
295,Your project's simplicity makes it easy for us...,Consider providing more insights into potentia...
296,The project's innovative solutions to complex ...,Consider discussing potential directions for f...
297,The project's simplicity makes it accessible t...,Consider exploring potential ethical considera...


In [13]:
df = df1

In [14]:
#Data Preprocessing


responses = df['Response'].tolist()
summaries = df['Summary/Takeaway'].tolist()

# Combine responses and summaries for training
all_texts = responses + summaries

# Tokenize and preprocess text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_texts)

# Define vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# Convert text to sequences
sequences = tokenizer.texts_to_sequences(all_texts)

padded_sequences = pad_sequences(sequences)


In [15]:
padded_sequences

array([[ 0,  0,  0, ..., 25, 32, 26],
       [ 0,  0,  0, ...,  5,  1, 27],
       [ 0,  0,  0, ...,  5, 27, 43],
       ...,
       [ 0,  0,  0, ..., 10, 27, 35],
       [ 0,  0,  0, ...,  5, 27, 43],
       [ 0,  0,  0, ..., 89, 35, 90]], dtype=int32)

In [16]:
# Create input-output pairs
input_sequences = []
for sequence in sequences:
    for i in range(1, len(sequence)):
        n_gram_sequence = sequence[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences for uniform length
max_sequence_length = max([len(seq) for seq in input_sequences])
padded_input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Create predictors and labels
X, y = padded_input_sequences[:, :-1], padded_input_sequences[:, -1]
y = to_categorical(y, num_classes=vocab_size)

In [17]:
X

array([[  0,   0,   0, ...,   0,   0,   2],
       [  0,   0,   0, ...,   0,   2,   8],
       [  0,   0,   0, ...,   2,   8, 241],
       ...,
       [  0,   0,   0, ...,   4,  22,  13],
       [  0,   0,   0, ...,  22,  13,  89],
       [  0,   0,   0, ...,  13,  89,  35]], dtype=int32)

In [18]:
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
# Specify the URL of the GloVe embeddings file
glove_url = 'https://nlp.stanford.edu/data/glove.6B.zip'

# Specify the local file name for saving the downloaded ZIP file
zip_file_path = 'glove.6B.zip'

# Specify the local directory for extracting the contents
extracted_dir_path = 'glove.6B'

# Download the GloVe ZIP file
response = requests.get(glove_url, stream=True)
with open(zip_file_path, 'wb') as zip_file:
    for chunk in response.iter_content(chunk_size=128):
        zip_file.write(chunk)

# Extract the contents of the ZIP file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir_path)

# Clean up: Remove the ZIP file
os.remove(zip_file_path)

# The embeddings file is now available in the extracted directory
embedding_file = os.path.join(extracted_dir_path, 'glove.6B.100d.txt')

In [20]:
# Use the embedding_file in code
embedding_dim = 100

embedding_index = {}
with open(embedding_file, 'r', encoding='utf-8') as file:
    for line in file:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embedding_index[word] = coefs

# Create embedding matrix
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector


In [21]:
# Define LSTM-based model with pre-trained embeddings
model = Sequential()
model.add(Embedding(input_dim=vocab_size,
                    output_dim=embedding_dim,
                    input_length=X.shape[1],
                    embeddings_initializer=Constant(embedding_matrix),
                    trainable=False))  # Set trainable to False to use pre-trained embeddings
model.add(LSTM(300, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(300))
model.add(Dense(vocab_size, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Display model summary
print(model.summary())



None


In [22]:
# Train the model
model.fit(X, y, epochs=35, batch_size=128, verbose=1, validation_split=0.2)

Epoch 1/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.0802 - loss: 4.7003 - val_accuracy: 0.0937 - val_loss: 5.8208
Epoch 2/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 33ms/step - accuracy: 0.2764 - loss: 2.9925 - val_accuracy: 0.1953 - val_loss: 5.2276
Epoch 3/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - accuracy: 0.5868 - loss: 1.7534 - val_accuracy: 0.3086 - val_loss: 5.0882
Epoch 4/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.8350 - loss: 0.8242 - val_accuracy: 0.3579 - val_loss: 5.1130
Epoch 5/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.9249 - loss: 0.4252 - val_accuracy: 0.3694 - val_loss: 5.2056
Epoch 6/35
[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.9513 - loss: 0.2693 - val_accuracy: 0.3737 - val_loss: 5.2334
Epoch 7/35
[1m115/11

<keras.src.callbacks.history.History at 0x7b6d50a9c220>

In [42]:
# Function which takes input and generate summary


def generate_summary(seed_text, max_length=50):
    for _ in range(max_length):
        # Tokenize the input sequence
        seed_sequence = tokenizer.texts_to_sequences([seed_text])[0]
        # Pad the input sequence
        padded_seed_sequence = pad_sequences([seed_sequence], maxlen=max_sequence_length-1, padding='pre')
        # Predict the next word
        predicted_index = np.argmax(model.predict(padded_seed_sequence), axis=-1)
        # Convert index to word
        predicted_word = tokenizer.index_word.get(predicted_index[0], '')
        # Update the seed text for the next iteration
        seed_text += ' ' + predicted_word
        if predicted_word == '.':
            break  # Break if a period is predicted, assuming the end of a sentence
    return seed_text.split('.')[-1].strip()


# Test Data
test_data = [
    "The project's innovative approach to problem-solving is highly commendable. The use of storytelling elements in the documentation is engaging. How did you validate the performance of your ML model? Can your project be extended to include real-time streaming data? Constructive feedback on the project's impact and potential improvements. Consider discussing potential directions for future research"
]

# Generate responses
for input_text in test_data:
    generated_summary = generate_summary(input_text)
    print("\nModel Suggestion:")
    print("-", generated_summary)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18

In [24]:
# Save the model to a file
model.save('model1.h5')



In [43]:
# Save the model weights with the correct file extension
model.save_weights("model1.weights.h5")

# Save the model architecture in JSON format
model_json = model.to_json()
with open("model11.json", "w") as json_file:
    json_file.write(model_json)


In [39]:
import pickle

# Assuming `tokenizer` is your trained Tokenizer instance
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)


In [44]:
# Save the embedding matrix to a file
np.save('embedding_matrix11.npy', embedding_matrix)


In [45]:
from tensorflow.keras.models import model_from_json
from tensorflow.keras.initializers import Constant
import numpy as np

# Load model architecture
with open("model11.json", "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)

# Load the saved weights (excluding embedding)
loaded_model.load_weights("/content/model1.weights.h5")

# Manually load the embedding matrix
embedding_matrix = np.load('embedding_matrix.npy')  # Load the pre-saved embedding matrix

# Reassign the embedding weights to the model
loaded_model.layers[0].set_weights([embedding_matrix])

# Continue using the model


  saveable.load_own_variables(weights_store.get(inner_path))


In [46]:
# prompt: use above model

def generate_summary(seed_text, max_length=50):
    for _ in range(max_length):
        # Tokenize the input sequence
        seed_sequence = tokenizer.texts_to_sequences([seed_text])[0]
        # Pad the input sequence
        padded_seed_sequence = pad_sequences([seed_sequence], maxlen=max_sequence_length-1, padding='pre')
        # Predict the next word
        predicted_index = np.argmax(loaded_model.predict(padded_seed_sequence), axis=-1)
        # Convert index to word
        predicted_word = tokenizer.index_word.get(predicted_index[0], '')
        # Update the seed text for the next iteration
        seed_text += ' ' + predicted_word
        if predicted_word == '.':
            break  # Break if a period is predicted, assuming the end of a sentence
    return seed_text.split('.')[-1].strip()

# Test Data
test_data = [
    "The project's innovative approach to problem-solving is highly commendable. The use of storytelling elements in the documentation is engaging. How did you validate the performance of your ML model? Can your project be extended to include real-time streaming data? Constructive feedback on the project's impact and potential improvements. Consider discussing potential directions for future research"
]

# Generate responses
for input_text in test_data:
    generated_summary = generate_summary(input_text)
    print("\nModel Suggestion:")
    print("-", generated_summary)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [48]:
#---------------------------------------------------------------------------------------------------------------------

In [None]:
#Freeze the requiremments
!pip3 freeze > requirements.txt

In [None]:
# Saving the model for deployment on cloud

filename = 'sbmodel.sav'
pickle.dump(model, open(filename, 'wb'))