In [40]:
from google.colab import files
uploaded = files.upload()

Saving doctor_rating.csv to doctor_rating.csv


In [41]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dropout, Conv1D, GlobalMaxPooling1D, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from gensim.models import Word2Vec

In [42]:
# Load the dataset
data = pd.read_csv('doctor_rating.csv')

In [43]:
data.head()

Unnamed: 0.1,Unnamed: 0,rating,reviews
0,0,2,he explained initially that it takes 4-5 sitin...
1,1,5,great dr definitely recommend he recommends le...
2,2,1,doctor came and spent 9 seconds and recommende...
3,3,1,i am completely satisfied with the consultatio...
4,4,1,my experience was nice dr dyed was cool and co...


In [44]:
data.isna().sum()

Unnamed: 0    0
rating        0
reviews       0
dtype: int64

In [46]:
data.rating.value_counts()

1    104
5     18
2     13
3      4
4      4
Name: rating, dtype: int64

In [47]:
# Preprocess the data
X = data['reviews'].values
y = data['rating'].values

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [48]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
# Tokenize the text
max_words = 10000  # Maximum number of words to keep in the vocabulary
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [50]:
# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [51]:
# Pad sequences to have the same length
max_sequence_length = 100  # Maximum length of a sequence
X_train_pad = pad_sequences(X_train_seq, maxlen=max_sequence_length)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_sequence_length)

In [52]:
# Train Word2Vec model
word2vec_model = Word2Vec(sentences=X_train_seq, 
                          vector_size=100, 
                          window=5, 
                          min_count=1, 
                          workers=4)

In [53]:
# Create an embedding matrix
embedding_dim = 100
vocab_size = len(tokenizer.word_index) + 1
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if word in word2vec_model.wv:
        embedding_matrix[i] = word2vec_model.wv[word]

In [54]:
# Build the CNN model with Word2Vec embeddings
model = Sequential()
model.add(Embedding(vocab_size,
                    embedding_dim, 
                    weights=[embedding_matrix], 
                    input_length=max_sequence_length, 
                    trainable=False))
model.add(Dropout(0.5))

In [55]:
# Convolutional layers with different filter sizes
num_filters = 64
filter_sizes = [3, 4, 5]
for filter_size in filter_sizes:
    model.add(Conv1D(num_filters, filter_size, activation='relu'))

model.add(GlobalMaxPooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='sigmoid'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [56]:
# Train the model
batch_size = 64
epochs = 10
model.fit(X_train_pad, 
          y_train, batch_size=batch_size, 
          epochs=epochs, 
          validation_data=(X_test_pad, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f13bd0473d0>

In [57]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_pad, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

Test Loss: 1.5876092910766602
Test Accuracy: 0.7586206793785095


In [60]:
# Manual input for prediction
text = input('enter the review : ')
text_seq = tokenizer.texts_to_sequences([text])
text_pad = pad_sequences(text_seq, maxlen=max_sequence_length)
prediction = model.predict(text_pad)
predicted_class = np.argmax(prediction)
predicted_label = label_encoder.inverse_transform([predicted_class])[0]
print("Predicted label:", predicted_label)

enter the review : Dr. Williams is absolutely amazing! She is highly knowledgeable, attentive, and genuinely cares about her patients. I felt heard and well taken care of during my visit. I would highly recommend her to anyone in need of a great doctor.
Predicted label: 1
