In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gensim.downloader as api
import re

# Read the CSV file
data = pd.read_csv('Day_3.csv')

# Extract the text and label columns
texts = data['Message'].tolist()
labels = [f"source={source} destination={destination}" for source, destination in zip(data['Source'], data['Destination'])]

# Convert texts and labels to lowercase
texts = [text.lower().strip() for text in texts]

labels = [label.lower().strip() for label in labels]

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
max_sequence_length = max(len(sequence) for sequence in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Load GloVe word embeddings
word_embeddings = api.load("glove-wiki-gigaword-300")  # Load GloVe embeddings

# Create an embedding matrix
embedding_dimension = word_embeddings.vector_size
word_index = tokenizer.word_index
embedding_matrix = np.zeros((len(word_index) + 1, embedding_dimension))

for word, index in word_index.items():
    if word in word_embeddings:
        embedding_matrix[index] = word_embeddings[word]

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, encoded_labels, test_size=0.2, random_state=42)

# Build LSTM model with its own embedding layer
model = Sequential()
model.add(Embedding(len(word_index) + 1, embedding_dimension, weights=[embedding_matrix], input_length=max_sequence_length, trainable=False))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile and train the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=70, batch_size=256, validation_data=(X_test, y_test))



  data = pd.read_csv('Day_3.csv')


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.callbacks.History at 0x7f4da0f8cdc0>

In [2]:
#model.save_weights('SD70ep.h5')

In [None]:
for i in range(200):
    m = df.iloc[i, 1]
    predefined_string = str(m)

    user_sequence = tokenizer.texts_to_sequences([predefined_string])
    user_padded_sequence = pad_sequences(user_sequence, maxlen=max_sequence_length)

    # Predict features of predefined string
    predicted_label = model.predict(user_padded_sequence)  # Perform the prediction

    predicted_label_index = np.argmax(predicted_label)
    decoded_label = label_encoder.inverse_transform([predicted_label_index])[0]

    # Print the predicted label
    print("Predicted label:", decoded_label.title())
    print(df.lloc[i,'Message'])
    print()


Predicted label: Source= Shirdi Destination=Nashik Station
Sedan Car Shirdi To Kurla Station Drop 1 Pm 8856000999

Predicted label: Source=Mumbai Thane Destination=Khed Dapoli
Available Mumbai Thane To Khed Dapoli Drop Tomorrow 5:00 A.M. Sedan Car Me 9029650197

Predicted label: Source= Pune Destination=Ratnagiri
2 Days Round Trip Pune To Ratnagiri Car Type :- Ertiga Disel New Time :- Tomorrow 6 Am Me :- 9768868301

Predicted label: Source=Panchgani Destination=Pune
Panchgani To Pune Drop Innova & Eritka 7Seat Urgent Me 72197 67606

Predicted label: Source= Kolhapur Destination=Mumbai
Advance Innova With Carrie 12/04/23 6:30 Kolhapur To Mumbai ( ) Drop . All Including 7500 Rohit 9922647446 Gp Advance Campalsri No Ertiga Only Innova

Predicted label: Source=Shirdi Destination=Mumbai
Shirdi To Mumbai Kulaba Drop Swift Dizer Arjent Any Interest Mi 9860331325

Predicted label: Source= Ahmednagar Destination=Mumbai Airport
Sedan Car Pm Ahmednagar To Mumbai Airport Drop 9767969076

Predicted

In [None]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 202, 100)          978100    
                                                                 
 bidirectional_9 (Bidirectio  (None, 202, 256)         234496    
 nal)                                                            
                                                                 
 dropout_3 (Dropout)         (None, 202, 256)          0         
                                                                 
 bidirectional_10 (Bidirecti  (None, 202, 128)         164352    
 onal)                                                           
                                                                 
 bidirectional_11 (Bidirecti  (None, 128)              98816     
 onal)                                                           
                                                      