### Detecting sarcasm in text using CNNs

In [None]:
import pandas as pd
import numpy as np
import re
import json
import gensim
import math
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from gensim.models import KeyedVectors
import keras
from keras.models import Sequential, Model
from keras import layers
from keras.layers import Dense, Dropout, Conv1D, GlobalMaxPooling1D
import h5py

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#### Reading data

In [None]:
def parse_data(file):
    for l in open(file,'r'):
        yield json.loads(l)
data = list(parse_data('/content/Sarcasm_Headlines_Dataset_v2.json'))
df = pd.DataFrame(data)

#### Performing basic data analysis and preprocessing our data


In [None]:
df.head(5)

Unnamed: 0,is_sarcastic,headline,article_link
0,1,thirtysomething scientists unveil doomsday clo...,https://www.theonion.com/thirtysomething-scien...
1,0,dem rep. totally nails why congress is falling...,https://www.huffingtonpost.com/entry/donna-edw...
2,0,eat your veggies: 9 deliciously different recipes,https://www.huffingtonpost.com/entry/eat-your-...
3,1,inclement weather prevents liar from getting t...,https://local.theonion.com/inclement-weather-p...
4,1,mother comes pretty close to using word 'strea...,https://www.theonion.com/mother-comes-pretty-c...


In [None]:
# Sarcastic Headline
df['headline'][0]

'thirtysomething scientists unveil doomsday clock of hair loss'

In [None]:
# Non-sarcastic Headline
df['headline'][1]

'dem rep. totally nails why congress is falling short on gender, racial equality'

In [None]:
df.pop('article_link')

0        https://www.theonion.com/thirtysomething-scien...
1        https://www.huffingtonpost.com/entry/donna-edw...
2        https://www.huffingtonpost.com/entry/eat-your-...
3        https://local.theonion.com/inclement-weather-p...
4        https://www.theonion.com/mother-comes-pretty-c...
                               ...                        
28614    https://www.theonion.com/jews-to-celebrate-ros...
28615    https://local.theonion.com/internal-affairs-in...
28616    https://www.huffingtonpost.com/entry/andrew-ah...
28617    https://www.theonion.com/mars-probe-destroyed-...
28618    https://www.theonion.com/dad-clarifies-this-no...
Name: article_link, Length: 28619, dtype: object

In [None]:
df.head(5)

Unnamed: 0,is_sarcastic,headline
0,1,thirtysomething scientists unveil doomsday clo...
1,0,dem rep. totally nails why congress is falling...
2,0,eat your veggies: 9 deliciously different recipes
3,1,inclement weather prevents liar from getting t...
4,1,mother comes pretty close to using word 'strea...


In [None]:
len(df)

28619

In [None]:
classes = np.unique(np.array(df['is_sarcastic']))
classes

array([0, 1])

#### Data preprocessing

In [None]:
import pandas as pd

def preprocess(corpus, remove_stopwords=True):
    '''
    Function to preprocess the text corpus

    Input :
        corpus : Text data corpus
        remove_stopwords : Boolean, True if stopwords need to be removed

    Output : Returns the processed text corpus
    '''
    cleaned_corpus = []  # Create an empty list to store processed text

    for row in corpus:
        qs = []
        for word in row.split():
            p1 = re.sub(pattern='[^a-zA-Z]', repl=' ', string=word)
            p1 = p1.lower()
            qs.append(p1)
        cleaned_corpus.append(' '.join(qs))

    cleaned_corpus = pd.Series(cleaned_corpus)
    return cleaned_corpus

In [None]:
def stopwords_removal(corpus):
    stop = set(stopwords.words('english'))
    corpus = [[x for x in x.split() if x not in stop] for x in corpus]
    return corpus

In [None]:
def stem(corpus, stem_type = None):
    if stem_type == 'snowball':
        stemmer = SnowballStemmer(language = 'english')
        corpus = [[stemmer.stem(x) for x in x] for x in corpus]
    else :
        stemmer = PorterStemmer()
        corpus = [[stemmer.stem(x) for x in x] for x in corpus]
    return corpus

In [None]:
def preprocess(corpus, cleaning=True, stemming=False, stem_type=None, lemmatization=False, remove_stopwords=True):

    def text_clean(text):
        cleaned_text = text.lower()
        cleaned_text = cleaned_text.strip()
        cleaned_text = ' '.join(cleaned_text.split())
        return cleaned_text

    '''
    Purpose : Function to perform all pre-processing tasks (cleaning, stemming, lemmatization, stopwords removal etc.)

    Input :
    'corpus' - Text corpus on which pre-processing tasks will be performed

    'cleaning', 'stemming', 'lemmatization', 'remove_stopwords' - Boolean variables indicating whether a particular task should
                                                                  be performed or not
    'stem_type' - Choose between Porter stemmer or Snowball(Porter2) stemmer. Default is "None", which corresponds to Porter
                  Stemmer. 'snowball' corresponds to Snowball Stemmer

    Note : Either stemming or lemmatization should be used. There's no benefit of using both of them together

    Output : Returns the processed text corpus

    '''
    if cleaning:
        corpus = [text_clean(text) for text in corpus]

    if remove_stopwords:
        corpus = stopwords_removal(corpus)
    else:
        corpus = [[x for x in x.split()] for x in corpus]

    if lemmatization:
        corpus = lemmatize(corpus)

    if stemming:
        corpus = stem(corpus, stem_type)

    corpus = [' '.join(x) for x in corpus]

    return corpus


In [None]:
df['headline']

0        thirtysomething scientists unveil doomsday clo...
1        dem rep. totally nails why congress is falling...
2        eat your veggies: 9 deliciously different recipes
3        inclement weather prevents liar from getting t...
4        mother comes pretty close to using word 'strea...
                               ...                        
28614         jews to celebrate rosh hashasha or something
28615    internal affairs investigator disappointed con...
28616    the most beautiful acceptance speech this week...
28617    mars probe destroyed by orbiting spielberg-gat...
28618                   dad clarifies this not a food stop
Name: headline, Length: 28619, dtype: object

In [None]:
headlines = preprocess(df['headline'], remove_stopwords = True)
headlines[0:5]

['thirtysomething scientists unveil doomsday clock hair loss',
 'dem rep. totally nails congress falling short gender, racial equality',
 'eat veggies: 9 deliciously different recipes',
 'inclement weather prevents liar getting work',
 "mother comes pretty close using word 'streaming' correctly"]

#### Loading the Word2Vec model and vectorizing our data

In [None]:
model = KeyedVectors.load_word2vec_format('/content/GoogleNews-vectors-negative300.bin', binary=True)

In [None]:
MAX_LENGTH = 10
VECTOR_SIZE = 300

#### Data Vectorization and Standardization

In [None]:
def vectorize_data(data):
    vectors = []
    padding_vector = [0.0] * VECTOR_SIZE

    for data_point in data:
        data_point_vectors = []
        count = 0
        tokens = data_point.split()

        for token in tokens:
            if count >= MAX_LENGTH:
                break
            if token in model:
                data_point_vectors.append(model[token])
            count += 1


        while len(data_point_vectors) < MAX_LENGTH:
            data_point_vectors.append(padding_vector)

        vectors.append(data_point_vectors)

    return vectors

In [None]:
vectorized_headlines = vectorize_data(headlines)

In [None]:
# Data Validation
for i, vec in enumerate(vectorized_headlines):
    if len(vec) != MAX_LENGTH:
        print(i)

In [None]:
len(vectorized_headlines[1])

10

In [None]:
len(vectorized_headlines)

28619

In [None]:
# Train Test Split and Conversion of Data Into Form expected by Convolutional Neural Network
train_div = math.floor(0.7 * len(vectorized_headlines))
train_div

20033

In [None]:
X_train = vectorized_headlines[:train_div]
y_train = df['is_sarcastic'][:train_div]
X_test = vectorized_headlines[train_div:]
y_test = df['is_sarcastic'][train_div:]

print('The size of X_train is:', len(X_train), '\nThe size of y_train is:', len(y_train),
      '\nThe size of X_test is:', len(X_test), '\nThe size of y_test is:', len(y_test))

The size of X_train is: 20033 
The size of y_train is: 20033 
The size of X_test is: 8586 
The size of y_test is: 8586


In [None]:
X_train = np.reshape(X_train, (len(X_train), MAX_LENGTH, VECTOR_SIZE))
X_test = np.reshape(X_test, (len(X_test), MAX_LENGTH, VECTOR_SIZE))
y_train = np.array(y_train)
y_test = np.array(y_test)

In [None]:
# Defining Neural Network Model Parameters
FILTERS=8
KERNEL_SIZE=3
HIDDEN_LAYER_1_NODES=10
HIDDEN_LAYER_2_NODES=5
DROPOUT_PROB=0.35
NUM_EPOCHS=10
BATCH_SIZE=50

####CNN and FeedForward Neural Network for Detecting Sarcasm

In [None]:
from keras.models import Sequential
from keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout
model = Sequential()
model.add(Conv1D(FILTERS,
                 KERNEL_SIZE,
                 padding='same',
                 strides=1,
                 activation='relu',
                 input_shape=(MAX_LENGTH, VECTOR_SIZE)))
model.add(GlobalMaxPooling1D())
model.add(Dense(HIDDEN_LAYER_1_NODES, activation='relu'))
model.add(Dropout(DROPOUT_PROB))
model.add(Dense(HIDDEN_LAYER_2_NODES, activation='relu'))
model.add(Dropout(DROPOUT_PROB))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())

  super().__init__(


None


In [None]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
training_history = model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE)

Epoch 1/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.5518 - loss: 0.6812
Epoch 2/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.6978 - loss: 0.5923
Epoch 3/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.7518 - loss: 0.5402
Epoch 4/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.7706 - loss: 0.5132
Epoch 5/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7858 - loss: 0.4957
Epoch 6/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.7986 - loss: 0.4664
Epoch 7/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8076 - loss: 0.4519
Epoch 8/10
[1m401/401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8169 - loss: 0.4378
Epoch 9/10
[1m401/401[0m [32m━━━━━━

####  Evaluation

In [None]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=False)
print("Testing Accuracy:  {:.4f}".format(accuracy))

Testing Accuracy:  0.7610


In [None]:
model_structure = model.to_json()
with open("Output Files/sarcasm_detection_model_cnn.json", "w") as json_file:
    json_file.write(model_structure)
model.save_weights("Output Files/sarcsm_detection_model_cnn.weights.h5")

#### Building a text generator using LSTMs


In [None]:
# Importing required libraries
import nltk
from nltk.corpus import stopwords
import pandas as pd
import numpy as np
import re
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Embedding
from keras.utils import to_categorical

In [None]:
data = pd.read_csv('Dataset/hotel_data.csv')

In [None]:
data.head(5)

Unnamed: 0,area,city,country,crawl_date,highlight_value,hotel_overview,hotel_star_rating,image_urls,in_your_room,is_value_plus,...,property_type,qts,query_time_stamp,room_types,site_review_count,site_review_rating,sitename,state,traveller_rating,uniq_id
0,Hardasji Ki Magri,Udaipur,India,2016-06-21,{{facility}},|Zion Home Stay is located in a city that sets...,1 star,,{{value}},no,...,Hotel,,2016-06-21 04:06:50 +0000,,/5,4.5,makemytrip,,,78ddf880bd7937d384ff278cc5b39d6e
1,Near Nai Gaon,Udaipur,India,2016-06-21,{{facility}},| Araliayas Resorts is a 3 star hotel located ...,3 star,,{{value}},no,...,Hotel,,2016-06-21 04:06:50 +0000,,/5,4.5,makemytrip,,,9f9f9cbb2f7df8089b63d5cdeb257944
2,Near Bagore Ki Haveli,Udaipur,India,2016-06-21,{{facility}},|A 2 star property is located at 24 km from Ma...,2 star,,{{value}},no,...,Hotel,,2016-06-21 04:06:50 +0000,,/5,,makemytrip,,,b314bb7fa8bfb1ed306f517be21d729e
3,Dabok,Udaipur,India,2016-06-21,Airport Transfer|Car rental|Conference Hall|Cu...,|SNP House Airport Hotel And Restaurant is loa...,1 star,//imghtlak.mmtcdn.com/images/hotels/2014071815...,,no,...,Hotel,,2016-06-21 04:06:50 +0000,`standard,/5,,makemytrip,,,e6f5bb3c2d76a78d978b9ceb0e31ec56
4,East Udaipur,Udaipur,India,2016-06-21,{{facility}},| Hotel Pichola Haveli is situated in the beau...,2 star,,{{value}},no,...,Hotel,,2016-06-21 04:06:50 +0000,,/5,3.7,makemytrip,,,63072c301427b6ca450d31eea127bcf0


In [None]:
data.city.value_counts()

city
NewDelhiAndNCR    1163
Goa               1122
Mumbai             543
Jaipur             534
Bangalore          512
                  ... 
Kollur               1
Madla                1
Jeypore              1
Jispa                1
Haldia               1
Name: count, Length: 770, dtype: int64

In [None]:
array = ['Mumbai']
data = data.loc[data['city'].isin(array)]
data.head(5)

Unnamed: 0,area,city,country,crawl_date,highlight_value,hotel_overview,hotel_star_rating,image_urls,in_your_room,is_value_plus,...,property_type,qts,query_time_stamp,room_types,site_review_count,site_review_rating,sitename,state,traveller_rating,uniq_id
294,Charai,Mumbai,India,2016-08-28,Doctor on Call|Front desk|Laundry Service|Park...,"Nestled in Mumbai, a city with strong historic...",3,,Bathroom Toiletries|Attached Bathroom|Hot & Co...,no,...,Hotel,2016-08-28 16:13:39 +0000,2016-08-28 16:13:39 +0000,Ac Superior Room|A/c Standard Rooms Double Occ...,,,makemytrip,Maharashtra,,d78fae90ef2e1b5c2dfd547c61763a25
309,Andheri (East),Mumbai,India,2016-08-28,Air Conditioned|Airport Transfer|Conference Ha...,3 km from Chhatrapati Shivaji International Ai...,2,,Bathroom Toiletries|Daily Newspaper|Kitchenett...,no,...,Hotel,2016-08-28 16:13:39 +0000,2016-08-28 16:13:39 +0000,Executive|Deluxe,,,makemytrip,Maharasta,Location:3.2/5 | Hospitality:3.1/5 | Facilitie...,030865f741982beb373efddecdc6d6c3
321,Khar,Mumbai,India,2016-08-28,Airport/Rlwy Stn Transfer|Bar|Conference Hall|...,Location Hotel Royal Garden is situated on Juh...,3,,Electronic Safe|Bathroom Toiletries|Daily News...,no,...,Hotel,2016-08-28 16:13:39 +0000,2016-08-28 16:13:39 +0000,Superior Executive,,,makemytrip,Maharashtra,Location:4.5/5 | Hospitality:3.4/5 | Facilitie...,a1ced509350038775a7700ec67796bc2
334,Andheri (East),Mumbai,India,2016-08-28,24 Hour Check in-Icon|24 hour reception|24 hou...,City Guest House is a beautiful property locat...,2,,Bathroom Toiletries|Hot/cold Water|Attached Ba...,no,...,Hotel,2016-08-28 16:13:39 +0000,2016-08-28 16:13:39 +0000,Standard Room|Deluxe Room|Triple Deluxe Room|S...,,,makemytrip,Maharashtra,Location:4.3/5 | Hospitality:3.8/5 | Facilitie...,f2820ae7707668ed6906bb227921f720
1238,Andheri (East),Mumbai,,2016-08-22,24 Hour Check in-Icon|24 hour reception|24 hou...,Sai Residency Hotel is situated in the City of...,2,,Bathroom Toiletries|Attached Bathroom|Hot & Co...,no,...,Hotel,2016-08-22 22:10:53 +0000,2016-08-22 22:10:53 +0000,Deluxe Dbl Air Cooled,,,makemytrip,MAHARASHTRA,,b4af24952027ffbcd85a91bb6fe23f5d


In [None]:
data = data.hotel_overview
data = data.dropna()
stop = set(stopwords.words('english'))
def stopwords_removal(data_point):
    data = [x for x in data_point.split() if x not in stop]
    return data

In [None]:
def clean_data(data):
    cleaned_data = []
    all_unique_words_in_each_description = []
    for entry in data:
        entry = re.sub(pattern='[^a-zA-Z]',repl=' ',string = entry)
        entry = re.sub(r'\b\w{0,1}\b', repl=' ',string = entry)
        entry = entry.lower()
        entry = stopwords_removal(entry)
        cleaned_data.append(entry)
        unique = list(set(entry))
        all_unique_words_in_each_description.extend(unique)
    return cleaned_data, all_unique_words_in_each_description

In [None]:
def unique_words(data):
    unique_words = set(all_unique_words_in_each_description)
    return unique_words, len(unique_words)

In [None]:
cleaned_data, all_unique_words_in_each_description = clean_data(data)
unique_words, length_of_unique_words = unique_words(all_unique_words_in_each_description)
cleaned_data[0]

['nestled',
 'mumbai',
 'city',
 'strong',
 'historical',
 'links',
 'wonderful',
 'british',
 'architecture',
 'museums',
 'beaches',
 'places',
 'worship',
 'true',
 'galaxy',
 'stars',
 'bollywood',
 'reigns',
 'supreme',
 'hotel',
 'divya',
 'international',
 'delightful',
 'leisure',
 'absolute',
 'blend',
 'service',
 'charm',
 'efficiency',
 'hotel',
 'offers',
 'facilities',
 'like',
 'front',
 'desk',
 'parking',
 'laundry',
 'doctor',
 'call',
 'many',
 'aims',
 'extend',
 'best',
 'possible',
 'hospitality',
 'experience',
 'revered',
 'customers',
 'hotel',
 'located',
 'distance',
 'workshop',
 'bus',
 'stop',
 'km',
 'chhatrapati',
 'shivaji',
 'international',
 'airport',
 'km',
 'chhatrapati',
 'shivaji',
 'terminus',
 'guests',
 'head',
 'prominent',
 'tourist',
 'attractions',
 'like',
 'sanjay',
 'gandhi',
 'national',
 'park',
 'kidzania',
 'mumbai',
 'haji',
 'ali',
 'mosque',
 'iskon',
 'temple',
 'shree',
 'siddhivinayak',
 'temple',
 'many',
 'shoppers',
 'fun',

In [None]:
length_of_unique_words

3395

In [None]:
def build_indices(unique_words):
    word_to_idx = {}
    idx_to_word = {}
    for i, word in enumerate(unique_words):
        word_to_idx[word] = i
        idx_to_word[i] = word
    return word_to_idx, idx_to_word
word_to_idx, idx_to_word = build_indices(unique_words)
def prepare_corpus(corpus, word_to_idx):

    sequences = []
    for line in corpus:
        tokens = line
        for i in range(1, len(tokens)):
            i_gram_sequence = tokens[:i+1]
            i_gram_sequence_ids = []

            for j, token in enumerate(i_gram_sequence):
                i_gram_sequence_ids.append(word_to_idx[token])

            sequences.append(i_gram_sequence_ids)

    return sequences

In [None]:
sequences = prepare_corpus(cleaned_data, word_to_idx)
max_sequence_len = max([len(x) for x in sequences])
print(sequences[0])
print(sequences[1])

[3019, 234]
[3019, 234, 2214]


In [None]:
print(idx_to_word[1647])
print(idx_to_word[867])
print(idx_to_word[1452])

alibaugh
joints
pvt


In [None]:
len(sequences)

51836

In [None]:
max_sequence_len

308

In [None]:
def build_input_data(sequences, max_sequence_len, length_of_unique_words):
    sequences = np.array(pad_sequences(sequences, maxlen = max_sequence_len, padding = 'pre'))
    X = sequences[:,:-1]
    y = sequences[:,-1]
    y = to_categorical(y, length_of_unique_words)
    return X, y

In [None]:
X, y = build_input_data(sequences, max_sequence_len, length_of_unique_words)

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (51836, 307)
Shape of y: (51836, 3395)


In [None]:
def create_model(max_sequence_len, length_of_unique_words):
    model = Sequential()
    model.add(Embedding(length_of_unique_words, 10, input_shape=(max_sequence_len,)))
    model.add(LSTM(128))
    model.add(Dropout(0.2))
    model.add(Dense(length_of_unique_words, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model

In [None]:
model = create_model(max_sequence_len, length_of_unique_words)
print(model.summary())

  super().__init__(**kwargs)


None


In [None]:
len(X)

51836

In [None]:
model.fit(X, y, batch_size = 1024, epochs=10)

Epoch 1/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 5s/step - loss: 7.6534
Epoch 2/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m313s[0m 5s/step - loss: 6.5668
Epoch 3/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 5s/step - loss: 6.5608
Epoch 4/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m281s[0m 6s/step - loss: 6.5198
Epoch 5/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 6s/step - loss: 6.4941
Epoch 6/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 5s/step - loss: 6.4370
Epoch 7/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 6s/step - loss: 6.3924
Epoch 8/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 5s/step - loss: 6.3239
Epoch 9/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 1s/step - loss: 6.2746
Epoch 10/10
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 966ms/step - loss: 6.231

<keras.src.callbacks.history.History at 0x22b7f06e4b0>

In [None]:
import numpy as np

def generate_text(seed_text, next_words, model, max_seq_len):
    cleaned_data = clean_data([seed_text])
    sequences = prepare_corpus(cleaned_data[0], word_to_idx)
    input_seq = sequences[-1]
    for _ in range(next_words):
        predicted_probs = model.predict(np.expand_dims(input_seq, axis=0), verbose=0).flatten()
        predicted_index = np.random.choice(len(predicted_probs), p=predicted_probs)
        output_word = idx_to_word[predicted_index]
        seed_text = seed_text + " " + output_word
        input_seq = np.append(input_seq[1:], predicted_index)

    return seed_text.title()


In [None]:
print(generate_text("in Mumbai there we need", 30, model, max_sequence_len))

In Mumbai There We Need Mins Clock Glimpse Nutritious India Station Provides Wardrobes Vessels Lokhandwala Full Hanging Mail Siddharthnagar Commute Touch Others Journey Stocks Celestial Essenza Fancy Zodiac Harmony Bhavani Form Geyser Situated Tradition Stories


In [None]:
print(generate_text("Best Hotel Mumbai", 30, model, max_sequence_len))

Best Hotel Mumbai Favourably Breads Hire Varied Throw Ayurveda Accommodating True Spaces Bunder Mani Would Enigma Well Celebrate Dana Watch Atlantic Swigs Easy Assures Prestige Replete Commotions Choice Kidzania Pheonix Less Shivaji Caprice


In [None]:
print(generate_text("The beauty of the city", 30, model, max_sequence_len))


The Beauty Of The City Wales Beth Inhouse Pressure Drop Coral Holds Palatable Mins Pathar Excellent Bangladesh Bliss Cakes Stocked Superior Region Domestic Devices Galaxy Peninsular Ambience Consists Applying Considers Greenery Shubhangan Separately Babulnath Wind


In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
model_structure = model.to_json()
with open("Output Files/text_generation_using_LSTM.json", "w") as json_file:
    json_file.write(model_structure)
model.save_weights("Output Files/text_generation_using_LSTM.weights.h5")

In [None]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.16.0-rc0


In [None]:
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))


TensorFlow version: 2.16.0-rc0
GPU available: []
