# Keras embeddings technique

# Import packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Import packages

In [None]:
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize

# Loading data

In [None]:
# Competition train/test files, decoded as latin-1.
df = pd.read_csv('../input/nlp-getting-started/train.csv', encoding="latin_1")
df_test = pd.read_csv('../input/nlp-getting-started/test.csv', encoding="latin_1")

# Second copy of the training file, read with pandas' default encoding.
data1 = pd.read_csv('../input/nlp-getting-started/train.csv')

# NLTK's English stop-word list.
stop_words = stopwords.words('english')

In [None]:
# Raw tweet text columns of the train and test frames.
texts, texts_test = df['text'], df_test['text']
# Remove the label column from df and keep it on its own.
y = df.pop('target')

# Data Preprocessing

In [None]:
def pre_process_data(text):
    """Clean one raw text and return it as a space-separated string of lemmas.

    Steps: replace every run of non-alphanumeric characters with a space,
    tokenize, lowercase, drop English stop words and non-alphabetic tokens,
    lemmatize, and finally drop URL / HTML-entity residue tokens.

    Args:
        text: Raw text (str) to clean.

    Returns:
        The surviving lemmas joined by single spaces ('' if none survive).
    """
    # Local import keeps the function usable even if `re` has not been
    # imported at module level before this definition runs.
    import re

    # Tokens left behind once punctuation is stripped from links
    # ("http://t.co/..." -> "http t co ...") and from the "&amp;" entity.
    noise_tokens = {'http', 'https', 'co', 'amp'}
    # Set membership instead of scanning the stop-word list for every token.
    stops = set(stop_words)
    lemm = WordNetLemmatizer()

    text = re.sub(r"[^0-9a-zA-Z]+", ' ', text)
    lemmas = []
    for token in word_tokenize(text):
        lowered = token.lower()
        if lowered in stops or not token.isalpha():
            continue
        lemma = lemm.lemmatize(lowered)
        # Drop whole noise tokens only. Removing them as substrings would
        # corrupt ordinary words (e.g. 'collapse' -> 'llapse', 'camp' -> 'c')
        # and leave empty tokens that turn into doubled spaces.
        if lemma not in noise_tokens:
            lemmas.append(lemma)
    return ' '.join(lemmas)

In [None]:
import re
# Clean every tweet with pre_process_data (test set first, then train).
texts_test = list(map(pre_process_data, texts_test))
texts = list(map(pre_process_data, texts))

In [None]:
# Training labels as a plain Python list, taken from the second copy of the train file.
labels = data1['target'].to_numpy().tolist()

In [None]:
import keras_preprocessing
from keras_preprocessing.text import one_hot
from keras_preprocessing.sequence import pad_sequences

In [None]:
# Map each word of every cleaned training tweet to an integer below vocab_size.
# NOTE(review): one_hot may rely on Python's built-in hash, which is not stable
# across interpreter sessions — confirm before reusing a saved model in a new session.
vocab_size = 10_000
encoded_docs = [one_hot(doc, vocab_size) for doc in texts]


In [None]:
# Bring every encoded training document to the same length (post-padding).
max_length = 1000
padded_docs = pad_sequences(
    sequences=encoded_docs,
    maxlen=max_length,
    padding='post',
)
print(padded_docs)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential

In [None]:
from tensorflow.keras import layers

# Define the model

In [None]:
# Embedding -> Flatten -> single sigmoid unit, assembled in one constructor call.
model = tf.keras.Sequential([
    layers.Embedding(vocab_size, 10, input_length=max_length),
    layers.Flatten(),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Hand the labels to model.fit as a NumPy array rather than a Python list.
labels = np.asarray(labels)

# Compile the model

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Print the layer-by-layer summary.
model.summary()
# Train on the padded training sequences and keep the returned history object.
batch_size, epochs = 32, 50
history = model.fit(
    x=padded_docs,
    y=labels,
    batch_size=batch_size,
    epochs=epochs,
)
history

# Plotting Graphs for accuracy

In [None]:
# Plot the per-epoch training curves recorded in the fit history.
plt.figure(0)
plt.plot(history.history['accuracy'], label='training accuracy')
# history.history['loss'] is the loss measured on the training data, so it is
# labelled as training loss rather than 'Value Loss'.
plt.plot(history.history['loss'], label='training loss')
plt.title('Training accuracy & loss')
plt.xlabel('epochs')
# Accuracy and loss share this axis, so a neutral label is used.
plt.ylabel('value')
plt.legend()
plt.show()

# Saving model

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath="Natural Language Processing with Disaster Tweets.h5")

# Load model

In [None]:
# Reload the model from the HDF5 file written by the save step.
model = tf.keras.models.load_model(
    filepath="./Natural Language Processing with Disaster Tweets.h5"
)

# Testing Data

In [None]:
# Show the cleaned test tweets as the cell output.
texts_test

In [None]:
# Map each word of every cleaned test tweet to an integer below vocab_size.
# NOTE(review): one_hot may rely on Python's built-in hash, which is not stable
# across interpreter sessions — confirm this runs in the same session as training.
vocab_size = 10_000
encoded_docs_test = [one_hot(doc, vocab_size) for doc in texts_test]

In [None]:
# Bring every encoded test document to the same length (post-padding).
# Note: this rebinds padded_docs, which held the training matrix before.
max_length = 1000
padded_docs = pad_sequences(
    sequences=encoded_docs_test,
    maxlen=max_length,
    padding='post',
)

In [None]:
# Show the padded test matrix as the cell output.
padded_docs

In [None]:
# `Sequential.predict_classes` was removed from tf.keras (TensorFlow 2.6+), so
# threshold the sigmoid probabilities directly; 0.5 is the cut-off
# predict_classes applied to a single sigmoid output.
# ravel() flattens the (n, 1) output of the final Dense(1) layer to (n,) so the
# result can be used directly as one column of labels.
pred = (model.predict(padded_docs) > 0.5).astype("int32").ravel()

In [None]:
# Show the shape of the prediction array as the cell output.
pred.shape

# My submission

In [None]:
# Build the submission from the sample file's ids plus the predicted labels.
sample = pd.read_csv('../input/nlp-getting-started/sample_submission.csv')
my_submission = sample[['id']].assign(target=pred)

# Write it out without the index column.
my_submission.to_csv('Submission.csv', index=False)


# Importing WordCloud

In [None]:
from wordcloud import WordCloud, ImageColorGenerator
from os import path, getcwd
from PIL import Image

In [None]:
# Fresh copy of the training file for the word-cloud cells.
data = pd.read_csv(filepath_or_buffer='../input/nlp-getting-started/train.csv')

In [None]:
# Select the rows whose target is 0 and renumber them from 0.
neg = data[data['target'] == 0].reset_index(drop=True)
neg.head()

# Build a word cloud from the text of the single row labelled 5.
text = neg.loc[5, 'text']
wordcloud = WordCloud().generate(text)

# Render the cloud on a large figure with the axes hidden.
plt.figure(figsize=(20, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Select the rows whose target is 1 and renumber them from 0.
pos = data[data['target'] == 1].reset_index(drop=True)
pos.head()

# Build a word cloud from the text of the single row labelled 5.
text = pos.loc[5, 'text']
wordcloud = WordCloud().generate(text)

# Render the cloud on a large figure with the axes hidden.
plt.figure(figsize=(20, 20))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()