In [None]:
import numpy as np
import pandas as pd 
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
import chardet

In [None]:
# Load the SMS spam CSV. The encoding is not assumed: chardet inspects the raw
# bytes and its guess is handed to pandas.
data_path = '../input/sms-spam-collection-dataset/spam.csv'
with open(data_path, 'rb') as rawdata:
    result = chardet.detect(rawdata.read())
df = pd.read_csv(data_path, encoding=result['encoding'])
# Shuffle the rows. The fixed seed makes a fresh "Restart & Run All" reproduce
# the same row order (and therefore the same downstream results).
df = df.sample(frac=1, random_state=42)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Keep only the two columns that are used later, under descriptive names
# (v1 -> label, v2 -> message). A new frame is built instead of adding columns
# and dropping in place, so re-running this cell does not raise a KeyError for
# columns that an earlier run already removed.
df = df.rename(columns={"v1": "label", "v2": "message"})[["label", "message"]]

In [None]:
# Preview the first five rows after the column clean-up.
df.head(n=5)

In [None]:
# Upper bound on how many distinct words the tokenizer may keep.
vocab_cap = 1000000
tokenizer = Tokenizer(num_words=vocab_cap)
tokenizer.fit_on_texts(df['message'].values)
# max_features is used as the embedding's input dimension, so size it from the
# vocabulary actually seen (+1 because word indices start at 1 and index 0 is
# reserved) instead of always allocating one embedding row per cap slot.
max_features = min(vocab_cap, len(tokenizer.word_index) + 1)
# Turn every message into a list of word indices ...
X = tokenizer.texts_to_sequences(df['message'].values)
# ... and pad them to a common length (pad_sequences defaults: the longest
# sequence sets the length, zeros are added at the front).
X = pad_sequences(X)

In [None]:
# Two-class text classifier: embedding -> LSTM -> 2-unit output layer.
model = Sequential()
model.add(Embedding(max_features, 128, input_length = X.shape[1], trainable=True))
model.add(LSTM(100))
# The targets are one-hot rows and the loss is categorical cross-entropy, which
# expects the two outputs to form a probability distribution; softmax provides
# that, whereas independent sigmoids do not sum to 1.
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line.
model.summary()

In [None]:
# One-hot encode the labels: one column per distinct label value.
Y = pd.get_dummies(df['label']).values
# Split dataset to 80% training, 20% testing. The fixed random_state makes the
# split reproducible across runs, and stratifying on the label keeps the label
# proportions the same in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=df['label'].values,
)
print("X_train Shape: ",X_train.shape)
print("Y_train Shape: ",Y_train.shape)
print("X_test Shape: ",X_test.shape)
print("Y_test Shape: ",Y_test.shape)

In [None]:
# Train for two epochs in mini-batches; the held-out split is passed as
# validation data so its loss/accuracy is reported after every epoch.
batch_size = 32
history = model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=2,
    validation_data=(X_test, Y_test),
)

In [None]:
def predict_message(message):
    """Classify SMS text(s) with the trained model and print the result.

    Prints one line per input text: 'spam' or 'not spam'.

    Args:
        message: a single string, or a list (or other iterable) of strings.

    Returns:
        None. The predictions are only printed.
    """
    # texts_to_sequences iterates over its argument, so a bare string would be
    # tokenised character by character; wrap it into a one-element list.
    texts = [message] if isinstance(message, str) else list(message)
    if not texts:
        # Nothing to classify; avoid calling the model on an empty batch.
        return

    seq = tokenizer.texts_to_sequences(texts)

    # Pad to the same length the model was built with.
    padded = pad_sequences(seq, maxlen=X.shape[1])

    pred = model.predict(padded)

    # NOTE(review): assumes output column 0 is the non-spam class and column 1
    # is spam, i.e. the column order of the one-hot training labels - confirm.
    labels = ['not spam','spam']
    # argmax along axis 1 picks the winning class per message. A flat argmax
    # over the whole matrix would yield a single, possibly out-of-range index
    # as soon as more than one message is passed.
    for class_index in np.argmax(pred, axis=1):
        print(labels[class_index])

In [None]:
# Demo input: gift-card prize announcement containing a shortened URL.
message = [
    'Congratulations! you have won a $1,000 Walmart gift card. Go to http://bit.ly/123456 to claim now.',
]
predict_message(message)

In [None]:
# Demo input: short thank-you note about a connection request.
message = [
    'thanks for accepting my request to connect',
]
predict_message(message)

In [None]:
# Demo input: everyday personal message.
message = [
    'i am going to the market. Do you want anything?',
]
predict_message(message)

In [None]:
# Demo input: prize claim that points to a link.
message = [
    'You won 100 dollars. Go to this link kaggle.com and claim',
]
predict_message(message)

In [None]:
# Demo input: work-related message that also mentions a link.
message = [
    'I updated the code. Can you check it from this link kaggle.com',
]
predict_message(message)

In [None]:
# Demo input: prize announcement asking the reader to click.
message = [
    'Congratz you won a 100 dollar check. Click here to claim your money',
]
predict_message(message)

In [None]:
# Demo input: thank-you message combined with a click-the-link request.
message = [
    'Thank you for doing this job. Click the link to get your money.',
]
predict_message(message)