<a href="https://colab.research.google.com/github/philipayazi/Disaster_Tweets/blob/master/Disaster_Tweets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [29]:
import pickle

# Restore the precomputed W2V feature matrices and the matching label splits.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('w2v_features.pkl', 'rb') as features_file:
    (test_tweet_matrix, train_tweet_matrix,
     y_train_split, y_test_split) = pickle.load(features_file)


## DL Classifier from W2V Features

In [30]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [31]:
# Short aliases for the unpickled feature matrices and label vectors.
X_train, X_test = train_tweet_matrix, test_tweet_matrix
y_train, y_test = y_train_split, y_test_split

# Report the dimensions of both splits as a quick sanity check.
print('Train X and y', X_train.shape, y_train.shape)
print('Test X and y', X_test.shape, y_test.shape)

Train X and y (5329, 300) (5329,)
Test X and y (2284, 300) (2284,)


In [32]:
# Features are used unscaled (no scaling gives better results); the *_scaled
# names are kept so the cells below can stay unchanged.
X_train_scaled, X_test_scaled = X_train, X_test

In [33]:
# Step 1: map raw labels to integer class ids (encoder fitted on the training labels only)
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train, encoded_y_test = (
    label_encoder.transform(labels) for labels in (y_train, y_test)
)

# Step 2: expand the integer ids into one-hot rows
y_train_categorical, y_test_categorical = (
    to_categorical(encoded) for encoded in (encoded_y_train, encoded_y_test)
)

In [34]:
# Inspect the one-hot training labels: expect one row per sample and one column per class.
y_train_categorical.shape

(5329, 2)

### Create DL model

In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [36]:
# Feed-forward classifier: 300 input features -> two hidden ReLU layers of
# 100 units each -> 2-way softmax output.
model = Sequential([
    Dense(units=100, activation='relu', input_dim=300),
    Dense(units=100, activation='relu'),
    Dense(units=2, activation='softmax'),
])

In [37]:
# Compile only (training happens in a later cell): Adam optimizer,
# categorical cross-entropy on the one-hot labels, accuracy as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [38]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 40,402
Trainable params: 40,402
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Train for 10 epochs with shuffling, holding out 15% of the training data
# for validation; verbose=2 prints one summary line per epoch.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    validation_split=0.15,
    epochs=10,
    shuffle=True,
    verbose=2,
)

Train on 4529 samples, validate on 800 samples
Epoch 1/10
4529/4529 - 1s - loss: 0.4956 - accuracy: 0.7695 - val_loss: 0.4573 - val_accuracy: 0.7975
Epoch 2/10
4529/4529 - 0s - loss: 0.4162 - accuracy: 0.8176 - val_loss: 0.4467 - val_accuracy: 0.7975
Epoch 3/10
4529/4529 - 0s - loss: 0.3784 - accuracy: 0.8384 - val_loss: 0.4584 - val_accuracy: 0.8000
Epoch 4/10
4529/4529 - 0s - loss: 0.3494 - accuracy: 0.8516 - val_loss: 0.4556 - val_accuracy: 0.8037
Epoch 5/10
4529/4529 - 0s - loss: 0.3140 - accuracy: 0.8719 - val_loss: 0.4718 - val_accuracy: 0.8012
Epoch 6/10
4529/4529 - 0s - loss: 0.2762 - accuracy: 0.8856 - val_loss: 0.5021 - val_accuracy: 0.8050
Epoch 7/10
4529/4529 - 1s - loss: 0.2425 - accuracy: 0.9017 - val_loss: 0.5372 - val_accuracy: 0.8163
Epoch 8/10
4529/4529 - 0s - loss: 0.2094 - accuracy: 0.9168 - val_loss: 0.5916 - val_accuracy: 0.8012
Epoch 9/10
4529/4529 - 0s - loss: 0.1809 - accuracy: 0.9304 - val_loss: 0.7033 - val_accuracy: 0.7850
Epoch 10/10
4529/4529 - 0s - loss: 

<tensorflow.python.keras.callbacks.History at 0x27f83696a88>

In [40]:
# Score the trained network on the test split and report loss and accuracy.
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=3)
print(f"RESULTS - Loss: {model_loss}, Accuracy: {model_accuracy}")

RESULTS - Loss: 0.6631493313456583, Accuracy: 0.8012259006500244


In [41]:
# Predict class ids for the first five test rows.
# Sequential.predict_classes was deprecated and later removed from tf.keras,
# so take the argmax over the softmax probabilities instead; for a softmax
# output layer this yields the same class ids.
encoded_predictions = np.argmax(model.predict(X_test_scaled[:5]), axis=-1)
# Map the integer ids back to the original label values.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [42]:
# Show the five predictions next to the true labels, one line each.
print(
    f"Predicted classes: {prediction_labels}",
    f"Actual Labels: {list(y_test[:5])}",
    sep="\n",
)

Predicted classes: [0 0 0 0 0]
Actual Labels: [1, 0, 1, 0, 0]


In [43]:
# Evaluate on the test split again; as the cell's last expression, the
# returned [loss, accuracy] list is displayed as the cell output.
model.evaluate(X_test_scaled, y_test_categorical, verbose=1)



[0.6631493313456583, 0.8012259]

In [44]:
## ---------- Accuracy, recall, precision and F1 on the test split

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix

# Class probabilities -> predicted class id (index of the largest probability)
y_pred1 = model.predict(X_test_scaled)
y_pred = y_pred1.argmax(axis=1)

# Ground truth: the original (not one-hot) test labels
y_test1 = y_test

print('ACC: ', accuracy_score(y_test1, y_pred))

# Recall, precision and F1 are macro-averaged over the classes
for tag, scorer in (('RE: ', recall_score), ('PR: ', precision_score), ('F1: ', f1_score)):
    print(tag, scorer(y_test1, y_pred, average="macro"))

ACC:  0.8012259194395797
RE:  0.7908604228126561
PR:  0.7989759637276084
F1:  0.7938510132738719


## Hyperparameter optimization with Hyperas

In [45]:
def data():
    """Load the pickled W2V features and prepare them for the hyperas search.

    Labels are label-encoded (encoder fitted on the training labels) and then
    one-hot encoded. Features are standardized with a StandardScaler fitted
    on the training matrix only.

    Output: the tuple (x_train, y_train, x_test, y_test).
    """
    with open('w2v_features.pkl', 'rb') as features_file:
        test_tweet_matrix, train_tweet_matrix, y_train_split, y_test_split = pickle.load(features_file)

    # Integer-encode the labels, then expand them to one-hot rows
    label_encoder = LabelEncoder().fit(y_train_split)
    y_train = to_categorical(label_encoder.transform(y_train_split))
    y_test = to_categorical(label_encoder.transform(y_test_split))

    # Standardize the features using statistics from the training split only
    X_scaler = StandardScaler().fit(train_tweet_matrix)
    x_train = X_scaler.transform(train_tweet_matrix)
    x_test = X_scaler.transform(test_tweet_matrix)

    return x_train, y_train, x_test, y_test

In [46]:
from hyperopt import Trials, STATUS_OK, tpe
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
from hyperas import optim
from hyperas.distributions import choice, uniform
from keras import backend as K
from keras.utils import print_summary
          
              
def create_model(x_train, y_train, x_test, y_test):
    """Build, train and score one candidate network for the hyperas search.

    The double-brace expressions below are hyperas templates: hyperas replaces
    each one with a sampled value (hidden-layer widths, batch size, number of
    epochs) before running this function.

    Args:
        x_train: training feature matrix (300 columns, per input_dim below).
        y_train: one-hot training labels (2 columns, per the output layer).
        x_test: test feature matrix.
        y_test: one-hot test labels.

    Result:
        A dict with 'loss' (the negated test accuracy), 'status' and the
        trained 'model', as expected by hyperopt.
    """
    print(x_train.shape)
    print(y_train.shape)
    print(x_test.shape)
    print(y_test.shape)

    model = Sequential()
    model.add(Dense( {{choice([50,100,500])}} , activation='relu', input_dim=300))
    model.add(Dense( {{choice([50,100,500])}} , activation='relu'))
    model.add(Dense(units=2, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # model.summary() prints the same table as keras.utils.print_summary with
    # default arguments, without depending on that helper being importable.
    model.summary()
    model.fit(x_train, y_train,
              batch_size={{choice([64,128])}},
              epochs={{choice([10,50,100])}},
              verbose=2,
              validation_split=0.15)

    # NOTE(review): candidates are ranked by accuracy on the test split, so the
    # reported test score of the winning model is optimistically biased.
    score = model.evaluate(x_test, y_test, verbose=0)
    accuracy = score[1]
    # hyperopt MINIMIZES 'loss'; negate the accuracy so that the search prefers
    # the most accurate model rather than the least accurate one.
    return {'loss': -accuracy, 'status': STATUS_OK, 'model': model}

# Run the hyperas search: 5 evaluations of create_model on the output of
# data(), with candidates suggested by TPE. The run with the lowest reported
# 'loss' and its trained model are handed back.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
    notebook_name='Disaster_Tweets_hyperasW2VDL',
)


>>> Imports:
#coding=utf-8

try:
    import pandas as pd
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    from sklearn.metrics import classification_report, confusion_matrix
except:
    pass

try:
    from sklearn.preprocessing import StandardScaler
except:
    pass

try:
    import pickle
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    from sklearn.preprocessing import LabelEncoder, MinMaxScaler
except:
    pass

try:
    from tensorflow.keras.utils import to_categorical
except:
    pass

try:
    from tensorflow.keras.models import Sequential
except:
    pass

try:
    from tensorflow.keras.layers import Dense
except:
    pass

try:
    from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass


Epoch 6/100                                                                                                                                      
 - 0s - loss: 0.1438 - accuracy: 0.9554 - val_loss: 0.5702 - val_accuracy: 0.7837                                                                

Epoch 7/100                                                                                                                                      
 - 0s - loss: 0.1140 - accuracy: 0.9713 - val_loss: 0.6665 - val_accuracy: 0.7738                                                                

Epoch 8/100                                                                                                                                      
 - 0s - loss: 0.1006 - accuracy: 0.9726 - val_loss: 0.6629 - val_accuracy: 0.7887                                                                

Epoch 9/100                                                                                                              

 - 0s - loss: 0.0404 - accuracy: 0.9848 - val_loss: 0.9990 - val_accuracy: 0.7800                                                                

Epoch 34/100                                                                                                                                     
 - 0s - loss: 0.0383 - accuracy: 0.9828 - val_loss: 1.0306 - val_accuracy: 0.7775                                                                

Epoch 35/100                                                                                                                                     
 - 0s - loss: 0.0404 - accuracy: 0.9817 - val_loss: 0.9838 - val_accuracy: 0.7788                                                                

Epoch 36/100                                                                                                                                     
 - 0s - loss: 0.0393 - accuracy: 0.9817 - val_loss: 1.0359 - val_accuracy: 0.7700                                        

Epoch 61/100                                                                                                                                     
 - 0s - loss: 0.0477 - accuracy: 0.9810 - val_loss: 1.0526 - val_accuracy: 0.7725                                                                

Epoch 62/100                                                                                                                                     
 - 0s - loss: 0.0450 - accuracy: 0.9806 - val_loss: 1.0909 - val_accuracy: 0.7800                                                                

Epoch 63/100                                                                                                                                     
 - 0s - loss: 0.0351 - accuracy: 0.9845 - val_loss: 1.1555 - val_accuracy: 0.7975                                                                

Epoch 64/100                                                                                                             

 - 0s - loss: 0.0272 - accuracy: 0.9841 - val_loss: 1.2965 - val_accuracy: 0.7713                                                                

Epoch 89/100                                                                                                                                     
 - 0s - loss: 0.0260 - accuracy: 0.9852 - val_loss: 1.3303 - val_accuracy: 0.7837                                                                

Epoch 90/100                                                                                                                                     
 - 0s - loss: 0.0254 - accuracy: 0.9845 - val_loss: 1.3668 - val_accuracy: 0.7862                                                                

Epoch 91/100                                                                                                                                     
 - 0s - loss: 0.0263 - accuracy: 0.9843 - val_loss: 1.3845 - val_accuracy: 0.7800                                        

 - 0s - loss: 0.2691 - accuracy: 0.8856 - val_loss: 0.5054 - val_accuracy: 0.7837                                                                

Epoch 7/10                                                                                                                                       
 - 0s - loss: 0.2348 - accuracy: 0.9073 - val_loss: 0.5504 - val_accuracy: 0.7962                                                                

Epoch 8/10                                                                                                                                       
 - 0s - loss: 0.2048 - accuracy: 0.9210 - val_loss: 0.5660 - val_accuracy: 0.7825                                                                

Epoch 9/10                                                                                                                                       
 - 0s - loss: 0.1740 - accuracy: 0.9397 - val_loss: 0.6309 - val_accuracy: 0.7975                                        

 - 0s - loss: 0.0745 - accuracy: 0.9770 - val_loss: 0.8723 - val_accuracy: 0.7650                                                                

Epoch 15/50                                                                                                                                      
 - 0s - loss: 0.0732 - accuracy: 0.9764 - val_loss: 0.9284 - val_accuracy: 0.7937                                                                

Epoch 16/50                                                                                                                                      
 - 0s - loss: 0.0659 - accuracy: 0.9797 - val_loss: 0.9604 - val_accuracy: 0.7887                                                                

Epoch 17/50                                                                                                                                      
 - 0s - loss: 0.0791 - accuracy: 0.9755 - val_loss: 0.9546 - val_accuracy: 0.7788                                        

Epoch 42/50                                                                                                                                      
 - 0s - loss: 0.0347 - accuracy: 0.9848 - val_loss: 1.2865 - val_accuracy: 0.7663                                                                

Epoch 43/50                                                                                                                                      
 - 0s - loss: 0.0374 - accuracy: 0.9806 - val_loss: 1.3253 - val_accuracy: 0.7800                                                                

Epoch 44/50                                                                                                                                      
 - 0s - loss: 0.0346 - accuracy: 0.9821 - val_loss: 1.3367 - val_accuracy: 0.7825                                                                

Epoch 45/50                                                                                                              

Epoch 10/50                                                                                                                                      
 - 0s - loss: 0.1299 - accuracy: 0.9611 - val_loss: 0.6400 - val_accuracy: 0.7763                                                                

Epoch 11/50                                                                                                                                      
 - 0s - loss: 0.1112 - accuracy: 0.9680 - val_loss: 0.6756 - val_accuracy: 0.7688                                                                

Epoch 12/50                                                                                                                                      
 - 0s - loss: 0.0991 - accuracy: 0.9715 - val_loss: 0.6784 - val_accuracy: 0.7750                                                                

Epoch 13/50                                                                                                              

 - 0s - loss: 0.0517 - accuracy: 0.9810 - val_loss: 0.9605 - val_accuracy: 0.7775                                                                

Epoch 38/50                                                                                                                                      
 - 0s - loss: 0.0452 - accuracy: 0.9808 - val_loss: 0.9921 - val_accuracy: 0.7663                                                                

Epoch 39/50                                                                                                                                      
 - 0s - loss: 0.0431 - accuracy: 0.9826 - val_loss: 0.9813 - val_accuracy: 0.7725                                                                

Epoch 40/50                                                                                                                                      
 - 0s - loss: 0.0469 - accuracy: 0.9806 - val_loss: 1.0230 - val_accuracy: 0.7825                                        

 - 0s - loss: 0.1778 - accuracy: 0.9382 - val_loss: 0.5798 - val_accuracy: 0.7875                                                                

Epoch 6/10                                                                                                                                       
 - 0s - loss: 0.1263 - accuracy: 0.9625 - val_loss: 0.6035 - val_accuracy: 0.7900                                                                

Epoch 7/10                                                                                                                                       
 - 0s - loss: 0.1110 - accuracy: 0.9640 - val_loss: 0.6850 - val_accuracy: 0.7688                                                                

Epoch 8/10                                                                                                                                       
 - 0s - loss: 0.1033 - accuracy: 0.9693 - val_loss: 0.6634 - val_accuracy: 0.7862                                        