## tensorflow_nl3.ipynb

## SavedModel (default) or HDF5 

In [4]:
import tensorflow as tf
import tensorflow_hub as hub

In [5]:
# Restore the HDF5 checkpoint. The hub layer class is passed through
# `custom_objects` so the "KerasLayer" entry in the file can be resolved.
loaded_model_6 = tf.keras.models.load_model(
    filepath="model_6.h5",
    custom_objects={"KerasLayer": hub.KerasLayer},
)

In [6]:
import pandas as pd

# Read both CSV splits with the same loader call.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# Preview the first rows and the dimensions of each split.
(
    train_df.head(),
    train_df.shape,
    test_df.head(),
    test_df.shape,
)

(   id keyword location                                               text  \
 0   1     NaN      NaN  Our Deeds are the Reason of this #earthquake M...   
 1   4     NaN      NaN             Forest fire near La Ronge Sask. Canada   
 2   5     NaN      NaN  All residents asked to 'shelter in place' are ...   
 3   6     NaN      NaN  13,000 people receive #wildfires evacuation or...   
 4   7     NaN      NaN  Just got sent this photo from Ruby #Alaska as ...   
 
    target  
 0       1  
 1       1  
 2       1  
 3       1  
 4       1  ,
 (7613, 5),
    id keyword location                                               text
 0   0     NaN      NaN                 Just happened a terrible car crash
 1   2     NaN      NaN  Heard about #earthquake is different cities, s...
 2   3     NaN      NaN  there is a forest fire at spot pond, geese are...
 3   9     NaN      NaN           Apocalypse lighting. #Spokane #wildfires
 4  11     NaN      NaN      Typhoon Soudelor kills 28 in China 

In [7]:
# frac=1 samples every row, i.e. a full shuffle; the fixed seed keeps it repeatable.
train_df_shuffled = train_df.sample(random_state=42, frac=1)
(train_df_shuffled.head(), train_df_shuffled.shape)

(        id      keyword               location  \
 2644  3796  destruction                    NaN   
 2227  3185       deluge                    NaN   
 5448  7769       police                     UK   
 132    191   aftershock                    NaN   
 6845  9810       trauma  Montgomery County, MD   
 
                                                    text  target  
 2644  So you have a new weapon that can cause un-ima...       1  
 2227  The f$&amp;@ing things I do for #GISHWHES Just...       0  
 5448  DT @georgegalloway: RT @Galloway4Mayor: ÛÏThe...       1  
 132   Aftershock back to school kick off was great. ...       0  
 6845  in response to trauma Children of Addicts deve...       0  ,
 (7613, 5))

In [8]:
# Report the row count of each split and their combined total, one per line.
print(
    f"총 학습할 데이터의 수 : {len(train_df)}",
    f"총 테스트할 데이터의 수 : {len(test_df)}",
    f"총 데이터의 수 : {len(train_df) + len(test_df)}",
    sep="\n",
)

총 학습할 데이터의 수 : 7613
총 테스트할 데이터의 수 : 3263
총 데이터의 수 : 10876


In [9]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the shuffled rows for validation; the seed pins the split.
(train_sentences, val_sentences, train_labels, val_labels) = train_test_split(
    *(train_df_shuffled[column].to_numpy() for column in ("text", "target")),
    random_state=42,
    test_size=0.1,
)

In [10]:
# Sanity check: sentences and labels must have matching lengths within each split.
tuple(len(split) for split in (train_sentences, train_labels, val_sentences, val_labels))

(6851, 6851, 762, 762)

In [11]:
# Score the HDF5-restored model on the held-out validation split.
loaded_model_6.evaluate(x=val_sentences, y=val_labels)



[0.42613252997398376, 0.8123359680175781]

In [12]:
# Re-export the model to a directory path (no ".h5" suffix).
loaded_model_6.save(filepath="model_6_SaveModel_format")



INFO:tensorflow:Assets written to: model_6_SaveModel_format\assets


INFO:tensorflow:Assets written to: model_6_SaveModel_format\assets


In [13]:
# Load the model back from the directory written by the previous save call.
loaded_model_6_SaveModel = tf.keras.models.load_model(
    filepath="model_6_SaveModel_format",
)

In [14]:
# Score the reloaded model on the same validation split for comparison.
loaded_model_6_SaveModel.evaluate(x=val_sentences, y=val_labels)



[0.42613252997398376, 0.8123359680175781]

In [15]:
# Raw model outputs for every validation sentence.
model_6_pred_probs = loaded_model_6_SaveModel.predict(x=val_sentences)
# Drop the size-1 dimensions, then round each value to the nearest label.
model_6_preds = tf.round(tf.squeeze(model_6_pred_probs))

In [19]:
# One row per validation tweet: text, true label, rounded prediction, raw output.
val_df = pd.DataFrame(
    dict(
        text=val_sentences,
        target=val_labels,
        pred=model_6_preds,
        pred_prob=tf.squeeze(model_6_pred_probs),
    )
)
val_df

Unnamed: 0,text,target,pred,pred_prob
0,DFR EP016 Monthly Meltdown - On Dnbheaven 2015...,0,0.0,0.198712
1,FedEx no longer to transport bioterror germs i...,0,1.0,0.748079
2,Gunmen kill four in El Salvador bus attack: Su...,1,1.0,0.986122
3,@camilacabello97 Internally and externally scr...,1,0.0,0.217625
4,Radiation emergency #preparedness starts with ...,1,1.0,0.693689
...,...,...,...,...
757,That's the ultimate road to destruction,0,0.0,0.150539
758,@SetZorah dad why dont you claim me that mean ...,0,0.0,0.100686
759,FedEx will no longer transport bioterror patho...,0,1.0,0.875329
760,Crack in the path where I wiped out this morni...,0,1.0,0.702966


In [20]:
# Keep only the misclassified rows, ordered from highest to lowest pred_prob.
most_wrong = (
    val_df.loc[val_df["target"].ne(val_df["pred"])]
    .sort_values(by="pred_prob", ascending=False)
)
most_wrong.head(10)

Unnamed: 0,text,target,pred,pred_prob
31,? High Skies - Burning Buildings ? http://t.co...,0,1.0,0.933125
759,FedEx will no longer transport bioterror patho...,0,1.0,0.875329
628,@noah_anyname That's where the concentration c...,0,1.0,0.854831
49,@madonnamking RSPCA site multiple 7 story high...,0,1.0,0.846544
209,Ashes 2015: AustraliaÛªs collapse at Trent Br...,0,1.0,0.831237
393,@SonofLiberty357 all illuminated by the bright...,0,1.0,0.823089
251,@AshGhebranious civil rights continued in the ...,0,1.0,0.805122
698,åÈMGN-AFRICAå¨ pin:263789F4 åÈ Correction: Ten...,0,1.0,0.803008
109,[55436] 1950 LIONEL TRAINS SMOKE LOCOMOTIVES W...,0,1.0,0.8029
381,Deaths 3 http://t.co/nApviyGKYK,0,1.0,0.776191


In [21]:
# false positive 확인 
for row in most_wrong[:10].itertuples():
    _, text, target, pred, prob = row
    print(f"Target : {target}, Pred: {int(pred)}, Prob: {prob}")
    print(f"Text:\n{text}\n")
    print("----\n")

Target : 0, Pred: 1, Prob: 0.9331251382827759
Text:
? High Skies - Burning Buildings ? http://t.co/uVq41i3Kx2 #nowplaying

----

Target : 0, Pred: 1, Prob: 0.8753292560577393
Text:
FedEx will no longer transport bioterror pathogens in wake of anthrax lab mishaps http://t.co/lHpgxc4b8J

----

Target : 0, Pred: 1, Prob: 0.8548308610916138
Text:
@noah_anyname That's where the concentration camps and mass murder come in. 
 
EVERY. FUCKING. TIME.

----

Target : 0, Pred: 1, Prob: 0.8465442657470703
Text:
@madonnamking RSPCA site multiple 7 story high rise buildings next to low density character residential in an area that floods

----

Target : 0, Pred: 1, Prob: 0.8312370777130127
Text:
Ashes 2015: AustraliaÛªs collapse at Trent Bridge among worst in history: England bundled out Australia for 60 ... http://t.co/t5TrhjUAU0

----

Target : 0, Pred: 1, Prob: 0.823088526725769
Text:
@SonofLiberty357 all illuminated by the brightly burning buildings all around the town!

----

Target : 0, Pred:

In [23]:
# false negative 확인 
for row in most_wrong[-10:].itertuples():
    _, text, target, pred, prob = row
    print(f"Target : {target}, Pred: {int(pred)}, Prob: {prob}")
    print(f"Text:\n{text}\n")
    print("----\n")

Target : 1, Pred: 0, Prob: 0.06737545132637024
Text:
going to redo my nails and watch behind the scenes of desolation of smaug ayyy

----

Target : 1, Pred: 0, Prob: 0.06604376435279846
Text:
'The way you move is like a full on rainstorm and I'm a house of cards'

----

Target : 1, Pred: 0, Prob: 0.06545454263687134
Text:
@willienelson We need help! Horses will die!Please RT &amp; sign petition!Take a stand &amp; be a voice for them! #gilbert23 https://t.co/e8dl1lNCVu

----

Target : 1, Pred: 0, Prob: 0.06507685780525208
Text:
New post from @darkreading http://t.co/8eIJDXApnp New SMB Relay Attack Steals User Credentials Over Internet

----

Target : 1, Pred: 0, Prob: 0.05454438924789429
Text:
@SoonerMagic_ I mean I'm a fan but I don't need a girl sounding off like a damn siren

----

Target : 1, Pred: 0, Prob: 0.053053438663482666
Text:
You can never escape me. Bullets don't harm me. Nothing harms me. But I know pain. I know pain. Sometimes I share it. With someone like you.

----

Tar

In [24]:
import random

# Draw a fixed set of 10 test tweets. A dedicated Random instance seeded with
# 42 (the seed used elsewhere in this notebook) makes the selection identical
# on every run without reseeding the global RNG.
test_sentences = test_df["text"].to_list()
test_samples = random.Random(42).sample(test_sentences, 10)

# Score all samples with a single batched predict() call instead of one call
# per tweet, then drop the trailing size-1 output dimension.
test_pred_probs = tf.squeeze(loaded_model_6_SaveModel.predict(test_samples), axis=-1)

for test_sample, pred_prob in zip(test_samples, test_pred_probs):
    pred = tf.round(pred_prob)
    print(f"Pred: {int(pred)}, Prob: {pred_prob}")
    print(f"Text: \n{test_sample}\n")
    print("----\n")
    

Pred: 0, Prob: 0.07988095283508301
Text: 
It's a trap not a fucking game. U ain't trapping if u ain't trapped

----

Pred: 0, Prob: 0.23009538650512695
Text: 
my wife has opted to take holidays &amp; historically since we met @lanahillman gets special treatment her bonfires can't start wild fires-RAIN!

----

Pred: 0, Prob: 0.17310187220573425
Text: 
So that it'll electrocute somebody's ass baa...No thank you https://t.co/x7P1xaBWTz

----

Pred: 0, Prob: 0.07406219840049744
Text: 
@TypeEd been a bit inundated w. Illustrative work  BUT would love to catch up! ??????

----

Pred: 1, Prob: 0.90939861536026
Text: 
Refugio oil spill may have been costlier bigger than projected http://t.co/d7FdCLU404

----

Pred: 0, Prob: 0.08046174049377441
Text: 
@alexhern he created vr only he can destroy it.

----

Pred: 0, Prob: 0.3765888810157776
Text: 
Our wounds can so easily turn us into people we don't want to be and we hardly see it happening

----

Pred: 0, Prob: 0.2447885274887085
Text: 
@Ryanai

In [25]:
# Hand-written example sentence to feed to the classifier.
my_tweet = (
    "Life like an ensemble: "
    "take the best choices from others and make your own"
)

In [26]:
def predict_on_sentence(model, sentence):
    """Predict on a single sentence and print the outcome.

    The sentence is wrapped in a one-element list and passed to
    ``model.predict``. The rounded output is printed together with a tag
    ("부정", Korean for "negative", when the rounded value is above zero,
    otherwise "긍정", "positive") and the raw value at ``[0][0]`` of the
    prediction, followed by the sentence itself.

    Args:
        model: Object exposing ``predict``; presumably a Keras text
            classifier with a single output value - TODO confirm.
        sentence: Text to classify.

    Returns:
        None. Results are only printed.
    """
    probs = model.predict([sentence])
    rounded = tf.round(probs)
    label = tf.squeeze(rounded).numpy()
    if label > 0:
        tag = "부정"
    else:
        tag = "긍정"
    print(f"Pred: {label}", tag, f"Prob: {probs[0][0]}")
    print(f"Text: \n{sentence}")

In [27]:
# Classify the custom example sentence with the reloaded model.
predict_on_sentence(loaded_model_6_SaveModel, my_tweet)

Pred: 0.0 긍정 Prob: 0.045455485582351685
Text: 
Life like an ensemble: take the best choices from others and make your own


In [28]:
# Two further example sentences to feed to the classifier.
bad_news1 = (
    "Reports that the smoke in Beirut sky contains nitric acid, which is toxic. "
    "Please share and refrain from stepping outside unless urgent. #Lebanon"
)
bad_news2 = (
    "#Beirut declared a “devastated city”, "
    "two-week state of emergency officially declared. #Lebanon"
)

In [29]:
# Classify the first example sentence.
predict_on_sentence(loaded_model_6_SaveModel, bad_news1)

Pred: 1.0 부정 Prob: 0.969714343547821
Text: 
Reports that the smoke in Beirut sky contains nitric acid, which is toxic. Please share and refrain from stepping outside unless urgent. #Lebanon


In [30]:
# Classify the second example sentence.
predict_on_sentence(loaded_model_6_SaveModel, bad_news2)

Pred: 1.0 부정 Prob: 0.9734008312225342
Text: 
#Beirut declared a “devastated city”, two-week state of emergency officially declared. #Lebanon


In [51]:
# NOTE(review): "my_opinion" inside the sentence looks like a leftover from a
# variable rename (a word in the text may have been replaced) - confirm the
# intended wording before relying on this example.
my_opinion = "I my_opinion a baseball!"

In [52]:
# Classify the last custom sentence.
predict_on_sentence(loaded_model_6_SaveModel, my_opinion)

Pred: 0.0 긍정 Prob: 0.11191290616989136
Text: 
I my_opinion a baseball!
