# Supervised and Unsupervised Modeling

In [426]:
import pandas as pd
import nltk
import numpy as np
import regex as re
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from nltk.stem.porter import PorterStemmer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

In [427]:
df = pd.read_csv('./Datasets/big_df.csv')

### Road Closure Classification

In [428]:
df.head(5)

Unnamed: 0,screen_name,username,user_id,tweet_id,tweet_url,timestamp,timestamp_epochs,text,text_html,links,...,img_urls,video_url,likes,retweets,replies,is_replied,is_reply_to,parent_tweet_id,reply_to_users,query
0,EPCFIRM,EPCF #GreenNewDeal #SunriseMovement,836834627771850752,1188564512669962240,/EPCFIRM/status/1188564512669962240,2019-10-27 21:13:56,1572210836,Our hearts go out to Californians affected by ...,"<p class=""TweetTextSize js-tweet-text tweet-te...",[],...,['https://pbs.twimg.com/media/EH6hH4FVAAA6ZA_....,,8,1,1,True,False,,[],saddleridgefire
1,jsingpubhealth,Jason Singson,3320904614,1188545671076298752,/jsingpubhealth/status/1188545671076298752,2019-10-27 19:59:04,1572206344,"From the #SaddleridgeFire to the #KincadeFire,...","<p class=""TweetTextSize js-tweet-text tweet-te...",['https://twitter.com/CAgovernor/status/118852...,...,[],,1,0,0,False,False,,[],saddleridgefire
2,BrandyCGrote,Brandy Grote,898902973,1188517925503635456,/BrandyCGrote/status/1188517925503635456,2019-10-27 18:08:49,1572199729,Our #SaddleridgeFire leaped a 12 lane major fr...,"<p class=""TweetTextSize js-tweet-text tweet-te...",[],...,[],,1,1,0,False,True,1.188496e+18,"[{'screen_name': 'BillMendocino', 'user_id': '...",saddleridgefire
3,mlzema,Mark In SoCal,18340526,1188515108562931714,/mlzema/status/1188515108562931714,2019-10-27 17:57:38,1572199058,"Good Morning, Sam!\nAll is well here. However,...","<p class=""TweetTextSize js-tweet-text tweet-te...",['https://twitter.com/Sam17970863/status/11885...,...,[],,1,0,0,False,False,,[],saddleridgefire
4,LVTIA,LVTIA,87142711,1188502337867829248,/LVTIA/status/1188502337867829248,2019-10-27 17:06:53,1572196013,Was your property or home damaged by the #Sadd...,"<p class=""TweetTextSize js-tweet-text tweet-te...",['https://www.211la.org/fire-saddleridge'],...,[],,0,0,0,False,False,,[],saddleridgefire


In [429]:
df.columns

Index(['screen_name', 'username', 'user_id', 'tweet_id', 'tweet_url',
       'timestamp', 'timestamp_epochs', 'text', 'text_html', 'links',
       'hashtags', 'has_media', 'img_urls', 'video_url', 'likes', 'retweets',
       'replies', 'is_replied', 'is_reply_to', 'parent_tweet_id',
       'reply_to_users', 'query'],
      dtype='object')

In [430]:
## Drop unnecessary columns 
df_text_query = df.drop(columns= ['screen_name', 'user_id', 'tweet_id', 'tweet_url', 'timestamp', 'timestamp_epochs', 'text_html', 'links', 'hashtags', 'has_media', 'img_urls', 'video_url', 'likes', 'retweets', 'replies', 'is_replied', 'is_reply_to', 'parent_tweet_id', 'reply_to_users'])

### Check for Null Values

In [431]:
df_text_query.isnull().sum()

username      0
text        105
query         0
dtype: int64

In [432]:
df_text_query.dropna(axis=0, how='any', inplace=True)

In [433]:
df_text_query.head()

Unnamed: 0,username,text,query
0,EPCF #GreenNewDeal #SunriseMovement,Our hearts go out to Californians affected by ...,saddleridgefire
1,Jason Singson,"From the #SaddleridgeFire to the #KincadeFire,...",saddleridgefire
2,Brandy Grote,Our #SaddleridgeFire leaped a 12 lane major fr...,saddleridgefire
3,Mark In SoCal,"Good Morning, Sam!\nAll is well here. However,...",saddleridgefire
4,LVTIA,Was your property or home damaged by the #Sadd...,saddleridgefire


In [434]:
df_text_query.isnull().sum()

username    0
text        0
query       0
dtype: int64

In [435]:
df_text_query.shape

(9075, 3)

### Cleaning Tweets using RegEx

In [436]:
## Lower case texts
df_text_query['text'] = df_text_query['text'].str.lower()

In [437]:
## Removing line breaks
df_text_query['text'] = df_text_query['text'].map(lambda x: re.sub('\/\/', ' ', x)) 

In [438]:
## Removing Apostrophes
df_text_query['text'] = df_text_query['text'].map(lambda x: re.sub('[\\][\']', '', x))

In [439]:
## Removing URLs
df_text_query['text'] = df_text_query['text'].map(lambda x: re.sub('http[s]?:\/\/[^\s]*', ' ', x))

In [440]:
df_text_query.head(5)

Unnamed: 0,username,text,query
0,EPCF #GreenNewDeal #SunriseMovement,our hearts go out to californians affected by ...,saddleridgefire
1,Jason Singson,"from the #saddleridgefire to the #kincadefire,...",saddleridgefire
2,Brandy Grote,our #saddleridgefire leaped a 12 lane major fr...,saddleridgefire
3,Mark In SoCal,"good morning, sam!\nall is well here. however,...",saddleridgefire
4,LVTIA,was your property or home damaged by the #sadd...,saddleridgefire


### Road Closure Classification

#### We want to determine if a road was open or closed. We achieved this by passing our dataframe through the filters we created

In [441]:
# establish list of words to search for in dataframe
road_keywords = ['road', 'st','rd', 'hwy', 'highway', 'ave', 'avenue', 'street', 'boulevard'
                 'intersection', 'bridge', 'sr-', 'cr-', 'us-', 'i-', 'blvd']

# list of words associated with road closures
closed_keywords = ['closed', 'remains closed', 'shut down', 'backed up',
                   'no travel', 'delay', 'blocked', 'delays',
                   'disabled', 'traffic', 'fire', 'closures', 'closure']

to_drop = ["open", "opened", "lifted", "reopened", "clear", "cleared"]

In [442]:
df_text_query['is_road_closed']= df_text_query['text'].apply(lambda x: 1 if ((any(word in x.split() for word in road_keywords))
                                                           & (any(word in x.split() for word in closed_keywords))
                                                           & (not any(word in x.split() for word in to_drop))
                                                          ) else 0)

In [443]:
df_text_query.head()

Unnamed: 0,username,text,query,is_road_closed
0,EPCF #GreenNewDeal #SunriseMovement,our hearts go out to californians affected by ...,saddleridgefire,0
1,Jason Singson,"from the #saddleridgefire to the #kincadefire,...",saddleridgefire,0
2,Brandy Grote,our #saddleridgefire leaped a 12 lane major fr...,saddleridgefire,0
3,Mark In SoCal,"good morning, sam!\nall is well here. however,...",saddleridgefire,0
4,LVTIA,was your property or home damaged by the #sadd...,saddleridgefire,0


#### Baseline Comparison

In [444]:
df_text_query['is_road_closed'].value_counts()

0    8762
1     313
Name: is_road_closed, dtype: int64

In [445]:
df_text_query['is_road_closed'].value_counts(normalize = True)

0    0.96551
1    0.03449
Name: is_road_closed, dtype: float64

In [446]:
## assign road closed or open dataframes
closed_road = df_text_query[df_text_query['is_road_closed'] == 1]
open_road = df_text_query[df_text_query['is_road_closed'] == 0]

In [447]:
closed_road.shape

(313, 4)

In [448]:
closed_road.head()

Unnamed: 0,username,text,query,is_road_closed
272,News Source LA,@dwp blocked san fernando rd during the #saddl...,saddleridgefire,1
281,News Source LA,we had the same thing from @dwp security on sa...,saddleridgefire,1
502,Donald Lindsay,oh noes. #realfire in santa barbara co may hav...,saddleridgefire,1
620,Allison McHaney,today across the street from olive vista middl...,saddleridgefire,1
637,Allison McHaney,today across the street from olive vista middl...,saddleridgefire,1


In [449]:
open_road.shape

(8762, 4)

In [450]:
# find which handles tweeted the most about road closures
closed_road['username'].value_counts()

Go511                                  139
Caltrans District 7                     56
Brian Douglas                           19
scott burt                              12
Desmond Shaw                            11
KNX 1070 NEWSRADIO                       6
DENISE FONDO                             3
Spectrum News 1 SoCal                    3
Belen De Leon                            3
TCEP                                     2
Jeannette johnson                        2
Ventura Co. Sheriff                      2
Michael Fleming                          2
CHP - Inland Communications              2
Justin Bonney Real Estate                2
News Source LA                           2
Mkelly                                   2
Fullerton Road Grade Separation          2
EMS Daily                                2
Los Angeles Traffic                      2
BoardwalkFunCenter                       2
Allison McHaney                          2
JoAnn Bush                               2
JoAnn @ ESA

In [451]:
closed_road.shape

(313, 4)

## Declaring a Tokenizer Function:

In [452]:
tokenizer = RegexpTokenizer(r'\s+', gaps=True)

def tokenizing_function(df_col):

    string = ''
    
    try:
    
        for post in df_col:
            string += ' ' + post
            
    except:
        pass
    
    all_tokens = tokenizer.tokenize(string)
    
    return all_tokens

### Calling Tokenizer Function on `text` Column and Saving as Variable

In [481]:
text_tokens = tokenizing_function(df['text'])

## Declaring Count Vectorizer Function:

In [454]:
cvec = CountVectorizer(stop_words= 'english', min_df=5)

def cvec_function(df_col):
    cvec_matrix = cvec.fit_transform(df_col)

    #Convert to DataFrame
    cvec_df = pd.DataFrame(cvec_matrix.toarray(),
                          columns=cvec.get_feature_names())
    return cvec_df

### Calling the cvec_function with `text` column and saving the matrix as `text_cvec_df` variable

In [455]:
hash_cvec_df = cvec_function(df['hashtags'])
hash_cvec_df

Unnamed: 0,101fwy,105fwy,10fwy,110fwy,118fwy,134fwy,14freeway,14fwy,1582,15fwy,...,westhollywood,westla,wevapewevote,wildfire,wildfirepic,wildfires,wolffire,woodlandhills,woolseyfire,yeswx
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9175,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9176,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9177,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9178,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Viewing Top 50 Words in `text` column:

In [456]:
hash_cvec_df.sum().sort_values(ascending=False).head(50)

saddleridgefire        1390
tickfire                777
knxtraffic              377
latraffic               285
gettyfire               218
kincadefire             189
californiafires         112
easyfire                111
losangeles              101
rt                       96
sigalert                 94
knxtrafficpic            90
california               90
wildfires                81
5fwy                     76
santaclarita             72
sylmar                   71
lacofd                   67
palisadesfire            64
porterranch              63
ca25                     59
wildfire                 58
405fwy                   58
101fwy                   56
lafd                     53
firefighters             52
californiawildfires      50
pulsepointconnected      49
60fwy                    42
605fwy                   42
saddleridge              42
110fwy                   41
sandalwoodfire           39
kincaidfire              39
oakfire                  38
oldwaterfire        

### Setting up data for modeling

In [457]:
X = df_text_query['text']
y = df_text_query['is_road_closed']

In [458]:
X.dtypes

dtype('O')

In [459]:
y.dtypes

dtype('int64')

In [460]:
## Train, test and split the data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    stratify=y,
                                                    random_state=42)

In [461]:
## Baseline Predictions
y_test.value_counts()

0    2892
1     103
Name: is_road_closed, dtype: int64

In [482]:
y_test.value_counts(normalize=True)

0    0.965609
1    0.034391
Name: is_road_closed, dtype: float64

## CountVectorization/Logistic Regression Model

In [464]:
pipe = Pipeline([
    ('cvec', CountVectorizer()),
    ('logreg', LogisticRegression(solver = 'lbfgs'))
])

params = {
    'cvec__stop_words' : [None, 'english'],
    'logreg__penalty' : ['none','l2'],
    'cvec__max_features': [2000, 3000, 4000, 5000],
    'cvec__min_df': [2, 3],
    'cvec__max_df': [.9, .95],
    'cvec__ngram_range': [(1,1), (1,2)]
}

In [465]:
gs = GridSearchCV(pipe, # what object are we optimizing?
                  params, # what parameters values are we searching?
                  cv=5) # 5-fold cross-validation.

In [466]:
# Fit GridSearch to training data.
gs.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('cvec',
                                        CountVectorizer(analyzer='word',
                                                        binary=False,
                                                        decode_error='strict',
                                                        dtype=<class 'numpy.int64'>,
                                                        encoding='utf-8',
                                                        input='content',
                                                        lowercase=True,
                                                        max_df=1.0,
                                                        max_features=None,
                                                        min_df=1,
                                                        ngram_range=(1, 1),
                                            

In [467]:
# Save best model as gs_model.
gs_model = gs.best_estimator_

In [468]:
# Score model on training set.
gs_model.score(X_train, y_train)

0.9993421052631579

In [469]:
# Score model on testing set.
gs_model.score(X_test, y_test)

0.9863105175292154

In [470]:
preds = gs.best_estimator_.predict(X_test)

#### Look at the coefficients of CountVectorization/Logistic Model

In [487]:
# look at the coefficients
coefs = gs.best_estimator_.named_steps['logreg'].coef_[0]
features = gs.best_estimator_.named_steps['cvec'].get_feature_names()

In [488]:
coef_df = pd.DataFrame({'features' : features,
             'coefficients': coefs})

In [489]:
coef_df.sort_values('coefficients', ascending = False)

Unnamed: 0,features,coefficients
3567,road,2.615889
3569,road closures,1.678941
1107,closures,1.657915
2147,hwy,1.527241
661,avenue,1.439073
...,...,...
2464,la,-0.688243
3887,south near,-0.722229
1538,escondido,-0.738734
3105,open,-0.782942


#### Evaluation and Confusion Matrix of CountVectorization/Logistic Regression Model

In [471]:
def nice_conmat(y_test, preds, classes):
    conmat = confusion_matrix(y_test, preds)
    
    print(f'Accuracy Score: {accuracy_score(y_test, preds)}')
    return pd.DataFrame(conmat, columns=['Predicted ' +class_ for class_ in classes], \
                index=['Actual '+ class_ for class_ in classes])

nice_conmat(y_test, preds, ['open', 'is_road_closed'])

Accuracy Score: 0.9863105175292154


Unnamed: 0,Predicted open,Predicted is_road_closed
Actual open,2886,6
Actual is_road_closed,35,68


In [483]:
TN = 2886
FN = 35
FP = 6
TP = 68

In [485]:
# Recall / Sensitivity for CountVect/LogReg
TP / (TP + FN)

0.6601941747572816

In [486]:
# Specificity for CountVect/LogReg
TN / (FP + TN)

0.9979253112033195

#### Type I Error (or False Positive) is a result that indicates that a given condition is present when it actually is not present. In our model, it would be incorrectly predicting the road was closed, when it is actually open. In our example this would cause less cars/traffic/people using a safe escape route. A low FP value would mean less routes were incorrectly flagged as road being closed, maximizing the amount of escape routes. 

#### Type II Error (or False Negative) is a result that indicates that a given condition is not present when it actually is present. In our model, it would be incorrectly predicting the road was open, when it is actually closed. With this model we would want to minimize Type II errors, it could potentially be catastrophic to send a evacuee into a dangerous situation not to mention cause an increase in traffic. Unfortunately, our data and modeling indicate a relatively high level of false negatives. 

## TFIDF/Logistic Regression Model

In [472]:
pipe_v2 = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('logreg', LogisticRegression(solver = 'lbfgs'))
])

params_2 = {
    'logreg__penalty' : ['none','l2'],
}

In [473]:
gs_2 = GridSearchCV(pipe_v2, # what object are we optimizing?
                  params_2, # what parameters values are we searching?
                  cv=5) # 5-fold cross-validation.

In [474]:
# Fit GridSearch to training data.
gs_2.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('tfidf',
                                        TfidfVectorizer(analyzer='word',
                                                        binary=False,
                                                        decode_error='strict',
                                                        dtype=<class 'numpy.float64'>,
                                                        encoding='utf-8',
                                                        input='content',
                                                        lowercase=True,
                                                        max_df=1.0,
                                                        max_features=None,
                                                        min_df=1,
                                                        ngram_range=(1, 1),
                                         

In [475]:
# Save best model as gs_model.
gs_2_model = gs_2.best_estimator_

In [476]:
# Score model on training set.
gs_2_model.score(X_train, y_train)

1.0

In [477]:
# Score model on testing set.
gs_2_model.score(X_test, y_test)

0.9853088480801335

In [490]:
preds_tfidf = gs_2.best_estimator_.predict(X_test)

In [493]:
def nice_conmat(y_test, preds_tfidf, classes):
    conmat = confusion_matrix(y_test, preds_tfidf)
    
    print(f'Accuracy Score: {accuracy_score(y_test, preds_tfidf)}')
    return pd.DataFrame(conmat, columns=['Predicted ' +class_ for class_ in classes], \
                index=['Actual '+ class_ for class_ in classes])

nice_conmat(y_test, preds_tfidf, ['open', 'is_road_closed'])

Accuracy Score: 0.9853088480801335


Unnamed: 0,Predicted open,Predicted is_road_closed
Actual open,2874,18
Actual is_road_closed,26,77


In [495]:
TN_tfidf = 2874
FN_tfidf = 26
FP_tfidf = 18
TP_tfidf = 77

In [496]:
# Recall / Sensitivity for TFIDFt/LogReg
TP_tfidf / (TP_tfidf + FN_tfidf)

0.7475728155339806

In [497]:
# Specificity for TFIDF/LogReg
TN_tfidf / (FP_tfidf + TN_tfidf)

0.9937759336099585

## Filtering True Positives

In [498]:
df_text_query

Unnamed: 0,username,text,query,is_road_closed
0,EPCF #GreenNewDeal #SunriseMovement,our hearts go out to californians affected by ...,saddleridgefire,0
1,Jason Singson,"from the #saddleridgefire to the #kincadefire,...",saddleridgefire,0
2,Brandy Grote,our #saddleridgefire leaped a 12 lane major fr...,saddleridgefire,0
3,Mark In SoCal,"good morning, sam!\nall is well here. however,...",saddleridgefire,0
4,LVTIA,was your property or home damaged by the #sadd...,saddleridgefire,0
...,...,...,...,...
9175,Claudia Peschiutta,former state lawmaker says hes got a plan to c...,KNX1070,0
9176,Eden,totally makes sense to do this midday,KNX1070,0
9177,KNX 1070 NEWSRADIO,"a sanford, maine mom says her 9-year-old birac...",KNX1070,0
9178,Andy,"i noticed it in anaheim hills, it looks crazy!",KNX1070,0


In [500]:
len(preds_tfidf)

2995

In [504]:
x_test_indices = list(X_test.index)

In [507]:
x_test_df = df_text_query.loc[x_test_indices, :]
x_test_df.head()

Unnamed: 0,username,text,query,is_road_closed
8433,KNX 1070 NEWSRADIO,watch video of alanis morissette sharing her n...,KNX1070,0
6162,LACoFD Incident Alerts,they’re engines equipped with cafs (compressed...,LACoFD911,0
7271,Brian Douglas,brush fire burning in the ventura riverbed has...,KNX1070,0
1724,AJ Tenney,lost out on two days of work due to the #tickf...,tickfire,0
394,AI6YR,#tickfire some confusion over the 23121 coltra...,saddleridgefire,0


In [508]:
x_test_df['y_preds'] = preds_tfidf

In [511]:
road_closed_true = (x_test_df['is_road_closed'] == 1)
road_closed_pred = (x_test_df['y_preds'] == 1)

X_test_true_pos = x_test_df[road_closed_true & road_closed_pred]

In [519]:
X_test_true_pos.head(20)

Unnamed: 0,username,text,query,is_road_closed,y_preds
7145,Go511,#tickfire update\n\nsr-14 south between agua d...,Go511,1,1
7708,Brian Douglas,#sigalert!!! sb #605fwy just past peck rd cras...,KNX1070,1,1
7014,Go511,tick fire\nall lanes closed in both directions...,Go511,1,1
3489,Caltrans District 7,nb i-5 all lanes scheduled to be closed norwal...,CaltransDist7,1,1
3640,Caltrans District 7,nb & sb i-5 scheduled to be closed from artesi...,CaltransDist7,1,1
4062,JoAnn @ ESAudio:),"""#nb14 (#antelopevalleyfwy) - two right lanes ...",CaltransDist7,1,1
8580,Desmond Shaw,the ripple effect from the closures on the #sa...,KNX1070,1,1
6957,Go511,#easyfire closure update:\n\n-sr-23 in both di...,Go511,1,1
6978,Go511,#easyfire closure updates\n\non & off-ramps:\n...,Go511,1,1
4488,Caltrans District 7,for current highway closures and traffic condi...,CaltransDist7,1,1


In [514]:
X_test_true_pos['text'][7145]

'#tickfire update\n\nsr-14 south between agua dulce canyon rd and soledad canyon rd,  the right lane is shutdown for an unknown duration. expect delays up to 2 hrs.\n\nstop and go traffic through the closure. all northbound lanes have been re-opened.'

In [516]:
X_test_true_pos['text'][7708]

'#sigalert!!! sb #605fwy just past peck rd crash has the 2 left lns blocked with a jam up starting at the 10.  @knx1070 #knxtraffic @caltransdist7pic.twitter.com/qfnmqzvdlp'

In [517]:
X_test_true_pos['text'][7014]

'tick fire\nall lanes closed in both directions on the sr-14 between golden valley rd and agua dulce cyn rd.\n\nsand cyn closed between sierra hwy and soledad cyn rd \n\nvasquez cyn rd closed between sierra hwy and bouquet cyn rd \n\nsierra hwy closed between sand cyn rd and davenport rd'

In [520]:
X_test_true_pos['text'][3489]

'nb i-5 all lanes scheduled to be closed norwalk bl to i-605 night of tue 11/5 & again night of wed 11/6. may repeat night of thursday 11/7. to construct falsework (temporary frame support) for 2nd half of new bridge over nb lanes. for more go to \nhttp: tinyurl.com/y57nyafo\xa0pic.twitter.com/dwwaefvxlh'

In [522]:
X_test_true_pos['text'][3640]

'nb & sb i-5 scheduled to be closed from artesia bl to carmenita rd from 11pm to 5am each night from tue night 10/22 thru the night of fri 10/25. demolition of 2nd half of old valley view av bridge. for more details go to \nhttp: tinyurl.com/yxfgt6nc\xa0 http: my5la.com\xa0pic.twitter.com/un9csivlwm'

In [523]:
X_test_true_pos['text'][4062]

'"#nb14 (#antelopevalleyfwy) - two right lanes are closed south of #soledadcanyon rd due to brushfire, #soledadfire;\n\navoid the area, expect delays!"\n\n@caltransdist7\n#losangeles #ca #socal #cafires #traffic'

In [524]:
X_test_true_pos['text'][8580]

'the ripple effect from the closures on the #saddleridgefire is incredible...massive delays into the cajon pass on both sides of the #15fwy, eastbound pearblossom highway getting bad and the kramer junction (395/58)is now jammed as well @knx1070 https: app.radio.com/social-download-knx-1070-newsradio\xa0…'

In [525]:
X_test_true_pos['text'][6957]

'#easyfire closure update:\n\n-sr-23 in both directions between avenida de los arboles in thousand oaks\xa0and los angeles ave. in moorpark\n\n-sr-118 east off/on ramp at yosemite avenue in simi valley closed until approximately 4pm.'