In [250]:
import pandas as pd
import numpy as np
import string
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer

In [251]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /Users/skwong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/skwong/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/skwong/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [290]:
disaster = pd.read_csv('nlp-getting-started/train.csv')

In [253]:
disaster.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


> ## add features


> ### cleaning

In [254]:
text_data = np.array(disaster[['id','text']])

In [255]:
lemmatizer = WordNetLemmatizer()
analyser = SentimentIntensityAnalyzer()
stop_words = set(stopwords.words('english')) 

In [256]:
text_data[0:5,:]

array([[1,
        'Our Deeds are the Reason of this #earthquake May ALLAH Forgive us all'],
       [4, 'Forest fire near La Ronge Sask. Canada'],
       [5,
        "All residents asked to 'shelter in place' are being notified by officers. No other evacuation or shelter in place orders are expected"],
       [6,
        '13,000 people receive #wildfires evacuation orders in California '],
       [7,
        'Just got sent this photo from Ruby #Alaska as smoke from #wildfires pours into a school ']],
      dtype=object)

In [257]:
# Removes words that are not needed. Lemmaizes words
def clean_text(arr):
    cleaned_texts = []
    for row in arr:
        text = "".join((char for char in row[1] if char not in string.punctuation))
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = word_tokenize(text)
        text = [lemmatizer.lemmatize(w.lower()) for w in text if not w in stop_words] 
        cleaned_texts.append(text)
        
    return np.array(cleaned_texts)

In [258]:
text_clean = clean_text(text_data)

In [259]:
disaster['clean_text'] = text_clean

> ### sentiment analysis

In [260]:
# Obtains sentiment score
def sentiment(arr):
    scores = []
    for row in arr:
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        scores.append(list(analyser.polarity_scores(text).values()))
    return np.array(scores)

In [261]:
text_sentiment = sentiment(text_data)

In [262]:
positive = [t_s[0] for t_s in text_sentiment]
neutral = [t_s[1] for t_s in text_sentiment]
negative = [t_s[2] for t_s in text_sentiment]
compound = [t_s[3] for t_s in text_sentiment]

In [263]:
disaster['sentiment_score_positive'] = positive
disaster['sentiment_score_neutral'] = neutral
disaster['sentiment_score_negative'] = negative
disaster['sentiment_score_compound'] = compound

In [264]:
disaster.head()

Unnamed: 0,id,keyword,location,text,target,clean_text,sentiment_score_positive,sentiment_score_neutral,sentiment_score_negative,sentiment_score_compound
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,"[our, deed, reason, #, earthquake, may, allah,...",0.0,0.851,0.149,0.2732
1,4,,,Forest fire near La Ronge Sask. Canada,1,"[forest, fire, near, la, ronge, sask, ., canada]",0.286,0.714,0.0,-0.34
2,5,,,All residents asked to 'shelter in place' are ...,1,"[all, resident, asked, 'shelter, place, ', not...",0.095,0.905,0.0,-0.296
3,6,,,"13,000 people receive #wildfires evacuation or...",1,"[13,000, people, receive, #, wildfire, evacuat...",0.0,1.0,0.0,0.0
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,"[just, got, sent, photo, ruby, #, alaska, smok...",0.0,1.0,0.0,0.0


In [265]:
# word_count
disaster['word_count'] = disaster['clean_text'].apply(lambda x: len(x))
# hashtag_count
disaster['hashtag_count'] = disaster['clean_text'].apply(lambda x: len([c for c in x if c == '#']))
#df_test['hashtag_count'] = df_test['text'].apply(lambda x: len([c for c in str(x) if c == '#']))

# mention_count
disaster['mention_count'] = disaster['clean_text'].apply(lambda x: len([c for c in x if c == '@']))
#df_test['mention_count'] = df_test['text'].apply(lambda x: len([c for c in str(x) if c == '@']))

In [266]:
cleaned_text = text_data

In [267]:
def clean_text_tfidf(arr):
    cleaned_texts = []
    for row in arr:
        text = "".join((char for char in row[1] if char not in string.punctuation))
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = word_tokenize(text)
        text = [lemmatizer.lemmatize(w.lower()) for w in text if not w in stop_words]
        text = ' '.join(text)
        text = text.replace('# ','#')
        cleaned_texts.append(text)
        
    return np.array(cleaned_texts)

In [268]:
cleaned_text = clean_text_tfidf(cleaned_text)

In [269]:
# put it in tfidf vectorizer
tfidf = TfidfVectorizer(min_df = 5, ngram_range = (2,2))
features = tfidf.fit_transform(cleaned_text)

In [270]:
features_df = pd.DataFrame(features.todense(),columns = tfidf.get_feature_names())

In [271]:
features_df.shape

(7613, 1227)

In [272]:
disaster.reset_index(drop=True, inplace=True)
features_df.reset_index(drop=True, inplace=True)

In [273]:
disaster = pd.concat([disaster,features_df],axis = 1)

In [274]:
disaster.shape

(7613, 1240)

In [206]:
disaster.columns

Index(['id', 'keyword', 'location', 'text', 'target', 'clean_text',
       'sentiment_score_positive', 'sentiment_score_neutral',
       'sentiment_score_negative', 'sentiment_score_compound',
       ...
       'youtube video', 'youtube video http', 'û_ http', 'û_ http co',
       'ûª http', 'ûª http co', 'ûªs stock', 'ûªt let', 'ûïwhen saw',
       'ûïwhen saw coach'],
      dtype='object', length=1021)

In [207]:
disaster_drop = disaster.drop(['id','keyword','location','text','clean_text'],axis = 1)

### More on Hashtags

In [318]:
def hashtags(cleaned_texts):
    hashtags = []
    for row in cleaned_text:
        text = re.findall(r"#(\w+)", row)
        hashtags.append(text)
        
    return np.array(hashtags)

In [322]:
## Analyzes Hashtags
hashtags_col = pd.Series(hashtags(cleaned_text))
dank = pd.concat([hashtags_col,disaster.target],axis=1).reset_index()

In [323]:
dank.columns

Index(['index', 0, 'target'], dtype='object')

In [330]:
dank.loc[dank['target']==1][0].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1653, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[]                       2419
[hot, prebreak, best]      13
[news]                     12
[worldnews]                 8
[hiroshima]                 8
                         ... 
[163]                       1
[wish]                      1
[theneeds, recipe]          1
[socal, realhiphop]         1
[fedex]                     1
Name: 0, Length: 660, dtype: int64

In [331]:
dank.loc[dank['target']==0][0].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1653, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[]                         3470
[hot, prebreak, best]        17
[gbbo]                       11
[nowplaying]                 10
[beyhive]                     8
                           ... 
[pp15000266818, pdx911]       1
[st]                          1
[unfml, deluge]               1
[kca, votejkt48id]            1
[poster, ergo, cuff]          1
Name: 0, Length: 698, dtype: int64

> # model

In [208]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import f1_score

> ## baseline

In [209]:
X = np.array(disaster_drop.drop('target', axis = 1))
y = np.array(disaster_drop.target)

In [210]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

> ### Creates RandomizedSearch/ GridSearch CV objects

In [213]:
## Defines Dictionaries for possible values to choose from
ada_boost_dict = {'n_estimators': np.linspace(50, 500, num=46, dtype = int), 
                  'learning_rate':np.linspace(0.05,1,num=20), 
                  'algorithm' : ['SAMME', 'SAMME.R']}

log_dict = {'penalty': ['l2','none'], 
            'fit_intercept': [True, False],
           'C': np.linspace(0,5,21)}

rf_dict = {'n_estimators': np.linspace(50, 1000, num=96, dtype = int),
          'criterion':['gini','entropy'],
          'min_samples_split':np.linspace(5,50,11, dtype = int),
          'max_features':['auto','sqrt','log2',None],
          'bootstrap':[True, False]}

In [214]:
adaboost = AdaBoostClassifier()
rf = RandomForestClassifier(n_jobs=-1)
log_reg = LogisticRegression(multi_class = 'ovr', max_iter = 1000)

In [215]:
ada_model = RandomizedSearchCV(adaboost, ada_boost_dict, 60, 
                               random_state = 42, cv = 3)
rf_model = RandomizedSearchCV(rf, rf_dict, 300, 
                               random_state = 42, cv = 3)
log_model = GridSearchCV(log_reg, log_dict, cv = 3)

> ### Finds best parameters for each model

In [216]:
ada_model.fit(x_train, y_train)

RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=AdaBoostClassifier(algorithm='SAMME.R',
                                                base_estimator=None,
                                                learning_rate=1.0,
                                                n_estimators=50,
                                                random_state=None),
                   iid='deprecated', n_iter=60, n_jobs=None,
                   param_distributions={'algorithm': ['SAMME', 'SAMME.R'],
                                        'learning_rate': array([0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55,
       0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95, 1.  ]),
                                        'n_estimators': array([ 50,  60,  70,  80,  90, 100, 110, 120, 130, 140, 150, 160, 170,
       180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300,
       310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430,
       440, 450, 46

In [217]:
rf_model.fit(x_train, y_train)

RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [218]:
log_model.fit(x_train, y_train)

  args=(X, target, 1. / C, sample_weight),
  out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
  grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

  args=(X, target, 1. / C, sample_weight),
  out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
  grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

  args=(X, target, 1. / C, sample_weight),
  out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)
  grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  "Setting penalty='none' will ignore the C and l1_ratio "
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to 

GridSearchCV(cv=3, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=1000, multi_class='ovr',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  , 2.25, 2.5 ,
       2.75, 3.  , 3.25, 3.5 , 3.75, 4.  , 4.25, 4.5 , 4.75, 5.  ]),
                         'fit_intercept': [True, False],
                         'penalty': ['l2', 'none']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=F

In [219]:
best_ada_params = ada_model.best_params_
best_log_params = log_model.best_params_
best_rf_params = rf_model.best_params_

> ### Determines the best model

In [220]:
best_log_reg = LogisticRegression(**best_log_params, multi_class = 'ovr', max_iter = 1000)
best_ada = AdaBoostClassifier(**best_ada_params)
best_rf = RandomForestClassifier(**best_rf_params, n_jobs=-1)

In [221]:
best_rf.fit(x_train, y_train)
best_log_reg.fit(x_train, y_train)
best_ada.fit(x_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.6,
                   n_estimators=280, random_state=None)

In [222]:
y_pred_rf = best_rf.predict(x_test)
y_pred_ada = best_ada.predict(x_test)
y_pred_log_reg = best_log_reg.predict(x_test)

In [223]:
f1_score(y_pred_rf, y_test)

0.6352313167259785

In [332]:
sum(y_pred_rf==y_test)/len(y_test)

0.7307944845699278

In [224]:
f1_score(y_pred_ada, y_test)

0.6215722120658136

In [333]:
sum(y_pred_ada==y_test)/len(y_test)

0.7281680892974393

In [225]:
f1_score(y_pred_log_reg, y_test)

0.6272401433691756

In [334]:
sum(y_pred_log_reg==y_test)/len(y_test)

0.726854891661195