In [39]:
import os, json, re
from pathlib import Path
import pandas as pd
import numpy as np
import glob
pd.set_option('display.max_columns', None)
import emoji

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

from sklearn.metrics import classification_report as clsr
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [40]:
def strip_ASCII(text):
    return re.sub("([^\x00-\x7F])+"," ", text)

def prediction(X, y, n, cv=False, k=10):

    labels = LabelEncoder()
    y = labels.fit_transform( np.asarray(y) )#.reshape(-1,1)
    names = labels.classes_
    print("shape of X:", X.shape)
    print("shape of y:", y.shape)
    
    X_train, X_test, y_train, y_test = tts(X, y, test_size=n)
    print("shape of X_train:", X_train.shape)
    print("shape of y_train:", y_train.shape)
    print("shape of X_test:", X_test.shape)
    print("shape of y_test:", y_test.shape)
    
    if cv:
        lg = LogisticRegressionCV(cv=k, random_state=0)        
    else:
        lg = LogisticRegression()
        
    lg.fit(X_train, y_train)
    y_pred = lg.predict(X_test).reshape(-1,1)
    print("shape of y_pred:", y_pred.shape)
    print(clsr(y_test, y_pred, target_names=names))
    print(cm(y_test, y_pred, labels=[0,1]))
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print('naive model (only no)')
    y_naive = np.array(['no']*len(y_test))
    y_naive = labels.fit_transform(y_naive)#.reshape(-1,1)
    print(clsr(y_test, y_naive, target_names=names))
    print(cm(y_test, y_naive, labels=[1,0]))

    return acc, f1

In [41]:
path_to_json = Path('C:\\Users\\niti.mishra\\Documents\\Personal\\cyberbullying\\data\\labelled_tweets')
json_pattern = os.path.join(path_to_json,'*.json')
file_list = glob.glob(json_pattern)
# file_list = file_list[:-2] 
file_list

['C:\\Users\\niti.mishra\\Documents\\Personal\\cyberbullying\\data\\labelled_tweets\\all_tweets.json']

In [24]:
tweets = pd.read_json(file_list[0],lines=True)
tweets.head()

Unnamed: 0,id,full_tweet,bullying_trace,bullying_role,form_of_bullying,bullying_post_type
0,1168174144724160518,"[@user, bullying, and, help, be, two, differen...",yes,bully,general,denial
1,1162980168232853504,"[@user, @user, i, mean, it, have, some, really...",no,,,
2,1161905202527526912,"[my, take, on, social, issue, like, this, be, ...",yes,reporter,general,report
3,1162217363023880193,"[i, want, to, watch, a, group, of, character, ...",no,,,
4,1168710966659272705,"[i, d, never, let, a, fucking, fetus, on, twit...",no,,,


In [42]:
tweets = tweets.iloc[:5900,:]
tweets.shape

(5900, 4)

In [43]:
target = 'bullying_trace'
tweets = tweets[['id', 'full_tweet', target]]

tweets['full_tweet'] = [ [strip_ASCII(token) for token in tweet] for tweet in tweets['full_tweet'] ]
tweets.head()

Unnamed: 0,id,full_tweet,bullying_trace
0,1168174144724160518,"[@user, bullying, and, help, be, two, differen...",yes
1,1162980168232853504,"[@user, @user, i, mean, it, have, some, really...",no
2,1161905202527526912,"[my, take, on, social, issue, like, this, be, ...",yes
3,1162217363023880193,"[i, want, to, watch, a, group, of, character, ...",no
4,1168710966659272705,"[i, d, never, let, a, fucking, fetus, on, twit...",no


In [66]:
# def strip_emoji(text):
# #     print(emoji.emoji_count(text))
#     new_text = re.sub(emoji.get_emoji_regexp(), r"", text)
#     return new_text


# tweets['full_tweet'] = [ [strip_emoji(token) for token in tweet] for tweet in tweets['full_tweet'] ]
# tweets.head()

In [67]:
# def strip_repeat(text):  
# #     return re.sub(r'(.)\1+', r'\1\1', text) 
#     return re.sub(r'(\w)\1+', r'\1', text)

# strip_repeat('heheehehe')
# # tweets['full_tweet'] = [ [strip_ASCII(token) for token in tweet] for tweet in tweets['full_tweet'] ]
# # tweets.head()

In [26]:
# tweets['len'] = tweets['full_tweet'].apply(len)
# tweets.head()

Unnamed: 0,id,full_tweet,bullying_trace,len
0,1168174144724160518,"[@user, bullying, and, help, be, two, differen...",yes,54
1,1162980168232853504,"[@user, @user, i, mean, it, have, some, really...",no,33
2,1161905202527526912,"[my, take, on, social, issue, like, this, be, ...",yes,41
3,1162217363023880193,"[i, want, to, watch, a, group, of, character, ...",no,43
4,1168710966659272705,"[i, d, never, let, a, fucking, fetus, on, twit...",no,29


In [27]:
# tweets.groupby('bullying_trace')['len'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
bullying_trace,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
no,4779.0,36.094999,13.886845,3.0,25.0,38.0,48.0,103.0
yes,3089.0,36.930722,14.506131,4.0,25.0,39.0,49.0,98.0


## Count Vectorizer ## 

In [70]:
cv = CountVectorizer()
cv_fit = cv.fit_transform([' '.join(tweet) for tweet in tweets['full_tweet'] ])

In [71]:
words = np.asarray(cv.get_feature_names())
count = np.asarray( cv_fit.toarray().sum(axis=0) )
corpusdictionary = dict(zip(words,count))

count = pd.DataFrame.from_dict(corpusdictionary, orient='index', columns=['count'])
count = count.sort_values(by=['count'], ascending=False)
count.to_csv('count_'+target+'.csv', index=True)
count

Unnamed: 0,count
be,11389
user,9712
to,7662
and,7512
the,7344
...,...
conjure,1
misjudge,1
misinterpret,1
conjured,1


In [72]:
X = cv_fit
X.shape

y = tweets[target]
freq = y.value_counts()           # count frequency of different classes in loan status
freq/sum(freq)*100   

no     60.739705
yes    39.260295
Name: bullying_trace, dtype: float64

In [13]:
acc, f1 = prediction(X, y, n=0.2)
print(acc, f1)

shape of X: (7868, 13986)
shape of y: (7868,)
shape of X_train: (6294, 13986)
shape of y_train: (6294,)
shape of X_test: (1574, 13986)
shape of y_test: (1574,)
shape of y_pred: (1574, 1)
              precision    recall  f1-score   support

          no       0.73      0.73      0.73       981
         yes       0.55      0.54      0.55       593

    accuracy                           0.66      1574
   macro avg       0.64      0.64      0.64      1574
weighted avg       0.66      0.66      0.66      1574

[[713 268]
 [270 323]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       981
         yes       0.00      0.00      0.00       593

    accuracy                           0.62      1574
   macro avg       0.31      0.50      0.38      1574
weighted avg       0.39      0.62      0.48      1574

[[  0 593]
 [  0 981]]
0.6581956797966964 0.545608108108108


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
acc, f1 = prediction(X, y, n=0.2, cv=True)
print(acc, f1)

shape of X: (7868, 13986)
shape of y: (7868,)
shape of X_train: (6294, 13986)
shape of y_train: (6294,)
shape of X_test: (1574, 13986)
shape of y_test: (1574,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1574, 1)
              precision    recall  f1-score   support

          no       0.70      0.84      0.76       969
         yes       0.62      0.42      0.50       605

    accuracy                           0.68      1574
   macro avg       0.66      0.63      0.63      1574
weighted avg       0.67      0.68      0.66      1574

[[810 159]
 [350 255]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.76       969
         yes       0.00      0.00      0.00       605

    accuracy                           0.62      1574
   macro avg       0.31      0.50      0.38      1574
weighted avg       0.38      0.62      0.47      1574

[[  0 605]
 [  0 969]]
0.6766200762388819 0.5004906771344455


  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
idx = range(100, X.shape[0], 500)
scores = { }
for i in idx: 
    print(i)
    acc, f1 = prediction(X[:i,], y[:i], n=0.2, cv=True)
    scores[i] = [acc , f1]

100
shape of X: (100, 13986)
shape of y: (100,)
shape of X_train: (80, 13986)
shape of y_train: (80,)
shape of X_test: (20, 13986)
shape of y_test: (20,)
shape of y_pred: (20, 1)
              precision    recall  f1-score   support

          no       0.90      0.56      0.69        16
         yes       0.30      0.75      0.43         4

    accuracy                           0.60        20
   macro avg       0.60      0.66      0.56        20
weighted avg       0.78      0.60      0.64        20

[[9 7]
 [1 3]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.80      1.00      0.89        16
         yes       0.00      0.00      0.00         4

    accuracy                           0.80        20
   macro avg       0.40      0.50      0.44        20
weighted avg       0.64      0.80      0.71        20

[[ 0  4]
 [ 0 16]]
600
shape of X: (600, 13986)
shape of y: (600,)
shape of X_train: (480, 13986)
shape of y_train: (480,)
shape o

  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (120, 1)
              precision    recall  f1-score   support

          no       0.70      0.74      0.72        80
         yes       0.42      0.38      0.39        40

    accuracy                           0.62       120
   macro avg       0.56      0.56      0.56       120
weighted avg       0.61      0.62      0.61       120

[[59 21]
 [25 15]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.67      1.00      0.80        80
         yes       0.00      0.00      0.00        40

    accuracy                           0.67       120
   macro avg       0.33      0.50      0.40       120
weighted avg       0.44      0.67      0.53       120

[[ 0 40]
 [ 0 80]]
1100
shape of X: (1100, 13986)
shape of y: (1100,)
shape of X_train: (880, 13986)
shape of y_train: (880,)
shape of X_test: (220, 13986)
shape of y_test: (220,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (220, 1)
              precision    recall  f1-score   support

          no       0.63      0.88      0.73       120
         yes       0.72      0.39      0.51       100

    accuracy                           0.65       220
   macro avg       0.68      0.63      0.62       220
weighted avg       0.67      0.65      0.63       220

[[105  15]
 [ 61  39]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.55      1.00      0.71       120
         yes       0.00      0.00      0.00       100

    accuracy                           0.55       220
   macro avg       0.27      0.50      0.35       220
weighted avg       0.30      0.55      0.39       220

[[  0 100]
 [  0 120]]
1600
shape of X: (1600, 13986)
shape of y: (1600,)
shape of X_train: (1280, 13986)
shape of y_train: (1280,)
shape of X_test: (320, 13986)
shape of y_test: (320,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (320, 1)
              precision    recall  f1-score   support

          no       0.71      0.88      0.79       206
         yes       0.62      0.35      0.45       114

    accuracy                           0.69       320
   macro avg       0.66      0.61      0.62       320
weighted avg       0.68      0.69      0.66       320

[[181  25]
 [ 74  40]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.64      1.00      0.78       206
         yes       0.00      0.00      0.00       114

    accuracy                           0.64       320
   macro avg       0.32      0.50      0.39       320
weighted avg       0.41      0.64      0.50       320

[[  0 114]
 [  0 206]]
2100
shape of X: (2100, 13986)
shape of y: (2100,)
shape of X_train: (1680, 13986)
shape of y_train: (1680,)
shape of X_test: (420, 13986)
shape of y_test: (420,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (420, 1)
              precision    recall  f1-score   support

          no       0.72      0.93      0.81       282
         yes       0.64      0.27      0.38       138

    accuracy                           0.71       420
   macro avg       0.68      0.60      0.59       420
weighted avg       0.69      0.71      0.67       420

[[261  21]
 [101  37]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.67      1.00      0.80       282
         yes       0.00      0.00      0.00       138

    accuracy                           0.67       420
   macro avg       0.34      0.50      0.40       420
weighted avg       0.45      0.67      0.54       420

[[  0 138]
 [  0 282]]
2600
shape of X: (2600, 13986)
shape of y: (2600,)
shape of X_train: (2080, 13986)
shape of y_train: (2080,)
shape of X_test: (520, 13986)
shape of y_test: (520,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (520, 1)
              precision    recall  f1-score   support

          no       0.76      0.92      0.83       358
         yes       0.67      0.35      0.46       162

    accuracy                           0.74       520
   macro avg       0.72      0.64      0.64       520
weighted avg       0.73      0.74      0.72       520

[[331  27]
 [106  56]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.69      1.00      0.82       358
         yes       0.00      0.00      0.00       162

    accuracy                           0.69       520
   macro avg       0.34      0.50      0.41       520
weighted avg       0.47      0.69      0.56       520

[[  0 162]
 [  0 358]]
3100
shape of X: (3100, 13986)
shape of y: (3100,)
shape of X_train: (2480, 13986)
shape of y_train: (2480,)
shape of X_test: (620, 13986)
shape of y_test: (620,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (620, 1)
              precision    recall  f1-score   support

          no       0.75      0.91      0.82       440
         yes       0.53      0.24      0.33       180

    accuracy                           0.72       620
   macro avg       0.64      0.58      0.58       620
weighted avg       0.68      0.72      0.68       620

[[401  39]
 [136  44]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.71      1.00      0.83       440
         yes       0.00      0.00      0.00       180

    accuracy                           0.71       620
   macro avg       0.35      0.50      0.42       620
weighted avg       0.50      0.71      0.59       620

[[  0 180]
 [  0 440]]
3600
shape of X: (3600, 13986)
shape of y: (3600,)
shape of X_train: (2880, 13986)
shape of y_train: (2880,)
shape of X_test: (720, 13986)
shape of y_test: (720,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (720, 1)
              precision    recall  f1-score   support

          no       0.75      0.93      0.83       495
         yes       0.66      0.32      0.43       225

    accuracy                           0.73       720
   macro avg       0.70      0.62      0.63       720
weighted avg       0.72      0.73      0.70       720

[[458  37]
 [154  71]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.69      1.00      0.81       495
         yes       0.00      0.00      0.00       225

    accuracy                           0.69       720
   macro avg       0.34      0.50      0.41       720
weighted avg       0.47      0.69      0.56       720

[[  0 225]
 [  0 495]]
4100
shape of X: (4100, 13986)
shape of y: (4100,)
shape of X_train: (3280, 13986)
shape of y_train: (3280,)
shape of X_test: (820, 13986)
shape of y_test: (820,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (820, 1)
              precision    recall  f1-score   support

          no       0.71      0.91      0.80       528
         yes       0.68      0.33      0.44       292

    accuracy                           0.70       820
   macro avg       0.69      0.62      0.62       820
weighted avg       0.70      0.70      0.67       820

[[482  46]
 [196  96]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.64      1.00      0.78       528
         yes       0.00      0.00      0.00       292

    accuracy                           0.64       820
   macro avg       0.32      0.50      0.39       820
weighted avg       0.41      0.64      0.50       820

[[  0 292]
 [  0 528]]
4600
shape of X: (4600, 13986)
shape of y: (4600,)
shape of X_train: (3680, 13986)
shape of y_train: (3680,)
shape of X_test: (920, 13986)
shape of y_test: (920,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (920, 1)
              precision    recall  f1-score   support

          no       0.70      0.89      0.78       591
         yes       0.61      0.31      0.41       329

    accuracy                           0.68       920
   macro avg       0.65      0.60      0.60       920
weighted avg       0.67      0.68      0.65       920

[[525  66]
 [226 103]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.64      1.00      0.78       591
         yes       0.00      0.00      0.00       329

    accuracy                           0.64       920
   macro avg       0.32      0.50      0.39       920
weighted avg       0.41      0.64      0.50       920

[[  0 329]
 [  0 591]]
5100
shape of X: (5100, 13986)
shape of y: (5100,)
shape of X_train: (4080, 13986)
shape of y_train: (4080,)
shape of X_test: (1020, 13986)
shape of y_test: (1020,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1020, 1)
              precision    recall  f1-score   support

          no       0.71      0.87      0.79       663
         yes       0.60      0.35      0.44       357

    accuracy                           0.69      1020
   macro avg       0.66      0.61      0.62      1020
weighted avg       0.67      0.69      0.67      1020

[[579  84]
 [231 126]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.65      1.00      0.79       663
         yes       0.00      0.00      0.00       357

    accuracy                           0.65      1020
   macro avg       0.33      0.50      0.39      1020
weighted avg       0.42      0.65      0.51      1020

[[  0 357]
 [  0 663]]
5600
shape of X: (5600, 13986)
shape of y: (5600,)
shape of X_train: (4480, 13986)
shape of y_train: (4480,)
shape of X_test: (1120, 13986)
shape of y_test: (1120,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (1120, 1)
              precision    recall  f1-score   support

          no       0.70      0.87      0.78       699
         yes       0.65      0.39      0.48       421

    accuracy                           0.69      1120
   macro avg       0.67      0.63      0.63      1120
weighted avg       0.68      0.69      0.67      1120

[[610  89]
 [258 163]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       699
         yes       0.00      0.00      0.00       421

    accuracy                           0.62      1120
   macro avg       0.31      0.50      0.38      1120
weighted avg       0.39      0.62      0.48      1120

[[  0 421]
 [  0 699]]
6100
shape of X: (6100, 13986)
shape of y: (6100,)
shape of X_train: (4880, 13986)
shape of y_train: (4880,)
shape of X_test: (1220, 13986)
shape of y_test: (1220,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1220, 1)
              precision    recall  f1-score   support

          no       0.69      0.88      0.77       736
         yes       0.69      0.40      0.51       484

    accuracy                           0.69      1220
   macro avg       0.69      0.64      0.64      1220
weighted avg       0.69      0.69      0.67      1220

[[647  89]
 [288 196]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.60      1.00      0.75       736
         yes       0.00      0.00      0.00       484

    accuracy                           0.60      1220
   macro avg       0.30      0.50      0.38      1220
weighted avg       0.36      0.60      0.45      1220

[[  0 484]
 [  0 736]]
6600
shape of X: (6600, 13986)
shape of y: (6600,)
shape of X_train: (5280, 13986)
shape of y_train: (5280,)
shape of X_test: (1320, 13986)
shape of y_test: (1320,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1320, 1)
              precision    recall  f1-score   support

          no       0.70      0.87      0.77       820
         yes       0.63      0.38      0.47       500

    accuracy                           0.68      1320
   macro avg       0.66      0.62      0.62      1320
weighted avg       0.67      0.68      0.66      1320

[[710 110]
 [311 189]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       820
         yes       0.00      0.00      0.00       500

    accuracy                           0.62      1320
   macro avg       0.31      0.50      0.38      1320
weighted avg       0.39      0.62      0.48      1320

[[  0 500]
 [  0 820]]
7100
shape of X: (7100, 13986)
shape of y: (7100,)
shape of X_train: (5680, 13986)
shape of y_train: (5680,)
shape of X_test: (1420, 13986)
shape of y_test: (1420,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1420, 1)
              precision    recall  f1-score   support

          no       0.70      0.86      0.78       873
         yes       0.66      0.42      0.51       547

    accuracy                           0.69      1420
   macro avg       0.68      0.64      0.64      1420
weighted avg       0.69      0.69      0.67      1420

[[754 119]
 [317 230]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.61      1.00      0.76       873
         yes       0.00      0.00      0.00       547

    accuracy                           0.61      1420
   macro avg       0.31      0.50      0.38      1420
weighted avg       0.38      0.61      0.47      1420

[[  0 547]
 [  0 873]]
7600
shape of X: (7600, 13986)
shape of y: (7600,)
shape of X_train: (6080, 13986)
shape of y_train: (6080,)
shape of X_test: (1520, 13986)
shape of y_test: (1520,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1520, 1)
              precision    recall  f1-score   support

          no       0.70      0.86      0.78       939
         yes       0.65      0.41      0.51       581

    accuracy                           0.69      1520
   macro avg       0.68      0.64      0.64      1520
weighted avg       0.68      0.69      0.67      1520

[[810 129]
 [341 240]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.76       939
         yes       0.00      0.00      0.00       581

    accuracy                           0.62      1520
   macro avg       0.31      0.50      0.38      1520
weighted avg       0.38      0.62      0.47      1520

[[  0 581]
 [  0 939]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
scores

{100: [0.6, 0.4285714285714285],
 600: [0.6166666666666667, 0.3947368421052631],
 1100: [0.6545454545454545, 0.5064935064935066],
 1600: [0.690625, 0.446927374301676],
 2100: [0.7095238095238096, 0.37755102040816324],
 2600: [0.7442307692307693, 0.45714285714285713],
 3100: [0.717741935483871, 0.3346007604562738],
 3600: [0.7347222222222223, 0.42642642642642636],
 4100: [0.7048780487804878, 0.44239631336405527],
 4600: [0.6826086956521739, 0.4136546184738956],
 5100: [0.6911764705882353, 0.4444444444444445],
 5600: [0.6901785714285714, 0.4843982169390787],
 6100: [0.690983606557377, 0.5097529258777632],
 6600: [0.681060606060606, 0.4730913642052566],
 7100: [0.6929577464788732, 0.5133928571428572],
 7600: [0.6907894736842105, 0.5052631578947369]}

In [74]:
labels = LabelEncoder()
y = labels.fit_transform( np.asarray(y) )
names = labels.classes_
print("shape of X:", X.shape)
print("shape of y:", y.shape)

X_train, X_test, y_train, y_test =tts(X, y, random_state=0, stratify=y, shuffle=True)
print("shape of X_train:", X_train.shape)
print("shape of y_train:", y_train.shape)
print("shape of X_test:", X_test.shape)
print("shape of y_test:", y_test.shape)

scores_copy = { }
idx = range(200, X_train.shape[0], 300)
for i in idx:
    lg = LogisticRegressionCV(cv=10, random_state=0, max_iter=1000)         
    lg.fit(X_train[:i,], y_train[:i])
    y_pred = lg.predict(X_test)
    print("shape of y_pred:", y_pred.shape)
    
    print(clsr(y_test, y_pred))
    print(cm(y_test, y_pred))#, labels=names))
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, labels=np.unique(y_pred))
    
    scores_copy[i] = [acc , f1]

scores_copy

shape of X: (7868, 13986)
shape of y: (7868,)
shape of X_train: (5901, 13986)
shape of y_train: (5901,)
shape of X_test: (1967, 13986)
shape of y_test: (1967,)
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.65      0.87      0.74      1195
           1       0.58      0.29      0.38       772

    accuracy                           0.64      1967
   macro avg       0.62      0.58      0.56      1967
weighted avg       0.62      0.64      0.60      1967

[[1035  160]
 [ 550  222]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.66      0.84      0.74      1195
           1       0.59      0.34      0.43       772

    accuracy                           0.65      1967
   macro avg       0.63      0.59      0.59      1967
weighted avg       0.63      0.65      0.62      1967

[[1009  186]
 [ 509  263]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

  

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.69      0.82      0.75      1195
           1       0.61      0.43      0.50       772

    accuracy                           0.67      1967
   macro avg       0.65      0.63      0.63      1967
weighted avg       0.66      0.67      0.65      1967

[[984 211]
 [442 330]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.83      0.76      1195
           1       0.63      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.67      0.64      0.64      1967
weighted avg       0.67      0.68      0.67      1967

[[996 199]
 [430 342]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.83      0.76      1195
           1       0.63      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.66      0.64      0.64      1967
weighted avg       0.67      0.68      0.66      1967

[[996 199]
 [434 338]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.84      0.76      1195
           1       0.64      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.67      0.64      0.64      1967
weighted avg       0.67      0.68      0.67      1967

[[999 196]
 [431 341]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.83      0.76      1195
           1       0.63      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.66      0.64      0.64      1967
weighted avg       0.67      0.68      0.66      1967

[[991 204]
 [431 341]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.83      0.76      1195
           1       0.63      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.66      0.63      0.64      1967
weighted avg       0.67      0.68      0.66      1967

[[992 203]
 [433 339]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.84      0.76      1195
           1       0.64      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.67      0.64      0.64      1967
weighted avg       0.68      0.68      0.67      1967

[[1001  194]
 [ 430  342]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.83      0.76      1195
           1       0.63      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.66      0.64      0.64      1967
weighted avg       0.67      0.68      0.66      1967

[[996 199]
 [435 337]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.84      0.76      1195
           1       0.64      0.45      0.53       772

    accuracy                           0.68      1967
   macro avg       0.67      0.64      0.64      1967
weighted avg       0.68      0.68      0.67      1967

[[1000  195]
 [ 427  345]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.84      0.76      1195
           1       0.64      0.44      0.52       772

    accuracy                           0.68      1967
   macro avg       0.67      0.64      0.64      1967
weighted avg       0.67      0.68      0.67      1967

[[1003  192]
 [ 433  339]]


{200: [0.6390442297915607, 0.38474870017331025],
 500: [0.646670055922725, 0.4307944307944308],
 800: [0.6415861718352821, 0.5031712473572938],
 1100: [0.6598881545500762, 0.4769351055512118],
 1400: [0.6639552618200305, 0.487993803253292],
 1700: [0.6649720386375191, 0.49346656418139895],
 2000: [0.6700559227249618, 0.5079605761940864],
 2300: [0.6649720386375191, 0.49424405218726014],
 2600: [0.6634468734112863, 0.48919753086419754],
 2900: [0.6680223690899848, 0.4910366328916602],
 3200: [0.6680223690899848, 0.5026656511805027],
 3500: [0.6802236908998475, 0.520944402132521],
 3800: [0.6781901372648703, 0.5164247517188695],
 4100: [0.681240467717336, 0.5210084033613446],
 4400: [0.6771733604473817, 0.5178435839028094],
 4700: [0.6766649720386375, 0.5159817351598175],
 5000: [0.6827656329435688, 0.5229357798165137],
 5300: [0.6776817488561261, 0.5152905198776758],
 5600: [0.6837824097610574, 0.5259146341463413],
 5900: [0.6822572445348246, 0.5203376822716808]}

## TDIDF Vectorizer ## 

In [48]:
def identity(words):
    return words
vectorizer = TfidfVectorizer(tokenizer=identity, encoding='utf-8', preprocessor=None, use_idf=True,
                             lowercase=False, ngram_range=(1,2)
#                              , stop_words='english',
#                              min_df=5, max_df=0.8
                            )

In [49]:
tfidf_fit = vectorizer.fit_transform([tweet for tweet in tweets['full_tweet']])
X = tfidf_fit
X.shape

y = tweets[target]
freq = y.value_counts()           # count frequency of different classes in loan status
freq/sum(freq)*100  

no     62.084746
yes    37.915254
Name: bullying_trace, dtype: float64

In [33]:
# X = pd.DataFrame(X)
# X = pd.concat( [X, tweets['len']], axis=1)
# X.shape

(7868, 2)

In [8]:
df_idf = pd.DataFrame(vectorizer.idf_, index=vectorizer.get_feature_names(), columns=['idf_weights'])
weights = df_idf.sort_values(by=['idf_weights'], ascending=False)
weights.to_csv('tdidfweights_'+target+'.csv', index=True)
# weights.to_csv('tdidfweights_noemoji.csv', index=True)
weights
# the lower the idf value of a word, the less unique it is to any particular document
# terms with higher weight scores are considered to be more important

Unnamed: 0,idf_weights
levi have,9.277539
on minority,9.277539
on people's,9.277539
on pc,9.277539
on particular,9.277539
...,...
and,1.565095
to,1.513031
@user,1.507527
be,1.320012


In [9]:
# tdidf score of first tweet
# if a word occurs multiple times in a document, we should boost its relevance as it should be 
# more meaningful than other words that appear fewer times (TF)
# On the other hand, if a word occurs many times in all documents, maybe it is just a frequent word
vector = pd.DataFrame(X[1].T.todense(), index=vectorizer.get_feature_names(), columns=['tdidf'])
vector.sort_values(by=['tdidf'], ascending=False)

Unnamed: 0,tdidf
pretty depress,0.195831
really graphic,0.195831
first game,0.195831
game at,0.195831
depress at,0.195831
...,...
force compliance,0.000000
force civil,0.000000
force christian,0.000000
force china,0.000000


In [10]:
D = vectorizer.vocabulary_
max_word = max(D, key=D.get)
max_value = max(D.values())
print(max_word, max_value)

~ you 126620


In [50]:
acc, f1 = prediction(X, y, n=0.2)
print(acc, f1)

shape of X: (5900, 101560)
shape of y: (5900,)
shape of X_train: (4720, 101560)
shape of y_train: (4720,)
shape of X_test: (1180, 101560)
shape of y_test: (1180,)
shape of y_pred: (1180, 1)
              precision    recall  f1-score   support

          no       0.70      0.92      0.80       736
         yes       0.73      0.35      0.47       444

    accuracy                           0.71      1180
   macro avg       0.72      0.64      0.64      1180
weighted avg       0.71      0.71      0.68      1180

[[680  56]
 [289 155]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       736
         yes       0.00      0.00      0.00       444

    accuracy                           0.62      1180
   macro avg       0.31      0.50      0.38      1180
weighted avg       0.39      0.62      0.48      1180

[[  0 444]
 [  0 736]]
0.7076271186440678 0.47328244274809156


  _warn_prf(average, modifier, msg_start, len(result))


In [51]:
acc, f1 = prediction(X, y, n=0.2, cv=True)
print(acc, f1)

shape of X: (5900, 101560)
shape of y: (5900,)
shape of X_train: (4720, 101560)
shape of y_train: (4720,)
shape of X_test: (1180, 101560)
shape of y_test: (1180,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1180, 1)
              precision    recall  f1-score   support

          no       0.73      0.89      0.80       751
         yes       0.69      0.43      0.53       429

    accuracy                           0.72      1180
   macro avg       0.71      0.66      0.66      1180
weighted avg       0.72      0.72      0.70      1180

[[668  83]
 [246 183]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.64      1.00      0.78       751
         yes       0.00      0.00      0.00       429

    accuracy                           0.64      1180
   macro avg       0.32      0.50      0.39      1180
weighted avg       0.41      0.64      0.50      1180

[[  0 429]
 [  0 751]]
0.7211864406779661 0.5266187050359712


  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
idx = range(100, X.shape[0], 500)
scores_tfidf = { }
for i in idx: 
    print(i)
    acc, f1 = prediction(X[:i,], y[:i], n=0.2, cv=True)
    scores_tfidf[i] = [acc , f1]

100
shape of X: (100, 126621)
shape of y: (100,)
shape of X_train: (80, 126621)
shape of y_train: (80,)
shape of X_test: (20, 126621)
shape of y_test: (20,)
shape of y_pred: (20, 1)
              precision    recall  f1-score   support

          no       0.55      1.00      0.71        11
         yes       0.00      0.00      0.00         9

    accuracy                           0.55        20
   macro avg       0.28      0.50      0.35        20
weighted avg       0.30      0.55      0.39        20

[[11  0]
 [ 9  0]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.55      1.00      0.71        11
         yes       0.00      0.00      0.00         9

    accuracy                           0.55        20
   macro avg       0.28      0.50      0.35        20
weighted avg       0.30      0.55      0.39        20

[[ 0  9]
 [ 0 11]]
600
shape of X: (600, 126621)
shape of y: (600,)
shape of X_train: (480, 126621)
shape of y_train: (480,

  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (120, 1)
              precision    recall  f1-score   support

          no       0.62      0.90      0.74        70
         yes       0.63      0.24      0.35        50

    accuracy                           0.62       120
   macro avg       0.63      0.57      0.54       120
weighted avg       0.63      0.62      0.57       120

[[63  7]
 [38 12]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.58      1.00      0.74        70
         yes       0.00      0.00      0.00        50

    accuracy                           0.58       120
   macro avg       0.29      0.50      0.37       120
weighted avg       0.34      0.58      0.43       120

[[ 0 50]
 [ 0 70]]
1100
shape of X: (1100, 126621)
shape of y: (1100,)
shape of X_train: (880, 126621)
shape of y_train: (880,)
shape of X_test: (220, 126621)
shape of y_test: (220,)


  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (220, 1)
              precision    recall  f1-score   support

          no       0.72      0.90      0.80       136
         yes       0.72      0.43      0.54        84

    accuracy                           0.72       220
   macro avg       0.72      0.66      0.67       220
weighted avg       0.72      0.72      0.70       220

[[122  14]
 [ 48  36]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.76       136
         yes       0.00      0.00      0.00        84

    accuracy                           0.62       220
   macro avg       0.31      0.50      0.38       220
weighted avg       0.38      0.62      0.47       220

[[  0  84]
 [  0 136]]
1600
shape of X: (1600, 126621)
shape of y: (1600,)
shape of X_train: (1280, 126621)
shape of y_train: (1280,)
shape of X_test: (320, 126621)
shape of y_test: (320,)


  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (320, 1)
              precision    recall  f1-score   support

          no       0.76      0.94      0.84       216
         yes       0.74      0.38      0.50       104

    accuracy                           0.75       320
   macro avg       0.75      0.66      0.67       320
weighted avg       0.75      0.75      0.73       320

[[202  14]
 [ 65  39]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.68      1.00      0.81       216
         yes       0.00      0.00      0.00       104

    accuracy                           0.68       320
   macro avg       0.34      0.50      0.40       320
weighted avg       0.46      0.68      0.54       320

[[  0 104]
 [  0 216]]
2100
shape of X: (2100, 126621)
shape of y: (2100,)
shape of X_train: (1680, 126621)
shape of y_train: (1680,)
shape of X_test: (420, 126621)
shape of y_test: (420,)


  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (420, 1)
              precision    recall  f1-score   support

          no       0.75      0.90      0.82       285
         yes       0.63      0.36      0.46       135

    accuracy                           0.73       420
   macro avg       0.69      0.63      0.64       420
weighted avg       0.71      0.73      0.70       420

[[256  29]
 [ 86  49]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.68      1.00      0.81       285
         yes       0.00      0.00      0.00       135

    accuracy                           0.68       420
   macro avg       0.34      0.50      0.40       420
weighted avg       0.46      0.68      0.55       420

[[  0 135]
 [  0 285]]
2600
shape of X: (2600, 126621)
shape of y: (2600,)
shape of X_train: (2080, 126621)
shape of y_train: (2080,)
shape of X_test: (520, 126621)
shape of y_test: (520,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


shape of y_pred: (520, 1)
              precision    recall  f1-score   support

          no       0.76      0.94      0.84       366
         yes       0.68      0.31      0.42       154

    accuracy                           0.75       520
   macro avg       0.72      0.62      0.63       520
weighted avg       0.74      0.75      0.72       520

[[344  22]
 [107  47]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.70      1.00      0.83       366
         yes       0.00      0.00      0.00       154

    accuracy                           0.70       520
   macro avg       0.35      0.50      0.41       520
weighted avg       0.50      0.70      0.58       520

[[  0 154]
 [  0 366]]
3100
shape of X: (3100, 126621)
shape of y: (3100,)
shape of X_train: (2480, 126621)
shape of y_train: (2480,)
shape of X_test: (620, 126621)
shape of y_test: (620,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

shape of y_pred: (620, 1)
              precision    recall  f1-score   support

          no       0.73      0.94      0.82       422
         yes       0.67      0.26      0.37       198

    accuracy                           0.72       620
   macro avg       0.70      0.60      0.60       620
weighted avg       0.71      0.72      0.68       620

[[397  25]
 [147  51]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.68      1.00      0.81       422
         yes       0.00      0.00      0.00       198

    accuracy                           0.68       620
   macro avg       0.34      0.50      0.40       620
weighted avg       0.46      0.68      0.55       620

[[  0 198]
 [  0 422]]
3600
shape of X: (3600, 126621)
shape of y: (3600,)
shape of X_train: (2880, 126621)
shape of y_train: (2880,)
shape of X_test: (720, 126621)
shape of y_test: (720,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

shape of y_pred: (720, 1)
              precision    recall  f1-score   support

          no       0.78      0.87      0.82       511
         yes       0.56      0.40      0.47       209

    accuracy                           0.74       720
   macro avg       0.67      0.64      0.65       720
weighted avg       0.72      0.74      0.72       720

[[446  65]
 [125  84]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.71      1.00      0.83       511
         yes       0.00      0.00      0.00       209

    accuracy                           0.71       720
   macro avg       0.35      0.50      0.42       720
weighted avg       0.50      0.71      0.59       720

[[  0 209]
 [  0 511]]
4100
shape of X: (4100, 126621)
shape of y: (4100,)
shape of X_train: (3280, 126621)
shape of y_train: (3280,)
shape of X_test: (820, 126621)
shape of y_test: (820,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  _warn_prf(average, modifier, msg_start, len(result))


shape of y_pred: (820, 1)
              precision    recall  f1-score   support

          no       0.74      0.88      0.81       538
         yes       0.65      0.41      0.50       282

    accuracy                           0.72       820
   macro avg       0.69      0.65      0.65       820
weighted avg       0.71      0.72      0.70       820

[[475  63]
 [166 116]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.66      1.00      0.79       538
         yes       0.00      0.00      0.00       282

    accuracy                           0.66       820
   macro avg       0.33      0.50      0.40       820
weighted avg       0.43      0.66      0.52       820

[[  0 282]
 [  0 538]]
4600
shape of X: (4600, 126621)
shape of y: (4600,)
shape of X_train: (3680, 126621)
shape of y_train: (3680,)
shape of X_test: (920, 126621)
shape of y_test: (920,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (920, 1)
              precision    recall  f1-score   support

          no       0.73      0.90      0.81       583
         yes       0.71      0.41      0.52       337

    accuracy                           0.72       920
   macro avg       0.72      0.66      0.66       920
weighted avg       0.72      0.72      0.70       920

[[527  56]
 [199 138]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.63      1.00      0.78       583
         yes       0.00      0.00      0.00       337

    accuracy                           0.63       920
   macro avg       0.32      0.50      0.39       920
weighted avg       0.40      0.63      0.49       920

[[  0 337]
 [  0 583]]
5100
shape of X: (5100, 126621)
shape of y: (5100,)
shape of X_train: (4080, 126621)
shape of y_train: (4080,)
shape of X_test: (1020, 126621)
shape of y_test: (1020,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1020, 1)
              precision    recall  f1-score   support

          no       0.72      0.90      0.80       636
         yes       0.72      0.41      0.52       384

    accuracy                           0.72      1020
   macro avg       0.72      0.66      0.66      1020
weighted avg       0.72      0.72      0.70      1020

[[574  62]
 [226 158]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       636
         yes       0.00      0.00      0.00       384

    accuracy                           0.62      1020
   macro avg       0.31      0.50      0.38      1020
weighted avg       0.39      0.62      0.48      1020

[[  0 384]
 [  0 636]]
5600
shape of X: (5600, 126621)
shape of y: (5600,)
shape of X_train: (4480, 126621)
shape of y_train: (4480,)
shape of X_test: (1120, 126621)
shape of y_test: (1120,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1120, 1)
              precision    recall  f1-score   support

          no       0.73      0.89      0.80       699
         yes       0.71      0.47      0.56       421

    accuracy                           0.73      1120
   macro avg       0.72      0.68      0.68      1120
weighted avg       0.73      0.73      0.71      1120

[[620  79]
 [225 196]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       699
         yes       0.00      0.00      0.00       421

    accuracy                           0.62      1120
   macro avg       0.31      0.50      0.38      1120
weighted avg       0.39      0.62      0.48      1120

[[  0 421]
 [  0 699]]
6100
shape of X: (6100, 126621)
shape of y: (6100,)
shape of X_train: (4880, 126621)
shape of y_train: (4880,)
shape of X_test: (1220, 126621)
shape of y_test: (1220,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


shape of y_pred: (1220, 1)
              precision    recall  f1-score   support

          no       0.72      0.89      0.80       775
         yes       0.67      0.40      0.50       445

    accuracy                           0.71      1220
   macro avg       0.70      0.64      0.65      1220
weighted avg       0.70      0.71      0.69      1220

[[687  88]
 [266 179]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.64      1.00      0.78       775
         yes       0.00      0.00      0.00       445

    accuracy                           0.64      1220
   macro avg       0.32      0.50      0.39      1220
weighted avg       0.40      0.64      0.49      1220

[[  0 445]
 [  0 775]]
6600
shape of X: (6600, 126621)
shape of y: (6600,)
shape of X_train: (5280, 126621)
shape of y_train: (5280,)
shape of X_test: (1320, 126621)
shape of y_test: (1320,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


shape of y_pred: (1320, 1)
              precision    recall  f1-score   support

          no       0.72      0.89      0.79       810
         yes       0.72      0.45      0.55       510

    accuracy                           0.72      1320
   macro avg       0.72      0.67      0.67      1320
weighted avg       0.72      0.72      0.70      1320

[[721  89]
 [283 227]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.61      1.00      0.76       810
         yes       0.00      0.00      0.00       510

    accuracy                           0.61      1320
   macro avg       0.31      0.50      0.38      1320
weighted avg       0.38      0.61      0.47      1320

[[  0 510]
 [  0 810]]
7100
shape of X: (7100, 126621)
shape of y: (7100,)
shape of X_train: (5680, 126621)
shape of y_train: (5680,)
shape of X_test: (1420, 126621)
shape of y_test: (1420,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


shape of y_pred: (1420, 1)
              precision    recall  f1-score   support

          no       0.73      0.83      0.77       880
         yes       0.64      0.49      0.55       540

    accuracy                           0.70      1420
   macro avg       0.68      0.66      0.66      1420
weighted avg       0.69      0.70      0.69      1420

[[731 149]
 [276 264]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       880
         yes       0.00      0.00      0.00       540

    accuracy                           0.62      1420
   macro avg       0.31      0.50      0.38      1420
weighted avg       0.38      0.62      0.47      1420

[[  0 540]
 [  0 880]]
7600
shape of X: (7600, 126621)
shape of y: (7600,)
shape of X_train: (6080, 126621)
shape of y_train: (6080,)
shape of X_test: (1520, 126621)
shape of y_test: (1520,)


  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://s

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1520, 1)
              precision    recall  f1-score   support

          no       0.74      0.85      0.79       945
         yes       0.67      0.51      0.58       575

    accuracy                           0.72      1520
   macro avg       0.70      0.68      0.68      1520
weighted avg       0.71      0.72      0.71      1520

[[799 146]
 [284 291]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.62      1.00      0.77       945
         yes       0.00      0.00      0.00       575

    accuracy                           0.62      1520
   macro avg       0.31      0.50      0.38      1520
weighted avg       0.39      0.62      0.48      1520

[[  0 575]
 [  0 945]]


  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
scores_tfidf

{100: [0.55, 0.0],
 600: [0.625, 0.34782608695652173],
 1100: [0.7181818181818181, 0.5373134328358209],
 1600: [0.753125, 0.4968152866242037],
 2100: [0.7261904761904762, 0.46009389671361506],
 2600: [0.7519230769230769, 0.4215246636771301],
 3100: [0.7225806451612903, 0.3722627737226277],
 3600: [0.7361111111111112, 0.4692737430167597],
 4100: [0.7207317073170731, 0.5032537960954447],
 4600: [0.7228260869565217, 0.519774011299435],
 5100: [0.7176470588235294, 0.5231788079470198],
 5600: [0.7285714285714285, 0.5632183908045978],
 6100: [0.7098360655737705, 0.502808988764045],
 6600: [0.7181818181818181, 0.5496368038740921],
 7100: [0.7007042253521126, 0.5540398740818467],
 7600: [0.7171052631578947, 0.575098814229249]}

In [38]:
tfidf_fit = vectorizer.fit_transform([tweet for tweet in tweets['full_tweet']])
X = tfidf_fit
X.shape

y = tweets[target]
freq = y.value_counts()           # count frequency of different classes in loan status
freq/sum(freq)*100  

labels = LabelEncoder()
y = labels.fit_transform( np.asarray(y) )
names = labels.classes_
print("shape of X:", X.shape)
print("shape of y:", y.shape)

X_train, X_test, y_train, y_test =tts(X, y, random_state=0, stratify=y, shuffle=True)
print("shape of X_train:", X_train.shape)
print("shape of y_train:", y_train.shape)
print("shape of X_test:", X_test.shape)
print("shape of y_test:", y_test.shape)

scores_copy = { }
idx = range(200, X_train.shape[0], 200)
for i in idx:
    lg = LogisticRegressionCV(cv=10, random_state=0, max_iter=1000)         
    lg.fit(X_train[:i,], y_train[:i])
    y_pred = lg.predict(X_test)
    print("shape of y_pred:", y_pred.shape)
    
    print(clsr(y_test, y_pred))
    print(cm(y_test, y_pred))#, labels=names))
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, labels=np.unique(y_pred))
    
    scores_copy[i] = [acc , f1]

scores_copy

shape of X: (7868, 4885)
shape of y: (7868,)
shape of X_train: (5901, 4885)
shape of y_train: (5901,)
shape of X_test: (1967, 4885)
shape of y_test: (1967,)
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.61      0.96      0.75      1195
           1       0.48      0.06      0.11       772

    accuracy                           0.61      1967
   macro avg       0.55      0.51      0.43      1967
weighted avg       0.56      0.61      0.50      1967

[[1143   52]
 [ 724   48]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.63      0.89      0.74      1195
           1       0.53      0.20      0.29       772

    accuracy                           0.62      1967
   macro avg       0.58      0.54      0.51      1967
weighted avg       0.59      0.62      0.56      1967

[[1061  134]
 [ 621  151]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

     

shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.69      0.80      0.74      1195
           1       0.60      0.45      0.51       772

    accuracy                           0.66      1967
   macro avg       0.64      0.63      0.63      1967
weighted avg       0.66      0.66      0.65      1967

[[958 237]
 [423 349]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.80      0.75      1195
           1       0.60      0.48      0.53       772

    accuracy                           0.67      1967
   macro avg       0.65      0.64      0.64      1967
weighted avg       0.66      0.67      0.66      1967

[[954 241]
 [404 368]]
shape of y_pred: (1967,)
              precision    recall  f1-score   support

           0       0.70      0.79      0.74      1195
           1       0.60      0.47      0.53       772

    accuracy                           0.67      1967
   macr

{200: [0.6054905948144382, 0.11009174311926606],
 400: [0.6161667513980681, 0.2857142857142857],
 600: [0.6217590238942552, 0.4577259475218659],
 800: [0.6359938993390951, 0.35957066189624326],
 1000: [0.6441281138790036, 0.37943262411347517],
 1200: [0.6380274529740722, 0.3945578231292517],
 1400: [0.643111337061515, 0.43111831442463533],
 1600: [0.6385358413828165, 0.4423529411764706],
 1800: [0.6420945602440264, 0.4525660964230171],
 2000: [0.6446365022877478, 0.4593967517401392],
 2200: [0.6497203863751907, 0.47283856159143073],
 2400: [0.6375190645653279, 0.4494208494208495],
 2600: [0.6446365022877478, 0.4684410646387832],
 2800: [0.6517539400101677, 0.48301886792452825],
 3000: [0.6578546009150992, 0.5040530582166545],
 3200: [0.6644636502287747, 0.5139911634756996],
 3400: [0.6614133197763091, 0.5131578947368421],
 3600: [0.6583629893238434, 0.5073313782991201],
 3800: [0.6624300965937977, 0.513909224011713],
 4000: [0.6634468734112863, 0.5146627565982406],
 4200: [0.6675139806

In [37]:
print(clsr(y_test, y_pred))
print(cm(y_test, y_pred))#, labels=names))
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, labels=np.unique(y_pred))

              precision    recall  f1-score   support

           0       0.70      0.80      0.74      1195
           1       0.60      0.47      0.52       772

    accuracy                           0.67      1967
   macro avg       0.65      0.63      0.63      1967
weighted avg       0.66      0.67      0.66      1967

[[953 242]
 [413 359]]


## Hashing Vectorizer ##

In [46]:
def listToString(s):  
    
    # initialize an empty string 
    str1 = ""  
    
    # traverse in the string   
    for ele in s:  
        str1 += ' '+ele   
    
    # return string   
    return str1  

text = [ listToString(tweet) for tweet in tweets['full_tweet']]

In [57]:
from sklearn.feature_extraction.text import HashingVectorizer

hash_vectorizer = HashingVectorizer(n_features=5000)
hash_fit = hash_vectorizer.transform(text)

X = hash_fit
X.shape

(7868, 5000)

In [58]:
y = tweets[target]
freq = y.value_counts()           # count frequency of different classes in loan status
freq/sum(freq)*100  

no     60.739705
yes    39.260295
Name: bullying_trace, dtype: float64

In [59]:
acc, f1 = prediction(X, y, n=0.2)
print(acc, f1)

shape of X: (7868, 5000)
shape of y: (7868,)
shape of X_train: (6294, 5000)
shape of y_train: (6294,)
shape of X_test: (1574, 5000)
shape of y_test: (1574,)
shape of y_pred: (1574, 1)
              precision    recall  f1-score   support

          no       0.71      0.84      0.77       955
         yes       0.65      0.47      0.55       619

    accuracy                           0.69      1574
   macro avg       0.68      0.65      0.66      1574
weighted avg       0.69      0.69      0.68      1574

[[800 155]
 [329 290]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.61      1.00      0.76       955
         yes       0.00      0.00      0.00       619

    accuracy                           0.61      1574
   macro avg       0.30      0.50      0.38      1574
weighted avg       0.37      0.61      0.46      1574

[[  0 619]
 [  0 955]]
0.6925031766200762 0.5451127819548872


  _warn_prf(average, modifier, msg_start, len(result))


In [60]:
acc, f1 = prediction(X, y, n=0.2, cv=True)
print(acc, f1)

shape of X: (7868, 5000)
shape of y: (7868,)
shape of X_train: (6294, 5000)
shape of y_train: (6294,)
shape of X_test: (1574, 5000)
shape of y_test: (1574,)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

shape of y_pred: (1574, 1)
              precision    recall  f1-score   support

          no       0.70      0.80      0.74       949
         yes       0.61      0.47      0.53       625

    accuracy                           0.67      1574
   macro avg       0.65      0.64      0.64      1574
weighted avg       0.66      0.67      0.66      1574

[[759 190]
 [330 295]]
naive model (only no)
              precision    recall  f1-score   support

          no       0.60      1.00      0.75       949
         yes       0.00      0.00      0.00       625

    accuracy                           0.60      1574
   macro avg       0.30      0.50      0.38      1574
weighted avg       0.36      0.60      0.45      1574

[[  0 625]
 [  0 949]]
0.6696315120711563 0.5315315315315314


  _warn_prf(average, modifier, msg_start, len(result))
