# Importing required modules for training and testing ML algorithms

In [1]:
import pandas as pd
import pickle
import numpy as np
from numpy import random
import gensim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# from sklearn import model_selection, preprocessing, linear_model, naive_bayes, metrics,svm
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import decomposition, ensemble
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
%matplotlib inline
import pymongo
from pymongo import MongoClient

unable to import 'smart_open.gcs', disabling that module


# Fetching  Preprocessed Data

In [2]:
df=pd.read_csv("redditlimit20.csv")

In [3]:
df.head()

Unnamed: 0,Searched flair,flair,title,score,id,author,body,created,comms_num,url,time,comments,comments_title_url_body
0,Coronavirus,Coronavirus,lockdown scenes kurnool andhra pradesh already...,1809,g5xgab,drunk_sithlord,,1587573000.0,113,https://i.redd.it/h77492fyybu41.jpg,2020-04-22 22:02:48,expect allow market open couple hours market p...,expect allow market open couple hours market p...
1,Coronavirus,Coronavirus,west bengal chief secretary bans mobile phones...,311,g622u6,Kickass1410,west bengal govt proved incompetent controllin...,1587594000.0,43,https://www.reddit.com/r/india/comments/g622u6...,2020-04-23 03:58:15,see problem close eyes problem solved mean wb ...,see problem close eyes problem solved mean wb ...
2,Coronavirus,Coronavirus,doctor charged rs 2 kurnool bids farewell covi...,245,g610c7,BornAgainHindu,,1587591000.0,8,https://www.thenewsminute.com/article/doctor-w...,2020-04-23 02:51:48,personally know man used teach kurnool medical...,personally know man used teach kurnool medical...
3,Coronavirus,Coronavirus,covidiots arrested paraded making tiktok video...,562,g5uuhi,DenseSpirit5,,1587560000.0,50,https://www.news18.com/news/buzz/covidiots-arr...,2020-04-22 18:15:57,play stupid games win stupid prizes ye kya nat...,play stupid games win stupid prizes ye kya nat...
4,Coronavirus,Coronavirus,indian capital new delhi frequently tops world...,102,g6525y,friskyfrog,,1587604000.0,7,https://i.redd.it/5nth8zxcjeu41.jpg,2020-04-23 06:41:19,rather spending money pollution control order ...,rather spending money pollution control order ...


# Replacing NaN from our DataFrame

In [4]:
df=df.replace(np.nan,'',regex=True)

In [5]:
df.head()

Unnamed: 0,Searched flair,flair,title,score,id,author,body,created,comms_num,url,time,comments,comments_title_url_body
0,Coronavirus,Coronavirus,lockdown scenes kurnool andhra pradesh already...,1809,g5xgab,drunk_sithlord,,1587573000.0,113,https://i.redd.it/h77492fyybu41.jpg,2020-04-22 22:02:48,expect allow market open couple hours market p...,expect allow market open couple hours market p...
1,Coronavirus,Coronavirus,west bengal chief secretary bans mobile phones...,311,g622u6,Kickass1410,west bengal govt proved incompetent controllin...,1587594000.0,43,https://www.reddit.com/r/india/comments/g622u6...,2020-04-23 03:58:15,see problem close eyes problem solved mean wb ...,see problem close eyes problem solved mean wb ...
2,Coronavirus,Coronavirus,doctor charged rs 2 kurnool bids farewell covi...,245,g610c7,BornAgainHindu,,1587591000.0,8,https://www.thenewsminute.com/article/doctor-w...,2020-04-23 02:51:48,personally know man used teach kurnool medical...,personally know man used teach kurnool medical...
3,Coronavirus,Coronavirus,covidiots arrested paraded making tiktok video...,562,g5uuhi,DenseSpirit5,,1587560000.0,50,https://www.news18.com/news/buzz/covidiots-arr...,2020-04-22 18:15:57,play stupid games win stupid prizes ye kya nat...,play stupid games win stupid prizes ye kya nat...
4,Coronavirus,Coronavirus,indian capital new delhi frequently tops world...,102,g6525y,friskyfrog,,1587604000.0,7,https://i.redd.it/5nth8zxcjeu41.jpg,2020-04-23 06:41:19,rather spending money pollution control order ...,rather spending money pollution control order ...


# Listing out all the Columns of our DataFrame

In [6]:
df.keys()

Index(['Searched flair', 'flair', 'title', 'score', 'id', 'author', 'body',
       'created', 'comms_num', 'url', 'time', 'comments',
       'comments_title_url_body'],
      dtype='object')

In [7]:
flair_list=["Coronavirus","Non-Political","Politics","Science/Technology","Policy/Economy","Photography","AskIndia","Scheduled","Sports","Food","Business/Finanace","[R]eddiquette"]

# Naive-Bayes Algorithm

In [8]:
def nb_classifier(X_train, X_test, y_train, y_test,feature):
  
    from sklearn.naive_bayes import MultinomialNB
    nb = Pipeline([('vect', CountVectorizer()),
                    ('tfidf', TfidfTransformer()),
                     ('clf', MultinomialNB()),
                    ])
    nb.fit(X_train, y_train)

    y_pred = nb.predict(X_test)

    print('accuracy %s' % accuracy_score(y_pred, y_test))
    print(classification_report(y_test, y_pred,target_names=flair_list))
    

# Linear SVM Algorithm

### We save this model through Pickle as this model comes out to be the most accurate 

In [9]:
def linear_svm(X_train, X_test, y_train, y_test,feature):
  
    from sklearn.linear_model import SGDClassifier
    sgd = Pipeline([('vect', CountVectorizer()),
                  ('tfidf', TfidfTransformer()),
                  ('clf', SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=5, tol=None)),
                 ])
    sgd.fit(X_train, y_train)

    y_pred = sgd.predict(X_test)

    print('accuracy %s' % accuracy_score(y_pred, y_test))
    print(classification_report(y_test, y_pred,target_names=flair_list))
    if feature=="tcbu":
        filename='SVM_ctbu.sav'
        pickle.dump(sgd,open(filename,'wb'))

# Logistic Regression Algorithm

In [10]:
def logistic_reg(X_train, X_test, y_train, y_test,feature):

    from sklearn.linear_model import LogisticRegression

    logreg = Pipeline([('vect', CountVectorizer()),
                    ('tfidf', TfidfTransformer()),
                    ('clf', LogisticRegression(n_jobs=1, C=1e5)),
                    ])
    logreg.fit(X_train, y_train)

    y_pred = logreg.predict(X_test)

    print('accuracy %s' % accuracy_score(y_pred, y_test))
    print(classification_report(y_test, y_pred,target_names=flair_list))
    

# Random Forest Algorithm

In [11]:

def randomforest(X_train, X_test, y_train, y_test,feature):
  
    from sklearn.ensemble import RandomForestClassifier
  
    ranfor = Pipeline([('vect', CountVectorizer()),
                  ('tfidf', TfidfTransformer()),
                  ('clf', RandomForestClassifier(n_estimators = 1000, random_state = 42)),
                 ])
    ranfor.fit(X_train, y_train)

    y_pred = ranfor.predict(X_test)

    print('accuracy %s' % accuracy_score(y_pred, y_test))
    print(classification_report(y_test, y_pred,target_names=flair_list))
    

# MLP Classifier

In [None]:
def mlpclassifier(X_train, X_test, y_train, y_test,feature):
  
    from sklearn.neural_network import MLPClassifier
  
    mlp = Pipeline([('vect', CountVectorizer()),
                  ('tfidf', TfidfTransformer()),
                  ('clf', MLPClassifier(hidden_layer_sizes=(30,30,30))),
                 ])
    mlp.fit(X_train, y_train)

    y_pred = mlp.predict(X_test)

    print('accuracy %s' % accuracy_score(y_pred, y_test))
    print(classification_report(y_test, y_pred,target_names=flair_list))
    

# Splitting the Data into Train and Test Sets

In [13]:
def train_test_splitting(X,Y,feature):
    X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.1,random_state=42)
    
    print("Result of Naives-Bayes Classifier")
    nb_classifier(X_train,X_test,Y_train,Y_test,feature)
    print("Result of Linear Support Vector Machine")
    linear_svm(X_train,X_test,Y_train,Y_test,feature)
    print("Result of Logistic Regression")
    logistic_reg(X_train,X_test,Y_train,Y_test,feature)
    print("Result of Random Forest")
    randomforest(X_train,X_test,Y_train,Y_test,feature)
    print("Result of MLP Classifier")
    mlpclassifier(X_train,X_test,Y_train,Y_test,feature)

# Combining different features to chech which is the most accurate

In [14]:
df['titles_comments']=df.title.str.cat(df['comments'].astype('str'),sep=" ")
df['titles_comments_body']=df.titles_comments.str.cat(df['body'].astype('str'),sep=" ")
df['titles_comments_body_url']=df.titles_comments_body.str.cat(df['url'].astype('str'),sep=" ")
df['titles_comments_url']=df.titles_comments.str.cat(df['url'].astype('str'),sep=" ")
df['titles_comments_url_score']=df.titles_comments_url.str.cat(df['score'].astype('str'),sep=" ")

In [15]:
df.head()

Unnamed: 0,Searched flair,flair,title,score,id,author,body,created,comms_num,url,time,comments,comments_title_url_body,titles_comments,titles_comments_body,titles_comments_body_url,titles_comments_url,titles_comments_url_score
0,Coronavirus,Coronavirus,lockdown scenes kurnool andhra pradesh already...,1809,g5xgab,drunk_sithlord,,1587573000.0,113,https://i.redd.it/h77492fyybu41.jpg,2020-04-22 22:02:48,expect allow market open couple hours market p...,expect allow market open couple hours market p...,lockdown scenes kurnool andhra pradesh already...,lockdown scenes kurnool andhra pradesh already...,lockdown scenes kurnool andhra pradesh already...,lockdown scenes kurnool andhra pradesh already...,lockdown scenes kurnool andhra pradesh already...
1,Coronavirus,Coronavirus,west bengal chief secretary bans mobile phones...,311,g622u6,Kickass1410,west bengal govt proved incompetent controllin...,1587594000.0,43,https://www.reddit.com/r/india/comments/g622u6...,2020-04-23 03:58:15,see problem close eyes problem solved mean wb ...,see problem close eyes problem solved mean wb ...,west bengal chief secretary bans mobile phones...,west bengal chief secretary bans mobile phones...,west bengal chief secretary bans mobile phones...,west bengal chief secretary bans mobile phones...,west bengal chief secretary bans mobile phones...
2,Coronavirus,Coronavirus,doctor charged rs 2 kurnool bids farewell covi...,245,g610c7,BornAgainHindu,,1587591000.0,8,https://www.thenewsminute.com/article/doctor-w...,2020-04-23 02:51:48,personally know man used teach kurnool medical...,personally know man used teach kurnool medical...,doctor charged rs 2 kurnool bids farewell covi...,doctor charged rs 2 kurnool bids farewell covi...,doctor charged rs 2 kurnool bids farewell covi...,doctor charged rs 2 kurnool bids farewell covi...,doctor charged rs 2 kurnool bids farewell covi...
3,Coronavirus,Coronavirus,covidiots arrested paraded making tiktok video...,562,g5uuhi,DenseSpirit5,,1587560000.0,50,https://www.news18.com/news/buzz/covidiots-arr...,2020-04-22 18:15:57,play stupid games win stupid prizes ye kya nat...,play stupid games win stupid prizes ye kya nat...,covidiots arrested paraded making tiktok video...,covidiots arrested paraded making tiktok video...,covidiots arrested paraded making tiktok video...,covidiots arrested paraded making tiktok video...,covidiots arrested paraded making tiktok video...
4,Coronavirus,Coronavirus,indian capital new delhi frequently tops world...,102,g6525y,friskyfrog,,1587604000.0,7,https://i.redd.it/5nth8zxcjeu41.jpg,2020-04-23 06:41:19,rather spending money pollution control order ...,rather spending money pollution control order ...,indian capital new delhi frequently tops world...,indian capital new delhi frequently tops world...,indian capital new delhi frequently tops world...,indian capital new delhi frequently tops world...,indian capital new delhi frequently tops world...


In [16]:
xTitle=df.title
xBody=df.body
xAuthor=df.author
xScore=df.score.astype('str')
xNumcomm=df.comms_num.astype('str')
xComments=df.comments
xUrl=df.url
Y=df.flair
X_ctub=df.comments_title_url_body
X_ct=df.titles_comments
X_ctb=df.titles_comments_body
X_ctu=df.titles_comments_url
X_ctus=df.titles_comments_url_score

# Performing Machine Learning Algorithms according to different features

In [17]:
print("Title as feature--------------------------------------------------")
train_test_splitting(xTitle,Y,"Title")

print("Body as feature--------------------------------------------------")
train_test_splitting(xBody,Y,"Body")

print("Author as feature--------------------------------------------------")
train_test_splitting(xAuthor,Y,"Author")

print("Score as feature--------------------------------------------------")
train_test_splitting(xScore,Y,"Score")

print("NumComm as feature--------------------------------------------------")
train_test_splitting(xNumcomm,Y,"NumComm")

print("Comments as feature--------------------------------------------------")
train_test_splitting(xComments,Y,"Comments")

print("Url as feature--------------------------------------------------")
train_test_splitting(xUrl,Y,"Url")

print("Comment+title+url+body as feature--------------------------------------------------")
train_test_splitting(X_ctub,Y,"tcbu")

print("Comment+title as feature--------------------------------------------------")
train_test_splitting(X_ct,Y,"ct")

print("Comment+title+body as feature--------------------------------------------------")
train_test_splitting(X_ctb,Y,"ctb")

print("Comment+title+url as feature--------------------------------------------------")
train_test_splitting(X_ctu,Y,"ctu")

print("Comment+title+url+score as feature--------------------------------------------------")
train_test_splitting(X_ctus,Y,"ctus")

Title as feature--------------------------------------------------
Result of Naives-Bayes Classifier
accuracy 0.4866666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.39      0.58      0.47        12
     Non-Political       0.33      0.45      0.38        11
          Politics       0.37      0.67      0.48        15
Science/Technology       0.71      0.36      0.48        14
    Policy/Economy       0.21      0.50      0.30         8
       Photography       0.88      0.70      0.78        10
          AskIndia       0.29      0.20      0.24        10
         Scheduled       0.62      0.36      0.45        14
            Sports       0.82      1.00      0.90         9
              Food       0.86      0.29      0.43        21
 Business/Finanace       0.73      0.79      0.76        14
     [R]eddiquette       0.25      0.17      0.20        12

          accuracy                           0.49       150
         macro avg       0.54

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.49333333333333335
                    precision    recall  f1-score   support

       Coronavirus       0.22      0.75      0.34        12
     Non-Political       0.42      0.45      0.43        11
          Politics       0.53      0.67      0.59        15
Science/Technology       0.50      0.29      0.36        14
    Policy/Economy       0.27      0.38      0.32         8
       Photography       0.75      0.60      0.67        10
          AskIndia       0.50      0.30      0.37        10
         Scheduled       0.67      0.43      0.52        14
            Sports       0.90      1.00      0.95         9
              Food       0.75      0.29      0.41        21
 Business/Finanace       0.90      0.64      0.75        14
     [R]eddiquette       0.50      0.33      0.40        12

          accuracy                           0.49       150
         macro avg       0.58      0.51      0.51       150
      weighted avg       0.59      0.49      0.50       150

Result o

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.25333333333333335
                    precision    recall  f1-score   support

       Coronavirus       0.34      0.83      0.49        12
     Non-Political       0.33      0.09      0.14        11
          Politics       0.00      0.00      0.00        15
Science/Technology       0.60      0.21      0.32        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.11      1.00      0.19        10
          AskIndia       0.67      0.20      0.31        10
         Scheduled       0.00      0.00      0.00        14
            Sports       0.90      1.00      0.95         9
              Food       0.33      0.05      0.08        21
 Business/Finanace       0.00      0.00      0.00        14
     [R]eddiquette       0.67      0.17      0.27        12

          accuracy                           0.25       150
         macro avg       0.33      0.30      0.23       150
      weighted avg       0.31      0.25      0.20       150

Result o

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.24666666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.62      0.67      0.64        12
     Non-Political       0.67      0.18      0.29        11
          Politics       0.00      0.00      0.00        15
Science/Technology       1.00      0.14      0.25        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.11      1.00      0.19        10
          AskIndia       0.27      0.30      0.29        10
         Scheduled       0.00      0.00      0.00        14
            Sports       0.90      1.00      0.95         9
              Food       0.33      0.05      0.08        21
 Business/Finanace       0.00      0.00      0.00        14
     [R]eddiquette       0.22      0.17      0.19        12

          accuracy                           0.25       150
         macro avg       0.34      0.29      0.24       150
      weighted avg       0.34      0.25      0.21       150

Author a

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.3333333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.00      0.00      0.00        12
     Non-Political       0.29      0.18      0.22        11
          Politics       0.42      0.33      0.37        15
Science/Technology       0.43      0.21      0.29        14
    Policy/Economy       0.10      1.00      0.19         8
       Photography       1.00      0.30      0.46        10
          AskIndia       0.44      0.40      0.42        10
         Scheduled       0.73      0.57      0.64        14
            Sports       0.90      1.00      0.95         9
              Food       1.00      0.05      0.09        21
 Business/Finanace       0.75      0.21      0.33        14
     [R]eddiquette       0.67      0.33      0.44        12

          accuracy                           0.33       150
         macro avg       0.56      0.38      0.37       150
      weighted avg       0.59      0.33      0.34       150

Result of

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.3333333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.00      0.00      0.00        12
     Non-Political       0.33      0.18      0.24        11
          Politics       0.47      0.47      0.47        15
Science/Technology       0.43      0.21      0.29        14
    Policy/Economy       0.10      1.00      0.18         8
       Photography       1.00      0.30      0.46        10
          AskIndia       0.44      0.40      0.42        10
         Scheduled       0.88      0.50      0.64        14
            Sports       0.90      1.00      0.95         9
              Food       0.00      0.00      0.00        21
 Business/Finanace       0.80      0.29      0.42        14
     [R]eddiquette       0.75      0.25      0.38        12

          accuracy                           0.33       150
         macro avg       0.51      0.38      0.37       150
      weighted avg       0.48      0.33      0.34       150

Score as 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.11333333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.00      0.00      0.00        12
     Non-Political       0.00      0.00      0.00        11
          Politics       0.20      0.13      0.16        15
Science/Technology       0.00      0.00      0.00        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.50      0.10      0.17        10
          AskIndia       0.07      0.10      0.08        10
         Scheduled       0.17      0.07      0.10        14
            Sports       0.12      0.44      0.19         9
              Food       0.25      0.05      0.08        21
 Business/Finanace       0.11      0.43      0.17        14
     [R]eddiquette       0.09      0.08      0.09        12

          accuracy                           0.11       150
         macro avg       0.13      0.12      0.09       150
      weighted avg       0.13      0.11      0.09       150

Result o

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.10666666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.10      0.08      0.09        12
     Non-Political       0.00      0.00      0.00        11
          Politics       0.00      0.00      0.00        15
Science/Technology       0.20      0.07      0.11        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.12      0.10      0.11        10
          AskIndia       0.08      0.10      0.09        10
         Scheduled       0.17      0.07      0.10        14
            Sports       0.12      0.44      0.20         9
              Food       0.25      0.05      0.08        21
 Business/Finanace       0.10      0.43      0.17        14
     [R]eddiquette       0.00      0.00      0.00        12

          accuracy                           0.11       150
         macro avg       0.10      0.11      0.08       150
      weighted avg       0.11      0.11      0.08       150

NumComm 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.12
                    precision    recall  f1-score   support

       Coronavirus       0.17      0.25      0.20        12
     Non-Political       0.00      0.00      0.00        11
          Politics       0.16      0.67      0.25        15
Science/Technology       0.00      0.00      0.00        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.00      0.00      0.00        10
          AskIndia       0.14      0.20      0.17        10
         Scheduled       0.21      0.21      0.21        14
            Sports       0.00      0.00      0.00         9
              Food       0.00      0.00      0.00        21
 Business/Finanace       0.00      0.00      0.00        14
     [R]eddiquette       0.00      0.00      0.00        12

          accuracy                           0.12       150
         macro avg       0.06      0.11      0.07       150
      weighted avg       0.06      0.12      0.07       150

Result of Random Forest

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.12
                    precision    recall  f1-score   support

       Coronavirus       0.18      0.25      0.21        12
     Non-Political       0.00      0.00      0.00        11
          Politics       0.16      0.67      0.25        15
Science/Technology       0.00      0.00      0.00        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.00      0.00      0.00        10
          AskIndia       0.17      0.20      0.18        10
         Scheduled       0.27      0.21      0.24        14
            Sports       0.00      0.00      0.00         9
              Food       0.00      0.00      0.00        21
 Business/Finanace       0.00      0.00      0.00        14
     [R]eddiquette       0.00      0.00      0.00        12

          accuracy                           0.12       150
         macro avg       0.06      0.11      0.07       150
      weighted avg       0.07      0.12      0.08       150

Comments as feature----

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.5866666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.53      0.75      0.62        12
     Non-Political       0.45      0.45      0.45        11
          Politics       0.75      0.40      0.52        15
Science/Technology       0.60      0.64      0.62        14
    Policy/Economy       0.60      0.38      0.46         8
       Photography       0.89      0.80      0.84        10
          AskIndia       0.33      0.50      0.40        10
         Scheduled       0.80      0.86      0.83        14
            Sports       0.75      1.00      0.86         9
              Food       0.80      0.38      0.52        21
 Business/Finanace       0.44      0.79      0.56        14
     [R]eddiquette       0.38      0.25      0.30        12

          accuracy                           0.59       150
         macro avg       0.61      0.60      0.58       150
      weighted avg       0.62      0.59      0.58       150

Result of

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.5666666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.38      0.67      0.48        12
     Non-Political       0.50      0.55      0.52        11
          Politics       0.47      0.53      0.50        15
Science/Technology       0.64      0.64      0.64        14
    Policy/Economy       0.25      0.25      0.25         8
       Photography       1.00      0.50      0.67        10
          AskIndia       0.33      0.20      0.25        10
         Scheduled       0.83      0.71      0.77        14
            Sports       1.00      1.00      1.00         9
              Food       0.80      0.57      0.67        21
 Business/Finanace       0.50      0.71      0.59        14
     [R]eddiquette       0.36      0.33      0.35        12

          accuracy                           0.57       150
         macro avg       0.59      0.56      0.56       150
      weighted avg       0.60      0.57      0.57       150

Result of

  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.32666666666666666
                    precision    recall  f1-score   support

       Coronavirus       0.00      0.00      0.00        12
     Non-Political       0.12      0.09      0.11        11
          Politics       0.45      0.60      0.51        15
Science/Technology       0.50      0.07      0.12        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.33      0.90      0.49        10
          AskIndia       0.43      0.30      0.35        10
         Scheduled       0.56      0.36      0.43        14
            Sports       0.16      1.00      0.27         9
              Food       0.43      0.14      0.21        21
 Business/Finanace       0.89      0.57      0.70        14
     [R]eddiquette       0.50      0.08      0.14        12

          accuracy                           0.33       150
         macro avg       0.36      0.34      0.28       150
      weighted avg       0.40      0.33      0.29       150

Result o

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.2866666666666667
                    precision    recall  f1-score   support

       Coronavirus       0.00      0.00      0.00        12
     Non-Political       0.00      0.00      0.00        11
          Politics       0.35      0.53      0.42        15
Science/Technology       0.08      0.07      0.07        14
    Policy/Economy       0.00      0.00      0.00         8
       Photography       0.38      0.80      0.52        10
          AskIndia       0.33      0.10      0.15        10
         Scheduled       0.33      0.14      0.20        14
            Sports       0.75      0.33      0.46         9
              Food       0.19      0.52      0.27        21
 Business/Finanace       0.89      0.57      0.70        14
     [R]eddiquette       1.00      0.08      0.15        12

          accuracy                           0.29       150
         macro avg       0.36      0.26      0.25       150
      weighted avg       0.35      0.29      0.26       150

Comment+t

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.4
                    precision    recall  f1-score   support

       Coronavirus       0.38      0.42      0.40        12
     Non-Political       0.57      0.36      0.44        11
          Politics       0.60      0.40      0.48        15
Science/Technology       1.00      0.21      0.35        14
    Policy/Economy       0.33      0.25      0.29         8
       Photography       1.00      0.50      0.67        10
          AskIndia       0.71      0.50      0.59        10
         Scheduled       0.32      0.93      0.47        14
            Sports       0.39      1.00      0.56         9
              Food       0.00      0.00      0.00        21
 Business/Finanace       1.00      0.36      0.53        14
     [R]eddiquette       0.10      0.25      0.14        12

          accuracy                           0.40       150
         macro avg       0.53      0.43      0.41       150
      weighted avg       0.51      0.40      0.38       150

Result of Linear Support

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.6733333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.56      0.75      0.64        12
     Non-Political       0.67      0.55      0.60        11
          Politics       0.58      0.73      0.65        15
Science/Technology       0.69      0.79      0.73        14
    Policy/Economy       0.80      0.50      0.62         8
       Photography       0.75      0.90      0.82        10
          AskIndia       0.42      0.50      0.45        10
         Scheduled       0.69      0.79      0.73        14
            Sports       0.82      1.00      0.90         9
              Food       0.91      0.48      0.62        21
 Business/Finanace       0.75      0.86      0.80        14
     [R]eddiquette       0.57      0.33      0.42        12

          accuracy                           0.67       150
         macro avg       0.68      0.68      0.67       150
      weighted avg       0.69      0.67      0.66       150

Result of

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.64
                    precision    recall  f1-score   support

       Coronavirus       0.50      0.75      0.60        12
     Non-Political       0.56      0.45      0.50        11
          Politics       0.55      0.73      0.63        15
Science/Technology       0.73      0.79      0.76        14
    Policy/Economy       0.31      0.50      0.38         8
       Photography       0.78      0.70      0.74        10
          AskIndia       0.47      0.70      0.56        10
         Scheduled       0.85      0.79      0.81        14
            Sports       1.00      1.00      1.00         9
              Food       0.75      0.43      0.55        21
 Business/Finanace       1.00      0.71      0.83        14
     [R]eddiquette       0.43      0.25      0.32        12

          accuracy                           0.64       150
         macro avg       0.66      0.65      0.64       150
      weighted avg       0.68      0.64      0.64       150

Result of Random Forest

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.64
                    precision    recall  f1-score   support

       Coronavirus       0.53      0.75      0.62        12
     Non-Political       0.45      0.45      0.45        11
          Politics       0.62      0.67      0.65        15
Science/Technology       0.67      0.71      0.69        14
    Policy/Economy       0.60      0.38      0.46         8
       Photography       0.90      0.90      0.90        10
          AskIndia       0.33      0.40      0.36        10
         Scheduled       0.92      0.79      0.85        14
            Sports       0.82      1.00      0.90         9
              Food       0.85      0.52      0.65        21
 Business/Finanace       0.60      0.86      0.71        14
     [R]eddiquette       0.38      0.25      0.30        12

          accuracy                           0.64       150
         macro avg       0.64      0.64      0.63       150
      weighted avg       0.65      0.64      0.63       150

Result of Logistic Regr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.64
                    precision    recall  f1-score   support

       Coronavirus       0.50      0.67      0.57        12
     Non-Political       0.45      0.45      0.45        11
          Politics       0.69      0.73      0.71        15
Science/Technology       0.71      0.71      0.71        14
    Policy/Economy       0.27      0.38      0.32         8
       Photography       0.89      0.80      0.84        10
          AskIndia       0.38      0.50      0.43        10
         Scheduled       0.73      0.79      0.76        14
            Sports       1.00      1.00      1.00         9
              Food       0.75      0.57      0.65        21
 Business/Finanace       0.91      0.71      0.80        14
     [R]eddiquette       0.44      0.33      0.38        12

          accuracy                           0.64       150
         macro avg       0.64      0.64      0.64       150
      weighted avg       0.66      0.64      0.65       150

Result of Random Forest

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.66
                    precision    recall  f1-score   support

       Coronavirus       0.62      0.83      0.71        12
     Non-Political       0.50      0.55      0.52        11
          Politics       0.67      0.67      0.67        15
Science/Technology       0.62      0.71      0.67        14
    Policy/Economy       0.75      0.38      0.50         8
       Photography       0.90      0.90      0.90        10
          AskIndia       0.45      0.50      0.48        10
         Scheduled       0.85      0.79      0.81        14
            Sports       0.82      1.00      0.90         9
              Food       0.83      0.48      0.61        21
 Business/Finanace       0.55      0.79      0.65        14
     [R]eddiquette       0.50      0.42      0.45        12

          accuracy                           0.66       150
         macro avg       0.67      0.67      0.66       150
      weighted avg       0.68      0.66      0.65       150

Result of Logistic Regr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.6533333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.47      0.67      0.55        12
     Non-Political       0.50      0.45      0.48        11
          Politics       0.73      0.73      0.73        15
Science/Technology       0.71      0.71      0.71        14
    Policy/Economy       0.33      0.50      0.40         8
       Photography       0.89      0.80      0.84        10
          AskIndia       0.44      0.80      0.57        10
         Scheduled       0.85      0.79      0.81        14
            Sports       1.00      1.00      1.00         9
              Food       0.83      0.48      0.61        21
 Business/Finanace       0.83      0.71      0.77        14
     [R]eddiquette       0.44      0.33      0.38        12

          accuracy                           0.65       150
         macro avg       0.67      0.66      0.66       150
      weighted avg       0.69      0.65      0.66       150

Result of

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.64
                    precision    recall  f1-score   support

       Coronavirus       0.53      0.75      0.62        12
     Non-Political       0.50      0.45      0.48        11
          Politics       0.53      0.67      0.59        15
Science/Technology       0.73      0.79      0.76        14
    Policy/Economy       0.75      0.38      0.50         8
       Photography       0.75      0.90      0.82        10
          AskIndia       0.30      0.30      0.30        10
         Scheduled       0.69      0.79      0.73        14
            Sports       0.82      1.00      0.90         9
              Food       0.85      0.52      0.65        21
 Business/Finanace       0.71      0.86      0.77        14
     [R]eddiquette       0.50      0.25      0.33        12

          accuracy                           0.64       150
         macro avg       0.64      0.64      0.62       150
      weighted avg       0.65      0.64      0.63       150

Result of Logistic Regr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.6533333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.44      0.67      0.53        12
     Non-Political       0.50      0.45      0.48        11
          Politics       0.59      0.87      0.70        15
Science/Technology       0.79      0.79      0.79        14
    Policy/Economy       0.33      0.38      0.35         8
       Photography       0.89      0.80      0.84        10
          AskIndia       0.43      0.60      0.50        10
         Scheduled       0.85      0.79      0.81        14
            Sports       1.00      1.00      1.00         9
              Food       0.83      0.48      0.61        21
 Business/Finanace       1.00      0.71      0.83        14
     [R]eddiquette       0.40      0.33      0.36        12

          accuracy                           0.65       150
         macro avg       0.67      0.65      0.65       150
      weighted avg       0.69      0.65      0.66       150

Result of

  _warn_prf(average, modifier, msg_start, len(result))


accuracy 0.66
                    precision    recall  f1-score   support

       Coronavirus       0.53      0.75      0.62        12
     Non-Political       0.50      0.45      0.48        11
          Politics       0.56      0.67      0.61        15
Science/Technology       0.73      0.79      0.76        14
    Policy/Economy       0.67      0.50      0.57         8
       Photography       0.75      0.90      0.82        10
          AskIndia       0.36      0.40      0.38        10
         Scheduled       0.71      0.86      0.77        14
            Sports       0.82      1.00      0.90         9
              Food       0.85      0.52      0.65        21
 Business/Finanace       0.86      0.86      0.86        14
     [R]eddiquette       0.50      0.25      0.33        12

          accuracy                           0.66       150
         macro avg       0.65      0.66      0.65       150
      weighted avg       0.67      0.66      0.65       150

Result of Logistic Regr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


accuracy 0.6533333333333333
                    precision    recall  f1-score   support

       Coronavirus       0.44      0.67      0.53        12
     Non-Political       0.50      0.45      0.48        11
          Politics       0.56      0.67      0.61        15
Science/Technology       0.79      0.79      0.79        14
    Policy/Economy       0.40      0.50      0.44         8
       Photography       1.00      0.80      0.89        10
          AskIndia       0.46      0.60      0.52        10
         Scheduled       0.79      0.79      0.79        14
            Sports       1.00      1.00      1.00         9
              Food       0.71      0.57      0.63        21
 Business/Finanace       1.00      0.71      0.83        14
     [R]eddiquette       0.44      0.33      0.38        12

          accuracy                           0.65       150
         macro avg       0.67      0.66      0.66       150
      weighted avg       0.68      0.65      0.66       150

Result of