In [1]:
import pandas as pd

# Read the two CSV splits from the working directory in one pass;
# the first path becomes the training frame, the second the test frame.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

KeyboardInterrupt: 

In [None]:
import nltk
# Fetch every NLTK resource the preprocessing in this cell relies on.
#   'stopwords' - the word list read by stopwords.words('english').
#   'punkt' / 'punkt_tab' - tokenizer models loaded by word_tokenize; without
#       them word_tokenize raises LookupError on a fresh environment.
# NOTE(review): recent NLTK releases look up 'punkt_tab' while older ones look
# up 'punkt'; both are requested so the cell works on either. An unavailable
# package only produces an error message from the downloader, not an exception.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string

# Lookup sets used by preprocess_text to discard tokens.
# Sets give constant-time membership checks inside the per-token filter.
punctuations = {symbol for symbol in string.punctuation}  # single ASCII punctuation characters
stop_words = {entry for entry in stopwords.words('english')}  # NLTK English stopword list

def preprocess_text(text):
    """Lowercase, tokenize and filter a document, returning it as one string.

    Parameters
    ----------
    text : str or scalar
        Raw document. Missing values (None, NaN, pd.NA) are treated as an
        empty document; any other non-string scalar is converted with str().

    Returns
    -------
    str
        The surviving tokens joined by single spaces. Tokens found in
        ``stop_words`` or ``punctuations`` are dropped.
    """
    if not isinstance(text, str):
        # Empty CSV cells are loaded by pandas as float NaN, which has no
        # .lower(); map missing values to an empty document instead of crashing.
        text = '' if pd.isna(text) else str(text)

    words = word_tokenize(text.lower())
    # `punctuations` holds single characters only, so multi-character
    # punctuation tokens (e.g. '...') are not removed by this filter.
    words = [word for word in words if word not in stop_words and word not in punctuations]
    return ' '.join(words)

# Clean the 'text' column of both splits with the same function.
# The column is overwritten in place, so the raw text is no longer available
# afterwards and re-running this cell preprocesses already-cleaned text.
for frame in (train_df, test_df):
    frame['text'] = frame['text'].apply(preprocess_text)


In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Targets are taken directly from the training frame.
y_train = train_df['label']

# Bag-of-words counts: the vocabulary is learned from the training text only,
# then the fitted vectorizer is reused to encode the test text.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(raw_documents=train_df['text'])
X_test = vectorizer.transform(raw_documents=test_df['text'])

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, KFold

# Baseline: 10-fold cross-validated macro-F1 for logistic regression on the
# count-vectorized training text.
# random_state pins the shuffle so the folds, and therefore the reported
# score, are identical on every run; without it each execution draws a new
# random partition and the numbers cannot be reproduced.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)
lr_model = LogisticRegression()

lr_scores = cross_val_score(lr_model, X_train, y_train, cv=kfold, scoring='f1_macro')
# Report mean and standard deviation across the 10 folds.
print('Logistic Regression F1 Score: %.3f (%.3f)' % (lr_scores.mean(), lr_scores.std()))

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Candidate models to compare, all otherwise at library defaults.
# DecisionTreeClassifier, RandomForestClassifier and LinearSVC default to
# random_state=None, so they are seeded here to make their scores repeatable.
# The remaining estimators are left exactly as they were.
classifiers = [
    LogisticRegression(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    MultinomialNB(),
    LinearSVC(random_state=42),
    XGBClassifier(),
    LGBMClassifier(),
    CatBoostClassifier()
]

# Build ONE seeded splitter and share it across all classifiers.
# Creating an unseeded KFold inside the loop gave every model a different
# random partition, so score differences mixed model quality with split luck
# and changed from run to run. A fixed random_state yields the same folds on
# each call, making the comparison like-for-like and reproducible.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

for classifier in classifiers:
    scores = cross_val_score(classifier, X_train, y_train, cv=kfold, scoring='f1_macro')
    # Mean and standard deviation of macro-F1 over the 10 folds.
    print('%s F1 Score: %.3f (%.3f)' % (classifier.__class__.__name__, scores.mean(), scores.std()))