In [6]:
def SentimentAnalysis(df, n_samples=20000, process_column="text", n_common=5000,
                      random_state=None, test_fraction=0.025):
    """Train a Naive Bayes classifier on VADER-labelled text and report its accuracy.

    Pipeline: draw ``n_samples`` random rows from ``df``; clean, tokenize,
    stem and lemmatize ``process_column``; drop English stopwords; build
    bag-of-words presence features over the ``n_common`` most frequent
    tokens of the sample; label each document ``True`` when VADER's ``pos``
    score exceeds 0.2; hold out the first ``test_fraction`` of the documents
    for testing and train ``nltk.NaiveBayesClassifier`` on the rest.

    The NLTK data used by ``word_tokenize``, ``WordNetLemmatizer``,
    ``stopwords`` and ``SentimentIntensityAnalyzer`` must already be installed.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain ``process_column`` and at least
        ``n_samples`` rows (``DataFrame.sample`` raises otherwise).
    n_samples : int
        Number of rows to sample without replacement.
    process_column : str
        Name of the column holding the raw text.
    n_common : int
        Vocabulary size: how many of the most frequent tokens become features.
    random_state : int or None
        Forwarded to ``DataFrame.sample``. ``None`` (the default) keeps the
        original non-deterministic sampling; pass an int for repeatable runs.
    test_fraction : float
        Share of the sampled documents held out for evaluation
        (default 0.025, the value that was previously hard-coded).

    Returns
    -------
    tuple
        ``("Classifier accuracy percent:", accuracy)`` where ``accuracy`` is
        the held-out accuracy as a percentage.

    Raises
    ------
    ValueError
        If ``test_fraction`` is not strictly between 0 and 1, or if the split
        would leave the train or the test set empty.
    """
    import re
    import nltk
    from nltk import word_tokenize
    from nltk.stem import WordNetLemmatizer
    from nltk.stem import SnowballStemmer
    from nltk.corpus import stopwords
    from nltk.probability import FreqDist
    from nltk.sentiment.vader import SentimentIntensityAnalyzer

    if not 0 < test_fraction < 1:
        raise ValueError("test_fraction must be strictly between 0 and 1")

    # Raw-string patterns: "\w", "\d" and "\W" in ordinary string literals are
    # invalid escape sequences and emit warnings on current Python versions.
    url_pattern = re.compile(r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+")
    digits_pattern = re.compile(r"\d+")
    non_word_pattern = re.compile(r"\W+")

    # Build the heavyweight NLTK objects once instead of once per token or
    # per document; a set gives O(1) stopword membership tests.
    stemmer = SnowballStemmer("english")
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    analyzer = SentimentIntensityAnalyzer()

    def clean_up(s):
        """Strip URLs, digits, punctuation and underscores; lowercase the text."""
        s = url_pattern.sub(' ', s)
        s = digits_pattern.sub(' ', s)
        s = non_word_pattern.sub(' ', s)
        s = s.replace('_', ' ')
        return s.lower().strip()

    def tokenize(s):
        """Split the cleaned text into word tokens."""
        return word_tokenize(clean_up(s))

    def remove_stopwords(s):
        """Return the stemmed and lemmatized tokens of ``s`` without stopwords."""
        kept = []
        for token in tokenize(s):
            # Filter before stemming as well: stems such as "veri" (very) or
            # "becaus" (because) no longer match the stopword list.
            if token in stop_words:
                continue
            root = lemmatizer.lemmatize(stemmer.stem(token))
            # Keep the original post-stemming filter so everything that used
            # to be removed is still removed.
            if root not in stop_words:
                kept.append(root)
        return kept

    df_sample = df.sample(n=n_samples, random_state=random_state)
    # Missing or non-string cells would make the regex calls raise TypeError;
    # treat them as empty documents. The processed tokens stay in a local list
    # rather than being written back as a column on the sampled frame.
    texts = df_sample[process_column].fillna("").astype(str)
    documents = [remove_stopwords(text) for text in texts]

    # The vocabulary depends on the whole sample, not on a single document,
    # so compute it once instead of once per document.
    token_counts = FreqDist(token for document in documents for token in document)
    top = [word for word, _ in token_counts.most_common(n_common)]

    def find_features(document):
        """Return (word-presence features, VADER-derived boolean label)."""
        words = set(document)
        features = {w: (w in words) for w in top}
        # NOTE(review): the label is computed from the stemmed tokens, not the
        # raw text; VADER's lexicon presumably matches unstemmed words better.
        # Left unchanged because altering it would change the labels.
        scores = analyzer.polarity_scores(" ".join(document))
        return (features, scores["pos"] > 0.2)

    feature = [find_features(document) for document in documents]

    n_test = int(len(feature) * test_fraction)
    if n_test == 0 or n_test >= len(feature):
        raise ValueError(
            "test_fraction=%r leaves an empty train or test set for %d documents"
            % (test_fraction, len(feature))
        )
    train_set, test_set = feature[n_test:], feature[:n_test]
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    return "Classifier accuracy percent:", (nltk.classify.accuracy(classifier, test_set)) * 100

In [7]:
import pandas as pd

# Load the dataset; SentimentAnalysis reads its "text" column by default.
df = pd.read_csv("Sentiment140.csv")

# Evaluate on a 5,000-row random sample. The call returns a
# (label, accuracy-percent) tuple, which is printed as-is.
accuracy_report = SentimentAnalysis(df, n_samples=5000)
print(accuracy_report)

('Classifier accuracy percent:', 84.8)
