In [1]:
from nltk.corpus import twitter_samples

In [2]:
print(twitter_samples.fileids())

['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']


In [3]:
neg_tweet = twitter_samples.strings('negative_tweets.json')
len(neg_tweet)

5000

In [4]:
pos_tweet = twitter_samples.strings('positive_tweets.json')
len(pos_tweet)

5000

In [5]:
all_tweet =twitter_samples.strings('tweets.20150430-223406.json')
len(all_tweet)

20000

In [6]:
from nltk.tokenize import TweetTokenizer

In [7]:
tweet_tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
for tweet in pos_tweet[:5]:
    print (tweet_tokenizer.tokenize(tweet))

['#followfriday', 'for', 'being', 'top', 'engaged', 'members', 'in', 'my', 'community', 'this', 'week', ':)']
['hey', 'james', '!', 'how', 'odd', ':/', 'please', 'call', 'our', 'contact', 'centre', 'on', '02392441234', 'and', 'we', 'will', 'be', 'able', 'to', 'assist', 'you', ':)', 'many', 'thanks', '!']
['we', 'had', 'a', 'listen', 'last', 'night', ':)', 'as', 'you', 'bleed', 'is', 'an', 'amazing', 'track', '.', 'when', 'are', 'you', 'in', 'scotland', '?', '!']
['congrats', ':)']
['yeaaah', 'yipppy', '!', '!', '!', 'my', 'accnt', 'verified', 'rqst', 'has', 'succeed', 'got', 'a', 'blue', 'tick', 'mark', 'on', 'my', 'fb', 'profile', ':)', 'in', '15', 'days']


In [8]:
import string
import re
 
from nltk.corpus import stopwords 
stopwords_english = stopwords.words('english')
 
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
 
from nltk.tokenize import TweetTokenizer

In [9]:
emoticons_happy = set([
    ':-)', ':)', ';)', ':o)', ':]', ':3', ':c)', ':>', '=]', '8)', '=)', ':}',
    ':^)', ':-D', ':D', '8-D', '8D', 'x-D', 'xD', 'X-D', 'XD', '=-D', '=D',
    '=-3', '=3', ':-))', ":'-)", ":')", ':*', ':^*', '>:P', ':-P', ':P', 'X-P',
    'x-p', 'xp', 'XP', ':-p', ':p', '=p', ':-b', ':b', '>:)', '>;)', '>:-)',
    '<3'
    ])

In [10]:
emoticons_sad = set([
    ':L', ':-/', '>:/', ':S', '>:[', ':@', ':-(', ':[', ':-||', '=L', ':<',
    ':-[', ':-<', '=\\', '=/', '>:(', ':(', '>.<', ":'-(", ":'(", ':\\', ':-c',
    ':c', ':{', '>:\\', ';('
    ])

In [11]:
emoticons = emoticons_happy.union(emoticons_sad)

In [12]:
def clean_tweets(tweet):
    # remove stock market tickers like $GE
    tweet = re.sub(r'\$\w*', '', tweet)
 
    # remove old style retweet text "RT"
    tweet = re.sub(r'^RT[\s]+', '', tweet)
 
    # remove hyperlinks
    tweet = re.sub(r'https?:\/\/.*[\r\n]*', '', tweet)
    
    # remove hashtags
    # only removing the hash # sign from the word
    tweet = re.sub(r'#', '', tweet)
 
    # tokenize tweets
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
    tweet_tokens = tokenizer.tokenize(tweet)
 
    tweets_clean = []    
    for word in tweet_tokens:
        if (word not in stopwords_english and # remove stopwords
              word not in emoticons and # remove emoticons
                word not in string.punctuation): # remove punctuation
            #tweets_clean.append(word)
            stem_word = stemmer.stem(word) # stemming word
            tweets_clean.append(stem_word)
 
    return tweets_clean

In [13]:
custom_tweet = "RT @Twitter @chapagain Hello There! Have a great day. :) #good #morning http://chapagain.com.np"
 
# print cleaned tweet
print (clean_tweets(custom_tweet))

['hello', 'great', 'day', 'good', 'morn']


In [14]:
print (pos_tweet[5])

@BhaktisBanter @PallaviRuhail This one is irresistible :)
#FlipkartFashionFriday http://t.co/EbZ0L2VENM


In [15]:
print (clean_tweets(pos_tweet[5]))

['one', 'irresist', 'flipkartfashionfriday']


In [16]:
def bag_of_words(tweet):
    words = clean_tweets(tweet)
    words_dictionary = dict([word, True] for word in words)    
    return words_dictionary

In [17]:
custom_tweet = "RT @Twitter @chapagain Hello There! Have a great day. :) #good #morning http://chapagain.com.np"
print (bag_of_words(custom_tweet))

{'hello': True, 'great': True, 'day': True, 'good': True, 'morn': True}


In [18]:
pos_tweets_set = []
for tweet in pos_tweet:
    pos_tweets_set.append((bag_of_words(tweet), 'pos')) 

In [19]:
neg_tweets_set = []
for tweet in neg_tweet:
    neg_tweets_set.append((bag_of_words(tweet), 'neg'))

In [20]:
neg_tweets_set[:10]

[({'hopeless': True, 'tmr': True}, 'neg'),
 ({'everyth': True,
   'kid': True,
   'section': True,
   'ikea': True,
   'cute': True,
   'shame': True,
   "i'm": True,
   'nearli': True,
   '19': True,
   '2': True,
   'month': True},
  'neg'),
 ({'heart': True, 'slide': True, 'wast': True, 'basket': True}, 'neg'),
 ({'“': True,
   'hate': True,
   'japanes': True,
   'call': True,
   'bani': True,
   '”': True},
  'neg'),
 ({'dang': True, 'start': True, 'next': True, 'week': True, 'work': True},
  'neg'),
 ({'oh': True, 'god': True, 'babi': True, 'face': True}, 'neg'),
 ({'make': True, 'smile': True}, 'neg'),
 ({'work': True,
   'neighbour': True,
   'motor': True,
   'ask': True,
   'said': True,
   'hate': True,
   'updat': True,
   'search': True},
  'neg'),
 ({'sialan': True}, 'neg'),
 ({'athabasca': True,
   'glacier': True,
   '1948': True,
   'jasper': True,
   'jaspernationalpark': True,
   'alberta': True,
   'explorealberta': True,
   '…': True},
  'neg')]

In [21]:
print (len(pos_tweets_set), len(neg_tweets_set))

5000 5000


In [22]:
from random import shuffle 
shuffle(pos_tweets_set)
shuffle(neg_tweets_set)
 
test_set = pos_tweets_set[:1000] + neg_tweets_set[:1000]
train_set = pos_tweets_set[1000:] + neg_tweets_set[1000:]
 
print(len(test_set),  len(train_set))

2000 8000


In [23]:
from nltk import classify
from nltk import NaiveBayesClassifier
 
classifier = NaiveBayesClassifier.train(train_set)
 
accuracy = classify.accuracy(classifier, test_set)
print(accuracy)

0.726


In [24]:
print (classifier.show_most_informative_features(5))  

Most Informative Features
                     via = True              pos : neg    =     40.3 : 1.0
                     bam = True              pos : neg    =     23.0 : 1.0
                    glad = True              pos : neg    =     22.3 : 1.0
                     sad = True              neg : pos    =     19.9 : 1.0
                     x15 = True              neg : pos    =     19.7 : 1.0
None


In [25]:
💞

SyntaxError: invalid character in identifier (<ipython-input-25-419aff40ddb4>, line 1)

In [26]:
def tweet_class(tweet):
    custom_tweet_set = bag_of_words(tweet)
    result = classifier.classify(custom_tweet_set)
    return result
    

In [27]:
'''custom_tweet = "I hated the film. It was a disaster. Poor direction, bad acting."
custom_tweet_set = bag_of_words(custom_tweet)
print(custom_tweet_set)
 
print (classifier.classify(custom_tweet_set)) # Output: neg
# Negative tweet correctly classified as negative
 
# probability result
prob_result = classifier.prob_classify(custom_tweet_set)
print (prob_result) # Output: <ProbDist with 2 samples>
print (prob_result.max()) # Output: neg
print (prob_result.prob("neg")) # Output: 0.941844352481
print (prob_result.prob("pos")) # Output: 0.0581556475194'''

'custom_tweet = "I hated the film. It was a disaster. Poor direction, bad acting."\ncustom_tweet_set = bag_of_words(custom_tweet)\nprint(custom_tweet_set)\n \nprint (classifier.classify(custom_tweet_set)) # Output: neg\n# Negative tweet correctly classified as negative\n \n# probability result\nprob_result = classifier.prob_classify(custom_tweet_set)\nprint (prob_result) # Output: <ProbDist with 2 samples>\nprint (prob_result.max()) # Output: neg\nprint (prob_result.prob("neg")) # Output: 0.941844352481\nprint (prob_result.prob("pos")) # Output: 0.0581556475194'

In [None]:
while True:
    sentence  = input("Enter a sentence")
    print(sentence)
    res = tweet_class(sentence)
    if res is'neg':
        print("Negative word")
    else:
        print("Positive word")
    cls =  input("Are you want to continue '(y/n)'")
    if cls is 'n' or cls is 'N' or cls is 'no' or cls is 'NO':
        break
    else:
        continue
    
    


Enter a sentencelike you
like you
Positive word
Are you want to continue '(y/n)'mk
Enter a sentenceso sad
so sad
Negative word
Are you want to continue '(y/n)'sad
Enter a sentencesad
sad
Negative word
Are you want to continue '(y/n)'happy
Enter a sentencehappy
happy
Positive word


In [None]:
from sklearn.svm import SVC

In [33]:
clf = SVC()

In [None]:
clf.fit()