In [1]:
import pandas as pd
tweets = pd.read_csv("Tweets.csv")
list(tweets.columns.values)

['tweet_id',
 'airline_sentiment',
 'airline_sentiment_confidence',
 'negativereason',
 'negativereason_confidence',
 'airline',
 'airline_sentiment_gold',
 'name',
 'negativereason_gold',
 'retweet_count',
 'text',
 'tweet_coord',
 'tweet_created',
 'tweet_location',
 'user_timezone']

In [2]:
tweets.head()

Unnamed: 0,tweet_id,airline_sentiment,airline_sentiment_confidence,negativereason,negativereason_confidence,airline,airline_sentiment_gold,name,negativereason_gold,retweet_count,text,tweet_coord,tweet_created,tweet_location,user_timezone
0,570306133677760513,neutral,1.0,,,Virgin America,,cairdin,,0,@VirginAmerica What @dhepburn said.,,2015-02-24 11:35:52 -0800,,Eastern Time (US & Canada)
1,570301130888122368,positive,0.3486,,0.0,Virgin America,,jnardino,,0,@VirginAmerica plus you've added commercials t...,,2015-02-24 11:15:59 -0800,,Pacific Time (US & Canada)
2,570301083672813571,neutral,0.6837,,,Virgin America,,yvonnalynn,,0,@VirginAmerica I didn't today... Must mean I n...,,2015-02-24 11:15:48 -0800,Lets Play,Central Time (US & Canada)
3,570301031407624196,negative,1.0,Bad Flight,0.7033,Virgin America,,jnardino,,0,@VirginAmerica it's really aggressive to blast...,,2015-02-24 11:15:36 -0800,,Pacific Time (US & Canada)
4,570300817074462722,negative,1.0,Can't Tell,1.0,Virgin America,,jnardino,,0,@VirginAmerica and it's a really big bad thing...,,2015-02-24 11:14:45 -0800,,Pacific Time (US & Canada)


In [3]:
sentiment_counts = tweets.airline_sentiment.value_counts()
number_of_tweets = tweets.tweet_id.count()
print(sentiment_counts)

negative    9178
neutral     3099
positive    2363
Name: airline_sentiment, dtype: int64


In [4]:
import re, nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
wordnet_lemmatizer = WordNetLemmatizer()

In [5]:
def normalizer(tweet):
    only_letters = re.sub("[^a-zA-Z]", " ",tweet) 
    tokens = nltk.word_tokenize(only_letters)[2:]
    lower_case = [l.lower() for l in tokens]
    filtered_result = list(filter(lambda l: l not in stop_words, lower_case))
    lemmas = [wordnet_lemmatizer.lemmatize(t) for t in filtered_result]
    return lemmas

In [6]:
normalizer("Here is text about an airline I like.")

['text', 'airline', 'like']

In [7]:
pd.set_option('display.max_colwidth', -1) # Setting this so we can see the full content of cells
tweets['normalized_tweet'] = tweets.text.apply(normalizer)
tweets[['text','normalized_tweet']].head()

Unnamed: 0,text,normalized_tweet
0,@VirginAmerica What @dhepburn said.,"[dhepburn, said]"
1,@VirginAmerica plus you've added commercials to the experience... tacky.,"[added, commercial, experience, tacky]"
2,@VirginAmerica I didn't today... Must mean I need to take another trip!,"[today, must, mean, need, take, another, trip]"
3,"@VirginAmerica it's really aggressive to blast obnoxious ""entertainment"" in your guests' faces &amp; they have little recourse","[really, aggressive, blast, obnoxious, entertainment, guest, face, amp, little, recourse]"
4,@VirginAmerica and it's a really big bad thing about it,"[really, big, bad, thing]"


In [8]:
from nltk import ngrams
def ngrams(input_list):
    #onegrams = input_list
    bigrams = [' '.join(t) for t in list(zip(input_list, input_list[1:]))]
    trigrams = [' '.join(t) for t in list(zip(input_list, input_list[1:], input_list[2:]))]
    return bigrams+trigrams
tweets['grams'] = tweets.normalized_tweet.apply(ngrams)
tweets[['grams']].head()

Unnamed: 0,grams
0,[dhepburn said]
1,"[added commercial, commercial experience, experience tacky, added commercial experience, commercial experience tacky]"
2,"[today must, must mean, mean need, need take, take another, another trip, today must mean, must mean need, mean need take, need take another, take another trip]"
3,"[really aggressive, aggressive blast, blast obnoxious, obnoxious entertainment, entertainment guest, guest face, face amp, amp little, little recourse, really aggressive blast, aggressive blast obnoxious, blast obnoxious entertainment, obnoxious entertainment guest, entertainment guest face, guest face amp, face amp little, amp little recourse]"
4,"[really big, big bad, bad thing, really big bad, big bad thing]"


In [9]:
import collections
def count_words(input):
    cnt = collections.Counter()
    for row in input:
        for word in row:
            cnt[word] += 1
    return cnt

In [10]:
tweets[(tweets.airline_sentiment == 'negative')][['grams']].apply(count_words)['grams'].most_common(20)

[('http co', 449),
 ('customer service', 438),
 ('cancelled flightled', 425),
 ('late flight', 215),
 ('cancelled flighted', 196),
 ('flight cancelled', 185),
 ('late flightr', 144),
 ('cancelled flight', 131),
 ('hold hour', 128),
 ('flightled flight', 123),
 ('flight cancelled flightled', 117),
 ('flight delayed', 115),
 ('cancelled flightled flight', 107),
 ('call back', 106),
 ('booking problem', 98),
 ('gate agent', 83),
 ('flight flight', 74),
 ('hour late', 69),
 ('delayed flight', 69),
 ('flight attendant', 60)]

In [11]:
tweets[(tweets.airline_sentiment == 'positive')][['grams']].apply(count_words)['grams'].most_common(20)

[('http co', 233),
 ('customer service', 91),
 ('flight attendant', 25),
 ('quick response', 19),
 ('great flight', 17),
 ('best airline', 16),
 ('great job', 16),
 ('great service', 16),
 ('gate agent', 16),
 ('booking problem', 15),
 ('thanks help', 15),
 ('thank much', 15),
 ('good work', 14),
 ('fleet fleek', 14),
 ('fleek http', 14),
 ('fleet fleek http', 14),
 ('fleek http co', 14),
 ('guy rock', 13),
 ('looking forward', 13),
 ('great customer', 12)]

In [12]:
tweets[(tweets.airline_sentiment == 'neutral')][['grams']].apply(count_words)['grams'].most_common(20)

[('http co', 523),
 ('fleet fleek', 103),
 ('fleek http', 101),
 ('fleet fleek http', 101),
 ('fleek http co', 101),
 ('cancelled flightled', 47),
 ('jetblue fleet', 39),
 ('jetblue fleet fleek', 39),
 ('booking problem', 35),
 ('flight booking', 28),
 ('flight booking problem', 28),
 ('passenger wall', 21),
 ('customer service', 20),
 ('flight tomorrow', 19),
 ('rt jetblue', 19),
 ('cancelled flight', 18),
 ('late flight', 17),
 ('rt jetblue fleet', 17),
 ('wall street', 17),
 ('please help', 16)]

In [13]:
import numpy as np
from scipy.sparse import hstack
from sklearn.feature_extraction.text import CountVectorizer
count_vectorizer = CountVectorizer(ngram_range=(1,2))

In [14]:
vectorized_data = count_vectorizer.fit_transform(tweets.text)
indexed_data = hstack((np.array(range(0,vectorized_data.shape[0]))[:,None], vectorized_data))

In [15]:
def sentiment2target(sentiment):
    return {
        'negative': 0,
        'neutral': 1,
        'positive' : 2
    }[sentiment]
targets = tweets.airline_sentiment.apply(sentiment2target)

In [16]:
from sklearn.model_selection import train_test_split
data_train, data_test, targets_train, targets_test = train_test_split(indexed_data, targets, test_size=0.4, random_state=0)
data_train_index = data_train[:,0]
data_train = data_train[:,1:]
data_test_index = data_test[:,0]
data_test = data_test[:,1:]

In [17]:
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
clf = OneVsRestClassifier(svm.SVC(gamma=0.01, C=100., probability=True, class_weight='balanced', kernel='linear'))
clf_output = clf.fit(data_train, targets_train)

In [18]:
clf.score(data_test, targets_test)

0.7851775956284153

In [22]:
sentences = count_vectorizer.transform([
    "What a great airline, the trip was a pleasure!",
    "My issue was quickly resolved after calling customer support. Thanks!",
    "What the hell! My flight was cancelled again. This sucks!",
    "Service was awful. I'll never fly with you again.",
    "You fuckers lost my luggage. Never again!",
    "I have mixed feelings about airlines. I don't know what I think.",
    ""
])
parr = clf.predict_proba(sentences)
parr

array([[0.20938008, 0.05917763, 0.73144229],
       [0.14280547, 0.07146739, 0.78572714],
       [0.94217319, 0.04060243, 0.01722439],
       [0.88917314, 0.07371555, 0.0371113 ],
       [0.97319008, 0.01770168, 0.00910824],
       [0.46820564, 0.50078587, 0.0310085 ],
       [0.26705541, 0.51905528, 0.21388932]])

In [23]:
type(parr)

numpy.ndarray

In [26]:
parr[0]

array([0.20938008, 0.05917763, 0.73144229])

In [27]:
type(parr[0])

numpy.ndarray

In [28]:
parr[0][0]

0.20938007694287428

In [29]:
type(parr[0][0])

numpy.float64

In [31]:
np.argmax(parr)

12

In [32]:
np.argmax(parr[0])

2

In [35]:
type(sentences)

scipy.sparse.csr.csr_matrix

In [36]:
sentences.shape

(7, 117630)

In [37]:
type(sentences.shape) 

tuple

In [53]:
for index in range(7):
    classed = np.argmax(parr[index])
    print(classed)

2
2
0
0
0
1
1


In [54]:
from credentials import *;

In [56]:
import tweepy
import csv

auth = tweepy.AppAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)

api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [22]:
import sys
import os

searchQuery = 'apple'  # KEYWORD
maxTweets = 200
tweetsPerQry = 100  # this is the max the API permits
fName = 'five.csv'


# If results from a specific ID onwards are reqd, set since_id to that ID.
# else default to no lower limit, go as far back as API allows
sinceId = None

# If results only below a specific ID are, set max_id to that ID.
# else default to no upper limit, start from the most recent tweet matching the search query.
max_id = -1

tweetCount = 0
print("Downloading max {0} tweets".format(maxTweets))
with open(fName, 'w', newline='') as csvfile:
    fdnames=['Time','Text']
    writer = csv.DictWriter(csvfile, fieldnames=fdnames)
    writer.writeheader()
    
    while tweetCount < maxTweets:
        try:
            if (max_id <= 0):
                if (not sinceId):
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry, lang = "en")
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry, lang = "en",
                                            since_id=sinceId)
            else:
                if (not sinceId):
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry, lang = "en",
                                            max_id=str(max_id - 1))
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry, lang = "en",
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)
            if not new_tweets:
                print("No more tweets found")
                break
            for tweet in new_tweets:
                writer.writerow({'Time': tweet.created_at, 'Text': tweet.text.encode('utf-8')})
                
            tweetCount += len(new_tweets)
            print("Downloaded {0} tweets".format(tweetCount))
            max_id = new_tweets[-1].id
        except tweepy.TweepError as e:
            # Just exit if any error
            print("some error : " + str(e))
            break

print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))


Downloading max 200 tweets
Downloaded 100 tweets
Downloaded 200 tweets
Downloaded 200 tweets, Saved to five.csv


In [2]:
get_tweets=pd.read_csv("five.csv")

NameError: name 'pd' is not defined

In [24]:
get_tweets.head(10)

Unnamed: 0,Time,Text
0,2019-02-24 19:22:47,b'RT @ComicsinMotionP: This week we\xe2\x80\x99re joined by @popanimecomics to review Alita: Battle Angel. Promo for @GenuineChitChat. #britpodscene #bo\xe2\x80\xa6'
1,2019-02-24 19:22:47,"b'RT @Shazam: Start your morning with the newest tracks from @MarcAnthony, @steveaoki x @IAmAlanWalker, @OfficialMonstaX, @JohnMayer, @Pink,\xe2\x80\xa6'"
2,2019-02-24 19:22:46,"b""RT @51TJK: Sadly,.\xe2\x81\xa6@theresa_may\xe2\x81\xa9, that \xe2\x80\x9clargest democratic exercise in this country's history\xe2\x80\x9d was also the vote most traduced by foreign m\xe2\x80\xa6"""
3,2019-02-24 19:22:46,b'More tolerant liberal behavior https://t.co/6mZCa2kUPn'
4,2019-02-24 19:22:45,b'RT @OnePerfectShot: THE NICE GUYS (2016)\n\nCinematography by Philippe Rousselot \nDirected by Shane Black\nBuy or rent via @AppleTV: https://t\xe2\x80\xa6'
5,2019-02-24 19:22:45,b'RT @drharryedwards: When will we stop being surprised at such people telling us who they really are? Let\xe2\x80\x99s just believe them and respond ac\xe2\x80\xa6'
6,2019-02-24 19:22:44,b'@gardyfrombury @RickMontreal @RUH_11_95 @ksa_leader @nytimes He liked apple by the way \xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3'
7,2019-02-24 19:22:43,"b'RT @peteandbas: Do One is out on all platforms people, Spotify, Apple Music and all that\n\nhttps://t.co/Hjr1aXJhV8 https://t.co/uVpIuWCtqx'"
8,2019-02-24 19:22:43,b'RT @JoelOsteen: You\xe2\x80\x99re the Joshua generation. There\xe2\x80\x99s a blessing on your life that\xe2\x80\x99s going to thrust you to a new level. Watch Joel\xe2\x80\x99s new m\xe2\x80\xa6'
9,2019-02-24 19:22:42,b'RT @Brian_Cashman_: Apple copying the improvements of the Samsung S10 to integrate them into your iPhone XI #GalaxyS10 https://t.co/n2vxPPU\xe2\x80\xa6'


In [25]:
#get_tweets.drop(["Time"], axis=1, inplace=True)

In [26]:
get_tweets.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

Unnamed: 0,Text
0,b'RT @ComicsinMotionP: This week we\xe2\x80\x99re joined by @popanimecomics to review Alita: Battle Angel. Promo for @GenuineChitChat. #britpodscene #bo\xe2\x80\xa6'
1,"b'RT @Shazam: Start your morning with the newest tracks from @MarcAnthony, @steveaoki x @IAmAlanWalker, @OfficialMonstaX, @JohnMayer, @Pink,\xe2\x80\xa6'"
2,"b""RT @51TJK: Sadly,.\xe2\x81\xa6@theresa_may\xe2\x81\xa9, that \xe2\x80\x9clargest democratic exercise in this country's history\xe2\x80\x9d was also the vote most traduced by foreign m\xe2\x80\xa6"""
3,b'More tolerant liberal behavior https://t.co/6mZCa2kUPn'
4,b'RT @OnePerfectShot: THE NICE GUYS (2016)\n\nCinematography by Philippe Rousselot \nDirected by Shane Black\nBuy or rent via @AppleTV: https://t\xe2\x80\xa6'
5,b'RT @drharryedwards: When will we stop being surprised at such people telling us who they really are? Let\xe2\x80\x99s just believe them and respond ac\xe2\x80\xa6'
6,b'@gardyfrombury @RickMontreal @RUH_11_95 @ksa_leader @nytimes He liked apple by the way \xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3'
7,"b'RT @peteandbas: Do One is out on all platforms people, Spotify, Apple Music and all that\n\nhttps://t.co/Hjr1aXJhV8 https://t.co/uVpIuWCtqx'"
8,b'RT @JoelOsteen: You\xe2\x80\x99re the Joshua generation. There\xe2\x80\x99s a blessing on your life that\xe2\x80\x99s going to thrust you to a new level. Watch Joel\xe2\x80\x99s new m\xe2\x80\xa6'
9,b'RT @Brian_Cashman_: Apple copying the improvements of the Samsung S10 to integrate them into your iPhone XI #GalaxyS10 https://t.co/n2vxPPU\xe2\x80\xa6'


In [27]:
get_tweets['NText'] = count_vectorizer.transform(get_tweets['Text'])

In [28]:
get_tweets.head(5)

Unnamed: 0,Text,NText
0,b'RT @ComicsinMotionP: This week we\xe2\x80\x99re joined by @popanimecomics to review Alita: Battle Angel. Promo for @GenuineChitChat. #britpodscene #bo\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1"
1,"b'RT @Shazam: Start your morning with the newest tracks from @MarcAnthony, @steveaoki x @IAmAlanWalker, @OfficialMonstaX, @JohnMayer, @Pink,\xe2\x80\xa6'","(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1"
2,"b""RT @51TJK: Sadly,.\xe2\x81\xa6@theresa_may\xe2\x81\xa9, that \xe2\x80\x9clargest democratic exercise in this country's history\xe2\x80\x9d was also the vote most traduced by foreign m\xe2\x80\xa6""","(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1"
3,b'More tolerant liberal behavior https://t.co/6mZCa2kUPn',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1"
4,b'RT @OnePerfectShot: THE NICE GUYS (2016)\n\nCinematography by Philippe Rousselot \nDirected by Shane Black\nBuy or rent via @AppleTV: https://t\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1"


In [37]:
get_tweets['Sentiment'] = 0

In [None]:
prob = clf.predict_proba(get_tweets[0,'NText'])
print(prob,np.argmax(prob),"\n")
get_tweets[0,'Sentiment'] = np.argmax(prob)

In [1]:
for index,nt in get_tweets.iterrows():
    prob = clf.predict_proba(nt.NText)
    print(index,prob[index],np.argmax(prob[index]),"\n") #twas prob
    nt.Sentiment = np.argmax(prob[index])  #prob[index]???? #twas prob

NameError: name 'get_tweets' is not defined

In [39]:
get_tweets

Unnamed: 0,Text,NText,Sentiment
0,b'RT @ComicsinMotionP: This week we\xe2\x80\x99re joined by @popanimecomics to review Alita: Battle Angel. Promo for @GenuineChitChat. #britpodscene #bo\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
1,"b'RT @Shazam: Start your morning with the newest tracks from @MarcAnthony, @steveaoki x @IAmAlanWalker, @OfficialMonstaX, @JohnMayer, @Pink,\xe2\x80\xa6'","(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
2,"b""RT @51TJK: Sadly,.\xe2\x81\xa6@theresa_may\xe2\x81\xa9, that \xe2\x80\x9clargest democratic exercise in this country's history\xe2\x80\x9d was also the vote most traduced by foreign m\xe2\x80\xa6""","(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
3,b'More tolerant liberal behavior https://t.co/6mZCa2kUPn',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
4,b'RT @OnePerfectShot: THE NICE GUYS (2016)\n\nCinematography by Philippe Rousselot \nDirected by Shane Black\nBuy or rent via @AppleTV: https://t\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
5,b'RT @drharryedwards: When will we stop being surprised at such people telling us who they really are? Let\xe2\x80\x99s just believe them and respond ac\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
6,b'@gardyfrombury @RickMontreal @RUH_11_95 @ksa_leader @nytimes He liked apple by the way \xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3\xf0\x9f\xa4\xa3',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
7,"b'RT @peteandbas: Do One is out on all platforms people, Spotify, Apple Music and all that\n\nhttps://t.co/Hjr1aXJhV8 https://t.co/uVpIuWCtqx'","(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
8,b'RT @JoelOsteen: You\xe2\x80\x99re the Joshua generation. There\xe2\x80\x99s a blessing on your life that\xe2\x80\x99s going to thrust you to a new level. Watch Joel\xe2\x80\x99s new m\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
9,b'RT @Brian_Cashman_: Apple copying the improvements of the Samsung S10 to integrate them into your iPhone XI #GalaxyS10 https://t.co/n2vxPPU\xe2\x80\xa6',"(0, 12054)\t1\n (0, 16916)\t1\n (0, 21200)\t1\n (0, 41556)\t1\n (0, 57504)\t1\n (0, 80338)\t1\n (0, 80343)\t1\n (0, 84053)\t1\n (0, 84781)\t1\n (0, 98660)\t1\n (0, 99045)\t1\n (0, 100288)\t1\n (0, 111125)\t1\n (0, 111663)\t1\n (0, 111735)\t1\n (1, 43237)\t1\n (1, 66165)\t1\n (1, 66249)\t1\n (1, 68551)\t1\n (1, 77842)\t1\n (1, 84781)\t1\n (1, 91776)\t1\n (1, 95906)\t1\n (1, 102790)\t1\n (1, 113771)\t1\n :\t:\n (198, 116536)\t1\n (199, 10225)\t1\n (199, 12046)\t1\n (199, 12924)\t1\n (199, 14513)\t1\n (199, 24967)\t2\n (199, 33730)\t1\n (199, 41462)\t1\n (199, 41556)\t1\n (199, 41959)\t1\n (199, 51477)\t2\n (199, 51478)\t2\n (199, 54282)\t1\n (199, 69755)\t1\n (199, 72670)\t1\n (199, 73275)\t1\n (199, 95148)\t1\n (199, 95196)\t1\n (199, 95906)\t1\n (199, 96052)\t1\n (199, 104071)\t1\n (199, 107048)\t1\n (199, 107209)\t1\n (199, 113097)\t1\n (199, 113185)\t1",0
