In [19]:
import pandas as pd
import numpy as np
import os

In [9]:
# --- Tensor shapes -----------------------------------------------------------
batchSize = 24        # rows of the input/label placeholders
maxSeqLength = 25     # tokens kept per sentence (longer input is truncated)
numDimensions = 300   # last axis of the zero-initialised `data` variable

# --- Model size --------------------------------------------------------------
lstmUnits = 64        # hidden units of the LSTM cell
numClasses = 2        # width of the output layer (two sentiment classes)

# --- Training ----------------------------------------------------------------
# Not referenced by the cells visible in this notebook; presumably the number
# of training steps used when the checkpoint was produced -- TODO confirm.
iterations = 100000

In [10]:
# Vocabulary: the entries are byte strings on disk, so decode each one to `str`.
wordsList = [rawWord.decode('UTF-8') for rawWord in np.load('wordsList.npy').tolist()]

# Embedding matrix. Positions in `wordsList` are used as row ids into this
# array by the embedding lookup further down.
wordVectors = np.load('wordVectors.npy')

In [11]:
import tensorflow as tf
# Start from an empty default graph so that re-running this cell does not pile
# a second copy of every op/variable on top of the previous build.
tf.reset_default_graph()

# Graph inputs: one-hot style labels and a fixed-size batch of word ids.
# `labels` is only consumed by the accuracy op at the bottom of this cell.
labels = tf.placeholder(tf.float32, [batchSize, numClasses])
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])

# NOTE(review): this Variable is overwritten by the very next statement and is
# never used by the model. It is kept on purpose: the Saver created in the
# restore cell matches variables by name, and the unnamed Variables in this
# cell presumably receive auto-generated names in creation order. Dropping this
# one may shift the names of `weight`/`bias` and break the checkpoint restore.
# Verify against the checkpoint before removing it.
data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]),dtype=tf.float32)
data = tf.nn.embedding_lookup(wordVectors,input_data)

lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
# This notebook only restores a trained checkpoint and runs inference, so
# dropout must be switched off. The previous keep probability of 0.25 randomly
# zeroed 75% of the LSTM outputs on every `sess.run`, which made predictions
# noisy and non-reproducible. The wrapper itself is kept so the graph structure
# stays as close as possible to the one the checkpoint was saved from.
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=1.0)
value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

# Output layer parameters; their values come from the restored checkpoint.
weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))

# Swap the first two axes so axis 0 indexes time steps, then pick the output
# of the final time step for every row of the batch.
value = tf.transpose(value, [1, 0, 2])
last = tf.gather(value, int(value.get_shape()[0]) - 1)

# Raw (un-normalised) class scores, one row per batch entry.
prediction = (tf.matmul(last, weight) + bias)

# Evaluation helpers: fraction of rows whose arg-max score matches the label.
correctPred = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1))
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))

In [13]:
# Open a session on the graph built above and load the trained weights from
# the most recent checkpoint found in the `models` directory.
sess = tf.InteractiveSession()
saver = tf.train.Saver()
latestCheckpoint = tf.train.latest_checkpoint('models')
saver.restore(sess, latestCheckpoint)

INFO:tensorflow:Restoring parameters from models/pretrained_lstm.ckpt-90000


In [14]:
# Removes punctuation, parentheses, question marks, etc., and leaves only alphanumeric characters
import re
# Matches every run of characters that is NOT a letter, a digit or a space.
strip_special_chars = re.compile("[^A-Za-z0-9 ]+")

def cleanSentences(string):
    """Normalise raw text before vocabulary lookup.

    The text is lower-cased, every "<br />" tag is replaced by a single space,
    and all characters other than letters, digits and spaces are removed.

    Args:
        string: the raw text (must be a `str`).

    Returns:
        The cleaned, lower-case text.
    """
    lowered = string.lower()
    withoutBreaks = lowered.replace("<br />", " ")
    return strip_special_chars.sub("", withoutBreaks)

# Cache used by getSentenceMatrix: remembers which vocabulary list the lookup
# table was built from, so the table is rebuilt if `wordsList` is rebound to a
# different list or changes length.
_sentenceMatrixVocabCache = {'source': None, 'size': -1, 'lookup': {}}

def getSentenceMatrix(sentence):
    """Convert one sentence into a batch-shaped matrix of vocabulary indices.

    The sentence is cleaned with `cleanSentences`, split on whitespace and
    truncated to `maxSeqLength` tokens. Each token is replaced by its position
    in the module-level `wordsList`; tokens that are not in the vocabulary get
    the index 399999.

    Only row 0 of the result carries the sentence. The remaining
    `batchSize - 1` rows, and any unused trailing columns of row 0, stay 0.

    Args:
        sentence: raw text of a single sentence (`str`).

    Returns:
        An int32 numpy array of shape `[batchSize, maxSeqLength]`.
    """
    unknownWordIndex = 399999  # Vector for unknown words

    # `list.index` scans the whole vocabulary for every token. Build a dict
    # once instead and reuse it across calls. `setdefault` keeps the FIRST
    # position of a duplicated word, which is what `list.index` returns.
    cache = _sentenceMatrixVocabCache
    if cache['source'] is not wordsList or cache['size'] != len(wordsList):
        lookup = {}
        for position, vocabWord in enumerate(wordsList):
            lookup.setdefault(vocabWord, position)
        cache['source'] = wordsList
        cache['size'] = len(wordsList)
        cache['lookup'] = lookup
    wordToIndex = cache['lookup']

    sentenceMatrix = np.zeros([batchSize, maxSeqLength], dtype='int32')
    tokens = cleanSentences(sentence).split()
    for indexCounter, word in enumerate(tokens[:maxSeqLength]):
        sentenceMatrix[0, indexCounter] = wordToIndex.get(word, unknownWordIndex)
    return sentenceMatrix

## Loading Mobile Brands Data

In [35]:
# Directory holding one CSV of tweets per mobile brand.
brand_data_dir = "twitter_data/BrandMobile_1/"

# Full path of each brand's CSV, built from the directory above.
(apple_tweet_path,
 huawei_tweet_path,
 oppo_tweet_path,
 samsung_tweet_path,
 vivo_tweet_path) = (
    os.path.join(brand_data_dir, csvName)
    for csvName in ("Apple.csv", "Huawei.csv", "Oppo.csv", "Samsung.csv", "Vivo.csv")
)

In [36]:
def _readBrandTweets(csvPath, **readerOptions):
    """Read one brand CSV with pandas' python parsing engine."""
    return pd.read_csv(csvPath, engine='python', **readerOptions)

apple_tweet_df = _readBrandTweets(apple_tweet_path)
# The Huawei file contains a malformed row, so bad lines are skipped instead of
# aborting the load.
# NOTE: `error_bad_lines` was deprecated in pandas 1.3 in favour of
# `on_bad_lines='skip'`; it is kept for the pandas version this notebook runs on.
huawei_tweet_df = _readBrandTweets(huawei_tweet_path, error_bad_lines=False)
oppo_tweet_df = _readBrandTweets(oppo_tweet_path)
samsung_tweet_df = _readBrandTweets(samsung_tweet_path)
vivo_tweet_df = _readBrandTweets(vivo_tweet_path)

Skipping line 261: ',' expected after '"'


In [22]:
# Show the first rows of the Apple frame to check how the CSV columns parsed.
apple_tweet_df.head()

Unnamed: 0,Created-At,From-User,From-User-Id,To-User,To-User-Id,Language,Source,Text,Geo-Location-Latitude,Geo-Location-Longitude,Retweet-Count,Id
0,10/5/17 2:03 AM,The Times of London,6107422.0,,-1.0,en,<a href='https://www.sprinklr.com' rel='nofoll...,Upgrading to Apple�s latest iPhone operating s...,,,729.0,9.156535e+17
1,10/5/17 5:51 AM,hayley from Paramore,40981800.0,,-1.0,en,<a href='http://twitter.com/download/iphone' r...,need to feel feelings tonight in the big apple...,,,659.0,9.15711e+17
2,10/4/17 8:00 PM,Stranger Things,3320479000.0,,-1.0,en,<a href='https://studio.twitter.com' rel='nofo...,*do do do do do do do do* Stranger Things: The...,,,4424.0,9.155623e+17
3,10/5/17 4:15 PM,julia robert,2191746000.0,,-1.0,en,<a href='http://twitter.com' rel='nofollow'>Tw...,RT @techbeardblog: The Best iOS 11 Hidden Feat...,,,12.0,9.15868e+17
4,10/5/17 4:15 PM,Peggy L Henderson,492232300.0,,-1.0,en,<a href='http://www.Feed140.net' rel='nofollow...,Original and enjoyable read. Combination of hi...,,,0.0,9.15868e+17


## Prediction

In [39]:
#(0 = negative, 4 = positive)
def predict_sentiment(brand_data_df):
    """Predict a sentiment label for every row of a brand tweet frame.

    Each value of the 'Text' column is converted with `getSentenceMatrix` and
    fed through the module-level `prediction` op using the module-level `sess`
    and `input_data` placeholder. Only the first row of the returned scores is
    read, because `getSentenceMatrix` places the sentence in row 0 of the batch.

    Args:
        brand_data_df: object whose 'Text' entry is an iterable of tweet texts
            (e.g. a pandas DataFrame with a 'Text' column).

    Returns:
        A list with one label per row, in row order: 4 (positive) when the
        score at index 0 is strictly greater than the score at index 1,
        otherwise 0 (negative).
    """
    predicted_labels = []
    # Iterate by position rather than by index label: the progress check then
    # works for any index (non-integer, non-contiguous, filtered frames).
    for position, text in enumerate(brand_data_df['Text']):
        if position % 1000 == 0:
            print("index:", position)
        if not isinstance(text, str):
            # Empty CSV cells are read as NaN and numeric-looking cells as
            # numbers; neither has `.lower()`. Treat missing text as empty
            # and stringify everything else.
            text = "" if pd.isna(text) else str(text)
        input_matrix = getSentenceMatrix(text)
        predictedSentiment = sess.run(prediction, {input_data: input_matrix})[0]
        isPositive = predictedSentiment[0] > predictedSentiment[1]
        predicted_labels.append(4 if isPositive else 0)
    return predicted_labels

In [None]:
# apple_tweet_df
# huawei_tweet_df
# oppo_tweet_df
# samsung_tweet_df
# vivo_tweet_df

In [54]:
# Label every Apple tweet, attach the labels as a new column and write the
# augmented frame next to the input data.
apple_predicted_sentiment = predict_sentiment(apple_tweet_df)
apple_tweet_df['sentiment'] = apple_predicted_sentiment
apple_output_path = os.path.join(brand_data_dir, "Apple_with_sentiment.csv")
apple_tweet_df.to_csv(apple_output_path)

index: 0


In [55]:
# Label every Huawei tweet, attach the labels as a new column and write the
# augmented frame next to the input data.
huawei_predicted_sentiment = predict_sentiment(huawei_tweet_df)
huawei_tweet_df['sentiment'] = huawei_predicted_sentiment
# The output path used to be computed twice, the first time as a bare
# expression whose result was thrown away. Compute it once and reuse it.
huawei_output_path = os.path.join(brand_data_dir, "Huawei_with_sentiment.csv")
huawei_tweet_df.to_csv(huawei_output_path)

index: 0


In [56]:
# Label every Oppo tweet, attach the labels as a new column and write the
# augmented frame next to the input data.
oppo_predicted_sentiment = predict_sentiment(oppo_tweet_df)
oppo_tweet_df['sentiment'] = oppo_predicted_sentiment
oppo_output_path = os.path.join(brand_data_dir, "Oppo_with_sentiment.csv")
oppo_tweet_df.to_csv(oppo_output_path)

index: 0


In [57]:
# Label every Samsung tweet, attach the labels as a new column and write the
# augmented frame next to the input data.
samsung_predicted_sentiment = predict_sentiment(samsung_tweet_df)
samsung_tweet_df['sentiment'] = samsung_predicted_sentiment
# The output path used to be computed twice, the first time as a bare
# expression whose result was thrown away. Compute it once and reuse it.
samsung_output_path = os.path.join(brand_data_dir, "Samsung_with_sentiment.csv")
samsung_tweet_df.to_csv(samsung_output_path)

index: 0


In [58]:
# Label every Vivo tweet, attach the labels as a new column and write the
# augmented frame next to the input data.
vivo_predicted_sentiment = predict_sentiment(vivo_tweet_df)
vivo_tweet_df['sentiment'] = vivo_predicted_sentiment
vivo_output_path = os.path.join(brand_data_dir, "Vivo_with_sentiment.csv")
vivo_tweet_df.to_csv(vivo_output_path)

index: 0
