In [1]:
# Import necessary modules
import numpy as np
import nltk
from nltk.corpus import twitter_samples
import json
from twitter import *
from gender import GenderDetector
import os
from datetime import *
from ethnicity import *
import copy
import difflib

running make_dicts...


In [2]:
import time
# Import Bokeh Packages
from bokeh.layouts import row, column, widgetbox, layout
from bokeh.models.widgets import Button, TextInput, Select, Div, DataTable, TableColumn, NumberFormatter, Panel, Tabs
from bokeh.models import HoverTool, ColumnDataSource, GMapOptions
from bokeh.plotting import show, figure, gmap
from bokeh.io import show, push_notebook, output_notebook, reset_output
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.document import Document
# pie chart
from math import pi
import pandas as pd
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import cumsum

In [3]:
# Module-level configuration and shared state used by the plotting and server cells.
# Text pre-filled in the "Phone Model" input box.
DEFAULT_PHONE = 'iPhone XS'
# Wedge colours for the sentiment pie chart (two entries: positive, negative).
CHART_COLORS = ['#5ae544', '#e54444']
# Wedge colours for the gender pie chart (three entries).
CHART_COLORS_G = ['#42c5f4', '#f441be','#000000']
# Wedge colours for the race pie chart; sliced to the number of categories at plot time.
CHART_COLORS_R = ['#cde5cd','#eaf9f9','#960a24','#ca8491','#00a5c8','#85bfc6','#00c8af',
                  '#9bcdff','#9bffff','#9abd19','#23788a','#35a2af','#df2740','#132759',
                  '#ff0000','#ffff00','#220007','#cc6845','#bc7754','#432c22','#a87057',
                  '#373e2e', '#352e3e','#ae6339','#271374','#800000']
PADDING = 0.1
# Width/height applied to every tab panel's column layout.
APP_WIDTH = 650
APP_HEIGHT = 700
# Search terms for the competing phones; index-aligned with phoneTag below.
PhoneList = ['iPhone XS','Mate','Galaxy Note','Pixel','oneplus','P20','LG','XPERIA','Galaxy S9']
# Display labels for the phones in PhoneList (same order, same length).
phoneTag = ['iPhone XS','Mate20', 'Galaxy Note8', 'Pixel3', 'Oneplus6T', 'HW P20', 'LG G7','XPERIA','Galaxy S9']
# Working copies rebound by the search callback (target phone removed) and read by plotTabs.
phoneTagCopy = []
PhoneListCopy = []

In [4]:
# Classifies a tweet as either positive ('pos') or negative ('neg').
# TODO: make this a singleton -- training is expensive and is repeated for every instance.
class TweetEmotionClassifier:
    """Naive Bayes sentiment classifier trained on NLTK's ``twitter_samples`` corpus.

    Tweets are represented as bag-of-words feature dicts over the most frequent
    lower-cased, whitespace-separated tokens of the training corpus.

    Attributes:
        features: list of the most common corpus tokens used as features.
        classifier: the trained ``nltk.NaiveBayesClassifier``.
    """

    # Number of most frequent corpus tokens used as features.
    FEATURE_COUNT = 600

    def __init__(self):
        """Load the positive/negative sample tweets and train the classifier."""
        # Each corpus file is read and tokenized exactly once; the token lists are
        # reused both for the frequency distribution and for the training samples.
        pos_tweets = self.__load_tokenized_tweets('positive_tweets.json')
        neg_tweets = self.__load_tokenized_tweets('negative_tweets.json')
        tweets = [(words, 'pos') for words in pos_tweets] + [(words, 'neg') for words in neg_tweets]

        # Positive words first, then negative words, in corpus order.
        all_words = [word for (words, _) in tweets for word in words]
        word_freq_dist = nltk.FreqDist(all_words)
        self.features = [word for (word, _) in word_freq_dist.most_common(self.FEATURE_COUNT)]

        training_samples = [(self.__extract_features(words), category) for (words, category) in tweets]
        self.classifier = nltk.NaiveBayesClassifier.train(training_samples)

    @staticmethod
    def __load_tokenized_tweets(corpus_file):
        """Return the tweets of ``corpus_file`` as lists of lower-cased tokens."""
        return [[word.lower() for word in tweet.split()]
                for tweet in twitter_samples.strings(corpus_file)]

    def classify(self, tweet):
        """Classify a tweet string.

        Args:
            tweet: raw tweet text.

        Returns:
            Whatever label the trained classifier assigns; the training labels
            are ``'pos'`` and ``'neg'``.
        """
        words = [word.lower() for word in tweet.split()]
        return self.classifier.classify(self.__extract_features(words))

    def __extract_features(self, tweet):
        """Map an iterable of tokens to a ``{'contains(<word>)': bool}`` feature dict."""
        tweet_words = set(tweet)
        return {'contains(%s)' % feature: (feature in tweet_words) for feature in self.features}

# Tweet data object
class Tweet:
    """Lightweight value object holding the fields of a tweet used by the analysis."""

    def __init__(self, tweet):
        """Copy the relevant fields out of a tweet status mapping.

        Args:
            tweet: mapping with the keys ``'text'``, ``'created_at'`` and
                ``'user'``; ``'user'`` must itself provide ``'name'``,
                ``'screen_name'`` and ``'location'``.

        Raises:
            KeyError: if any of those keys is missing.
        """
        self.text = tweet['text']
        self.date = tweet['created_at']
        author = tweet['user']
        self.user_name = author['name']
        self.user_screen_name = author['screen_name']
        self.location = author['location']

    def get_text(self):
        """Return the tweet body."""
        return self.text

    def get_user_name(self):
        """Return the author's display name."""
        return self.user_name

    def get_user_screen_name(self):
        """Return the author's screen name (handle)."""
        return self.user_screen_name

    def get_location(self):
        """Return the author's profile location as stored in the status."""
        return self.location

    def get_date(self):
        """Return the tweet's ``created_at`` value."""
        return self.date
    
class MobileFeaturesAnalyzer:
    """Detects which phone features a tweet mentions and the sentiment towards each.

    Attributes:
        emotion_classifier: object exposing ``classify(text)``; the label
            ``'pos'`` is counted as positive, anything else as not positive.
        mobile_features_lookup_table: mapping ``feature -> list of keywords``
            loaded from ``feature_keywords.json``.
    """

    def __init__(self, emotion_classifier):
        """Store the classifier and load the feature keyword table from disk."""
        self.emotion_classifier = emotion_classifier
        # Context manager so the file handle is closed deterministically.
        with open("feature_keywords.json", "r") as lookup_file:
            self.mobile_features_lookup_table = json.load(lookup_file)

    def analyze_single_tweet(self, tweet):
        """Return ``{feature: 'pos' | 'neg'}`` for every feature mentioned in ``tweet``.

        A keyword counts as a mention only when it stands alone, i.e. it is not
        directly preceded or followed by an alphabetic character. Matching is
        case-insensitive, and only the first occurrence of each keyword is
        inspected. A feature is ``'pos'`` when at least half of its related
        sentences are classified ``'pos'``.

        Args:
            tweet: raw tweet text.

        Returns:
            Dict mapping each detected feature to ``'pos'`` or ``'neg'``;
            empty when no feature keyword is found.
        """
        feature_sentences = {}
        # Search, boundary checks and sentence extraction all use the SAME
        # lower-cased string so that indices stay consistent. Searching the
        # lower-cased text but indexing the original made capitalised
        # mentions resolve to index -1 and get dropped.
        lowered_tweet = tweet.lower()
        for feature, key_words in self.mobile_features_lookup_table.items():
            for key_word in key_words:
                keyword_begin = lowered_tweet.find(key_word)
                if keyword_begin == -1:
                    continue
                keyword_end = keyword_begin + len(key_word) - 1
                # The start/end of the tweet is a valid word boundary.
                preceded_by_letter = keyword_begin > 0 and lowered_tweet[keyword_begin - 1].isalpha()
                followed_by_letter = keyword_end + 1 < len(lowered_tweet) and lowered_tweet[keyword_end + 1].isalpha()
                if preceded_by_letter or followed_by_letter:
                    continue
                related_sentence = self.__extract_keyword_related_sentence(keyword_begin, keyword_end, lowered_tweet, key_word)
                sentences = feature_sentences.setdefault(feature, [])
                if related_sentence not in sentences:
                    sentences.append(related_sentence)

        extracted_features = {}
        for feature, sentences in feature_sentences.items():
            # Classify every sentence exactly once.
            pos_count = sum(1 for sentence in sentences if self.emotion_classifier.classify(sentence) == 'pos')
            extracted_features[feature] = 'pos' if pos_count / len(sentences) >= 0.5 else 'neg'
        return extracted_features

    def __extract_keyword_related_sentence(self, begin, end, tweet, key_word):
        """Return the sentence fragment of ``tweet`` surrounding the keyword.

        The fragment grows leftwards from ``begin`` until a termination token
        (excluded) and rightwards from ``end`` until a termination token
        (included) or the end of the text.
        """
        termination_tokens = ['.', ',', '!', '?', '"']
        sentence = key_word

        i = begin - 1
        while i >= 0 and tweet[i] not in termination_tokens:
            sentence = tweet[i] + sentence
            i -= 1

        j = end + 1
        while j < len(tweet):
            sentence += tweet[j]
            if tweet[j] in termination_tokens:
                break
            j += 1

        return sentence

    def feature_statistics(self, tweets):
        """Count, per feature, how many of ``tweets`` (iterable of str) mention it."""
        feature_statistics = {}
        for tweet in tweets:
            for feature in self.analyze_single_tweet(tweet):
                feature_statistics[feature] = feature_statistics.get(feature, 0) + 1
        return feature_statistics

    def analyze_tweets(self, tweets):
        """Return ``{feature: {'pos': n, 'neg': m}}`` aggregated over ``tweets``."""
        feature_analysis = {}
        for tweet in tweets:
            for feature, attitude in self.analyze_single_tweet(tweet).items():
                counts = feature_analysis.setdefault(feature, {'pos': 0, 'neg': 0})
                counts['pos' if attitude == 'pos' else 'neg'] += 1
        return feature_analysis

    def most_liked_feature(self, tweets):
        """Return the feature with the highest share of positive tweets.

        Ties go to the feature encountered first. Returns ``None`` when no
        feature is mentioned in any tweet.
        """
        analysis = self.analyze_tweets(tweets)
        if not analysis:
            return None
        return max(analysis.items(), key=lambda item: item[1]['pos'] / (item[1]['pos'] + item[1]['neg']))[0]

    def most_disliked_feature(self, tweets):
        """Return the feature with the highest share of negative tweets.

        Ties go to the feature encountered first. Returns ``None`` when no
        feature is mentioned in any tweet.
        """
        analysis = self.analyze_tweets(tweets)
        if not analysis:
            return None
        return max(analysis.items(), key=lambda item: item[1]['neg'] / (item[1]['pos'] + item[1]['neg']))[0]
    
class Tweets:
    """Tweets fetched for a search query, with demographic and sentiment statistics.

    Attributes:
        tweets: list of ``Tweet`` objects kept after filtering and de-duplication.
        ethnicity_predictor: result of ``Ethnicity().make_dicts()``; used via
            ``.get(names)`` in ``race_statistics``.
    """

    def __init__(self, query, size=1000):
        """Authenticate with the credentials in ``credential.json`` and fetch tweets.

        Args:
            query: search query string.
            size: target number of tweets to collect.
        """
        with open("credential.json", "r") as credential_file:
            credential = json.load(credential_file)
        auth = OAuth(credential['OAUTH_TOKEN'], credential['OAUTH_TOKEN_SECRET'], credential['CONSUMER_KEY'], credential['CONSUMER_SECRET'])
        twitter = Twitter(auth=auth)
        self.tweets = self.__fetch_tweets(query, twitter, size)
        self.ethnicity_predictor = Ethnicity().make_dicts()

    def __filter_non_informative_tweets(self, tweets):
        """Drop tweets that are too short, too fragmented, or look like advertisements."""
        MINIMUM_WORD_COUNT = 6
        MAXIMUM_SHORT_SENTENCE_COUNT = 2
        filtered_tweets = []
        for tweet in tweets:
            text = tweet.get_text()
            tweet_word_count = len(text.split())
            # Non-empty lines with fewer than five words count as "short sentences".
            short_sentence_count = sum(1 for line in text.split('\n') if 0 < len(line.split()) < 5)
            if tweet_word_count < MINIMUM_WORD_COUNT or short_sentence_count > MAXIMUM_SHORT_SENTENCE_COUNT or is_advertisement(text):
                continue
            filtered_tweets.append(tweet)
        return filtered_tweets

    def __remove_duplicate_tweets(self, tweets):
        """Keep only the first tweet for each distinct text, preserving order."""
        filtered_tweets = []
        seen_texts = set()
        for tweet in tweets:
            text = tweet.get_text()
            if text not in seen_texts:
                seen_texts.add(text)
                filtered_tweets.append(tweet)
        return filtered_tweets

    def __fetch_tweets(self, query, twitter, size):
        """Page backwards through search results until ``size`` tweets survive filtering.

        Any API error ends the fetch and returns what has been collected so far
        (possibly an empty list).
        """
        tweets = []
        try:
            max_id = twitter.search.tweets(q=query, lang='en', count=1)['search_metadata']['max_id']
        except Exception:
            # Best effort: e.g. rate limiting or network failure -> no tweets.
            return tweets
        while len(tweets) < size:
            load_size = min(100, size - len(tweets))
            try:
                tweet_search = twitter.search.tweets(q=query, lang='en', count=load_size, max_id=max_id)
            except Exception:
                return tweets
            statuses = tweet_search['statuses']
            if not statuses:
                break
            # Continue below the oldest status of this page on the next request.
            max_id = statuses[-1]['id'] - 1
            tweets += [Tweet(status) for status in statuses]
            tweets = self.__filter_non_informative_tweets(tweets)
            tweets = self.__remove_duplicate_tweets(tweets)
        return tweets

    def get_tweets(self):
        """Return the list of collected ``Tweet`` objects."""
        return self.tweets

    def __predict_gender(self, names, screen_names):
        """Predict a gender per user, falling back to the screen name when the name fails."""
        gender_detector = GenderDetector()
        predictions = []
        for i in range(len(names)):
            prediction = gender_detector.gender(names[i])
            if prediction is None:
                prediction = gender_detector.gender(screen_names[i])
            predictions.append(prediction)
        return predictions

    def gender_statistics(self):
        """Return the fractions of ``'male'``, ``'female'`` and ``'NA'`` authors.

        All three fractions are 0 when there are no tweets.
        """
        if not self.tweets:
            return {'male': 0, 'female': 0, 'NA': 0}
        names = [tweet.get_user_name() for tweet in self.tweets]
        screen_names = [tweet.get_user_screen_name() for tweet in self.tweets]

        predictions = self.__predict_gender(names, screen_names)
        total = len(predictions)
        male_total = sum(1 for prediction in predictions if prediction == 'm')
        female_total = sum(1 for prediction in predictions if prediction == 'f')
        NA_total = total - male_total - female_total
        return {'male': male_total / total, 'female': female_total / total, 'NA': NA_total / total}

    def race_statistics(self):
        """Return ``{'<ethnicity>_percentage': fraction}`` over the tweet authors."""
        names = [tweet.get_user_name() for tweet in self.tweets]
        total = len(names)
        if total == 0:
            return {}
        # NOTE(review): assumes the predictor's result is indexable by "Ethnicity"
        # and yields one label per name -- confirm against the ethnicity module.
        predictions = self.ethnicity_predictor.get(names)
        stats = {}
        for ethnicity in predictions["Ethnicity"]:
            stats[ethnicity] = stats.get(ethnicity, 0) + 1
        return {(k + "_percentage"): v / total for k, v in stats.items()}

    def __filter_invalid_location(self, locations):
        """Keep locations of the form ``'<city>, <state>'`` present in ``cities_and_states.json``."""
        with open("cities_and_states.json", "r") as lookup_file:
            cities_states_lookup_table = json.load(lookup_file)
        filtered_locations = []
        for location in locations:
            parts = location.split(', ')
            if len(parts) < 2:
                continue
            city, state = parts[0], parts[1]
            if state in cities_states_lookup_table and city in cities_states_lookup_table[state]:
                filtered_locations.append(location)
        return filtered_locations

    def location_statistics(self):
        """Count tweets per valid location.

        Returns:
            Dict ``{location: count}``; when at least one tweet has a location,
            an additional ``'unknown'`` entry counts the locations that failed
            validation. Tweets whose location is empty or ``None`` are ignored.
        """
        # Truthiness check also skips None, which would break the ', ' test.
        locations = [tweet.get_location() for tweet in self.tweets if tweet.get_location()]
        non_filtered_locations_count = len(locations)
        if non_filtered_locations_count == 0:
            return {}
        locations = self.__filter_invalid_location(locations)
        stats = {}
        for location in locations:
            stats[location] = stats.get(location, 0) + 1
        stats['unknown'] = non_filtered_locations_count - len(locations)
        return stats

    def __classify_all(self, emotion_classifier):
        """Return the classifier label for every collected tweet, in order."""
        return [emotion_classifier.classify(tweet.get_text()) for tweet in self.get_tweets()]

    def __pos_attitude_percentage(self, attitudes):
        """Fraction of ``'pos'`` labels in ``attitudes`` (0 for an empty list)."""
        if not attitudes:
            return 0
        return sum(1 for attitude in attitudes if attitude == 'pos') / len(attitudes)

    def pos_attitude_percentage(self, emotion_classifier):
        """Return the fraction of tweets classified ``'pos'`` (0 when there are none)."""
        return self.__pos_attitude_percentage(self.__classify_all(emotion_classifier))

    def __neg_attitude_percentage(self, attitudes):
        """Fraction of ``'neg'`` labels in ``attitudes`` (0 for an empty list)."""
        if not attitudes:
            return 0
        return sum(1 for attitude in attitudes if attitude == 'neg') / len(attitudes)

    def neg_attitude_percentage(self, emotion_classifier):
        """Return the fraction of tweets classified ``'neg'`` (0 when there are none)."""
        return self.__neg_attitude_percentage(self.__classify_all(emotion_classifier))

    def overall_attitude(self, emotion_classifier):
        """Return ``'pos'``, ``'neg'`` or ``'neutral'`` (tie, including no tweets)."""
        attitudes = self.__classify_all(emotion_classifier)
        pos_percentage = self.__pos_attitude_percentage(attitudes)
        neg_percentage = self.__neg_attitude_percentage(attitudes)
        if pos_percentage > neg_percentage:
            return 'pos'
        if neg_percentage > pos_percentage:
            return 'neg'
        return 'neutral'
        
def is_advertisement(tweet):
    """Return True when the tweet text contains any advertisement keyword.

    Matching is a case-insensitive substring test, so a keyword embedded in a
    longer word also counts.

    Args:
        tweet: tweet text (str).
    """
    advertisement_keywords = ['sale', 'sales', 'sell', 'buy', 'blackfriday', 'black friday', 'cyber monday', 'discount', 'discounts', 'ebay', 'amazon', 'best buy', 'bestbuy', 'walmart', 'target', 'used', 'new', 'condition']
    return any(keyword in tweet.lower() for keyword in advertisement_keywords)
 
class CompetingProductFinder:
    """Counts tweets that mention a target product together with a competing product.

    Attributes:
        product: the target product search term.
        twitter: authenticated Twitter client used for searches.
    """

    def __init__(self, product):
        """Create an app-only (bearer token) Twitter client for ``product``.

        The bearer token is read from the ``TWITTER_BEARER_TOKEN`` environment
        variable or, failing that, from the ``BEARER_TOKEN`` key of
        ``credential.json``. Secrets must never be hardcoded in the notebook.

        Raises:
            ValueError: if no bearer token is configured.
        """
        # Debug leftover; unused by this class, kept so existing attribute access still works.
        self.CCCC = 0
        self.product = product
        with open("credential.json", "r") as credential_file:
            credential = json.load(credential_file)
        bearer_token = os.environ.get('TWITTER_BEARER_TOKEN') or credential.get('BEARER_TOKEN')
        if not bearer_token:
            raise ValueError("Twitter bearer token missing: set TWITTER_BEARER_TOKEN or add BEARER_TOKEN to credential.json")
        auth = OAuth2(bearer_token=bearer_token)
        self.twitter = Twitter(auth=auth)

    def __filter_non_informative_tweets(self, tweets):
        """Drop tweets that are too short, too fragmented, or look like advertisements."""
        MINIMUM_WORD_COUNT = 6
        MAXIMUM_SHORT_SENTENCE_COUNT = 2
        filtered_tweets = []
        for tweet in tweets:
            text = tweet.get_text()
            tweet_word_count = len(text.split())
            # Non-empty lines with fewer than five words count as "short sentences".
            short_sentence_count = sum(1 for line in text.split('\n') if 0 < len(line.split()) < 5)
            if tweet_word_count < MINIMUM_WORD_COUNT or short_sentence_count > MAXIMUM_SHORT_SENTENCE_COUNT or is_advertisement(text):
                continue
            filtered_tweets.append(tweet)
        return filtered_tweets

    def __fetch_tweets(self, potential_compete_product, d=None):
        """Fetch one day's tweets mentioning both the target and the competing product.

        Args:
            potential_compete_product: competing product search term.
            d: ``until`` date string passed to the search API; defaults to today.

        Returns:
            List of ``Tweet`` objects created on the same day of month as the
            most recent target-product tweet before ``d``. Any API error ends
            the fetch and returns what has been collected so far.
        """
        tweets = []
        if d is None:
            d = str(date.today())
        try:
            head_search = self.twitter.search.tweets(q=self.product, until=d, result_type='recent', lang='en')
        except Exception:
            # Best effort: e.g. rate limiting or network failure -> no tweets.
            return tweets
        if len(head_search['statuses']) == 0:
            return tweets
        max_id = head_search['search_metadata']['max_id']
        # Characters 8-9 of created_at hold the day of month of the newest tweet.
        day = int(head_search['statuses'][0]['created_at'][8:10])
        reached_previous_day = False
        while not reached_previous_day:
            try:
                tweet_search = self.twitter.search.tweets(q="%s %s"%(self.product, potential_compete_product), result_type='recent', until=d, lang='en', count=100, max_id=max_id)
            except Exception:
                return tweets
            statuses = tweet_search['statuses']
            if len(statuses) == 0:
                return tweets
            # Continue below the oldest status of this page on the next request.
            max_id = statuses[-1]['id'] - 1
            for tweet in statuses:
                tweet_created_day = int(tweet['created_at'][8:10])
                if tweet_created_day != day:
                    reached_previous_day = True
                    break
                tweets.append(Tweet(tweet))
        return tweets

    def compete(self, product_to_compare, d=None, filter_tweets=True):
        """Return the number of co-mention tweets for ``product_to_compare``.

        Args:
            product_to_compare: competing product search term.
            d: ``until`` date string; defaults to today.
            filter_tweets: when True, count only tweets that pass the
                non-informative/advertisement filter.
        """
        tweets = self.__fetch_tweets(product_to_compare, d=d)
        if filter_tweets == True:
            return len(self.__filter_non_informative_tweets(tweets))
        return len(tweets)

    def compete_with_multiple_products(self, products_to_compare):
        """Collect seven days of co-mention counts for several competing products.

        Returns:
            Dict with keys ``'tweets_count_by_day'`` (product -> date string ->
            count), ``'advertisment_counts'`` (product -> tweets removed by the
            filter), ``'total_tweets_counts'`` (product -> total) and
            ``'date_list'`` (the seven date strings, oldest first).
        """
        # Evaluate "today" once so every key is consistent even across midnight.
        today = date.today()
        # day_keys[i] is the day (i + 1) days ago, i.e. the day covered by until=today-i.
        day_keys = [str(today - timedelta(i + 1)) for i in range(7)]
        tweets_count_by_day = {product_to_compare: {day_key: 0 for day_key in day_keys} for product_to_compare in products_to_compare}
        advertisment_counts = {product_to_compare: 0 for product_to_compare in products_to_compare}
        total_tweets_counts = {product_to_compare: 0 for product_to_compare in products_to_compare}
        date_list = day_keys[::-1]
        for product_to_compare in products_to_compare:
            for i in range(7):
                tweets = self.__fetch_tweets(product_to_compare, str(today - timedelta(i)))
                tweets_count = len(tweets)
                advertisment_count = tweets_count - len(self.__filter_non_informative_tweets(tweets))
                total_tweets_counts[product_to_compare] += tweets_count
                tweets_count_by_day[product_to_compare][day_keys[i]] = tweets_count
                advertisment_counts[product_to_compare] += advertisment_count

        return {"tweets_count_by_day": tweets_count_by_day, "advertisment_counts": advertisment_counts, "total_tweets_counts": total_tweets_counts, "date_list": date_list}
    
def rank_location_by_tweets_count(location_statistics):
    """Return location names ordered by descending tweet count.

    The ``'unknown'`` bucket is left out of the ranking; locations with equal
    counts keep their order from ``location_statistics``.
    """
    ranked_pairs = list(location_statistics.items())
    ranked_pairs.sort(key=lambda pair: pair[1], reverse=True)
    return [name for name, _count in ranked_pairs if name != 'unknown']

def rank_feature_by_mentions_count(feature_statistics):
    """Return feature names ordered by descending mention count.

    Features with equal counts keep their order from ``feature_statistics``.
    """
    ranked_pairs = list(feature_statistics.items())
    ranked_pairs.sort(key=lambda pair: pair[1], reverse=True)
    return [name for name, _mentions in ranked_pairs]

In [5]:
# function to plot all plots
def plotTabs(race_distribution, gender_distribution,city_list,pos_percent,neg_percent,attitude,feature_list,liked_feature,disliked_feature,TweetsDic, sumAdDic,sum7day,DateList):
    """Build the four-tab Bokeh result view.

    Reads the module-level ``phoneTagCopy`` / ``PhoneListCopy`` lists for the
    competing-products tab, so they must be populated before calling.

    Args:
        race_distribution: mapping race label -> value for the race pie chart.
        gender_distribution: mapping gender label -> value for the gender pie chart.
        city_list: locations in ranked order for the location table.
        pos_percent: value of the positive sentiment wedge.
        neg_percent: value of the negative sentiment wedge.
        attitude: text shown as the overall sentiment.
        feature_list: features in ranked order for the feature table.
        liked_feature: text shown as the most liked feature.
        disliked_feature: text shown as the most disliked feature.
        TweetsDic: mapping phone -> date string -> tweet count.
        sumAdDic: mapping phone -> advertisement tweet count.
        sum7day: mapping phone -> total tweet count.
        DateList: date strings used as the x axis of the line chart.

    Returns:
        A ``Tabs`` widget containing the four panels.
    """
    sentiment_distribution = { 'Positive toward target phone': pos_percent, 'Negative toward target phone': neg_percent}
    # Feature rank by mention count (1-based).
    rank_list = list(range(1, len(feature_list) + 1))

    # Location rank by tweet count (1-based).
    rank_city_list = list(range(1, len(city_list) + 1))

    # Spacing elements
    space1 = Div(text='', sizing_mode='scale_height', width=75)
    space2 = Div(text='', sizing_mode='scale_height', height=100)
    # Vertical spacer used in panel 1.
    space3 = Div(text='', sizing_mode='scale_width', height=25)

    #################################### Content for Panel 1 ################################################
    # Header markup: the heading is closed with </h3> (it was previously "closed" with a second <h3>).
    tab1_header = Div(text='<div align="Left" style="display:block"><h3>Sentiment Analysis On Target Phone</h3><h5>Below will show the percentage of people who feel positive toward target phone and people who feel negative toward target phone.</h5><br></div>', width=700)
    # Pie chart for sentiment: each wedge angle is the value's share of a full circle.
    x = sentiment_distribution
    data1 = pd.Series(x).reset_index(name='value').rename(columns={'index':'sentiment'})
    data1['angle'] = data1['value']/data1['value'].sum() * 2*pi
    data1['color'] = CHART_COLORS[:len(x)]
    p1 = figure(plot_height=280, title="Sentiment Analysis on Target Phone", toolbar_location=None,tools="hover", tooltips="@sentiment: @value")
    p1.wedge(x=0, y=1, radius=0.4,start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),line_color="white", fill_color='color', legend='sentiment', source=data1)

    s1=Div(text='Overall sentiment on targeted phone: ',sizing_mode='scale_height', width=300)
    overall_sentiment=Div(text=attitude,sizing_mode='scale_height', width=75)
    r1 = row([s1,overall_sentiment],width=200)
    #########################################################################################################

    #################################### Content for Panel 2 ################################################
    tab2_header = Div(text='<div align="Left" style="display:block"><h3>Trending Features On Target Phone</h3><h5>The trending features of target phone are shown below as well as most liked feature and most disliked feature.</h5><br></div>', width=700)
    # Table of the most talked-about features.
    data2 = dict(rank = rank_list,feature = feature_list)
    source1 = ColumnDataSource(data2)
    columns1 = [TableColumn(field="feature",title="Trending Features On Target Phone")]
    dt = DataTable(source=source1,columns=columns1,width=300,height=280)
    # Most liked and most disliked features.
    s2 = Div(text='Most Liked Feature: ',sizing_mode='scale_height', width=150)
    most_liked_feature=Div(text=liked_feature,sizing_mode='scale_height', width=75)
    r2 = row([s2,most_liked_feature],width=150)
    s3 = Div(text='Most Disliked Feature: ',sizing_mode='scale_height', width=200)
    most_disliked_feature=Div(text=disliked_feature,sizing_mode='scale_height', width=75)
    r3 = row([s3,most_disliked_feature],sizing_mode='scale_height', width=200)
    c=column([r2,r3],height=100)
    #########################################################################################################

    #################################### Content for Panel 3 ################################################
    tab3_header = Div(text='<div align="Left" style="display:block"><h3>Location--Gender--Race Distribution</h3><h5>The gender,race,and location distribution of tweets.</h5><br></div>', width=700)
    # Pie chart for gender distribution.
    gender = gender_distribution
    data3 = pd.Series(gender).reset_index(name='value').rename(columns={'index':'gender'})
    data3['angle'] = data3['value']/data3['value'].sum() * 2*pi
    data3['color'] = CHART_COLORS_G[:len(gender)]
    p2 = figure(plot_height=280, title="Gender Distribution", toolbar_location=None,tools="hover", tooltips="@gender: @value")
    p2.wedge(x=0, y=1, radius=0.4,start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),line_color="white", fill_color='color', legend='gender', source=data3)

    # Pie chart for race distribution.
    race = race_distribution
    data4 = pd.Series(race).reset_index(name='value').rename(columns={'index':'race'})
    data4['angle'] = data4['value']/data4['value'].sum() * 2*pi
    data4['color'] = CHART_COLORS_R[:len(race)]
    # No tools="hover" here, so the default tools (e.g. dragging) stay available.
    p3 = figure(plot_height=280, title="Race Distribution", toolbar_location=None, tooltips="@race: @value")
    p3.wedge(x=0, y=1, radius=0.4,start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),line_color="white", fill_color='color', legend='race', source=data4)

    # Data table for the top locations.
    s4=Div(text='Location Distribution Ranked List',sizing_mode='scale_height', width=300)
    data5 = dict(rank = rank_city_list,city = city_list)
    source2 = ColumnDataSource(data5)
    columns2 = [TableColumn(field="city",title="City")]
    dt2 = DataTable(source=source2,columns=columns2,width=300,height=280)

    c2=column([p2,p3],height=400)
    c3=column([c2,space2],height=500)
    c4=column([dt2,c3],height=500)
    c5=column([s4,c4],height=500)
    #########################################################################################################

    #################################### Content for Panel 4 ################################################
    tab4_header = Div(text='<div align="Left" style="display:block"><h3>Competing Products</h3><h5>Popularity of multiple competing products are shown in the plots below.</h5><br></div>', width=700)

    # Categorical x axis: one slot per competing phone label.
    p4 = figure(x_range=phoneTagCopy, plot_height=250, title="Targeted Phone Related Tweets Counts In 7days")

    # Both bar series share the same x positions; the ads bars are drawn over the totals.
    p4.vbar(x=phoneTagCopy, top=[sum7day[phone] for phone in PhoneListCopy], width=0.5, fill_color='red',legend ='Tweets Number')
    p4.vbar(x=phoneTagCopy, top=[sumAdDic[phone] for phone in PhoneListCopy], width=0.5,legend='Ads Number')
    # Cosmetic tweaks.
    p4.xgrid.grid_line_color = None
    p4.y_range.start = 0

    # One line colour per competing phone (index-aligned with PhoneListCopy).
    colorList=['aqua', 'black', 'blue', 'fuchsia', 'gray', 'green', 'lime', 'maroon', 'navy']
    p5 = figure(plot_width=600, plot_height=500,title="Product's Tweets Number",x_range=DateList)
    for i, phone in enumerate(PhoneListCopy):
        # One y value per date on the x axis, whatever the length of DateList.
        daily_counts = [TweetsDic[phone][day] for day in DateList]
        p5.line(DateList, daily_counts, color=colorList[i],line_width=2, alpha=0.5,legend=phoneTagCopy[i])

    #########################################################################################################

    # Layout panels
    panel1 = Panel(child=column([tab1_header,space1,r1,space3,p1], height=APP_HEIGHT, width=APP_WIDTH), title='Sentiment Analysis On Target Phone')
    panel2 = Panel(child=column([tab2_header,c,dt], height=APP_HEIGHT, width=APP_WIDTH), title='Trending Features')
    panel3 = Panel(child=column([tab3_header,c5], height=APP_HEIGHT, width=APP_WIDTH), title='Location--Gender--Race')
    panel4 = Panel(child=column([tab4_header,p4,p5], height=APP_HEIGHT, width=APP_WIDTH), title='Competing Products')
    tabs = Tabs(tabs=[panel1,panel2,panel3,panel4])

    return tabs

In [6]:
#bokeh server app
def create_doc(doc):
    """Bokeh application entry point: build the search menu and result area on ``doc``.

    The root layout is a row ``[menu, separator, content]``; the search button's
    callback replaces ``GUI.children[2]`` with progress messages and finally
    with the result tabs.

    Args:
        doc: the Bokeh ``Document`` to which the root layout is added.
    """
    def update():
        """Button callback: mine tweets for the entered phone, analyse them, show the tabs."""
        # The working copies are module globals because plotTabs reads them.
        global phoneTagCopy
        global PhoneListCopy

        spaceH = Div(text='',sizing_mode='scale_width', height=50)
        message1 = Div(text='<div align="center" style="display:block"><h2>Mining tweets from Twitter!</h2><iframe src="https://giphy.com/embed/52qtwCtj9OLTi" width="480" height="206" frameBorder="0"></iframe></div>', width=700)
        GUI.children[2] = column([spaceH,message1])
        # Pause so the progress message is visible before the blocking work starts.
        time.sleep(3)
        # The phone model TextInput is the third child of the menu widget box.
        phone = GUI.children[0].children[2].value
        # Mine tweets and apply NLTK.
        tweets = Tweets(phone, size=1000)
        if len(tweets.get_tweets()) == 0:
            messageE = Div(text='<div align="center" style="display:block"><h2>Rate limit exceeded! Please wait 15min</h2><iframe src="https://giphy.com/embed/52qtwCtj9OLTi" width="480" height="206" frameBorder="0"></iframe></div>', width=700)
            GUI.children[2] = column([spaceH,messageE])
            return
        # Build the text list once; it is reused by every feature analysis below.
        tweet_texts = [tweet.get_text() for tweet in tweets.get_tweets()]
        tweet_emotion_classifier = TweetEmotionClassifier()
        mobile_features_analyzer = MobileFeaturesAnalyzer(tweet_emotion_classifier)
        feature_statistics = mobile_features_analyzer.feature_statistics(tweet_texts)
        location_statistics = tweets.location_statistics()

        time.sleep(3)

        # Analyse data
        message2 = Div(text='<div align="center" style="display:block"><h2>Analysing tweets!</h2><iframe src="https://giphy.com/embed/52qtwCtj9OLTi" width="480" height="206" frameBorder="0"></iframe></div>', width=700)
        GUI.children[2] = column([spaceH,message2])
        race_stat = tweets.race_statistics()
        gender_stat = tweets.gender_statistics()
        location_stat_rank = rank_location_by_tweets_count(location_statistics)
        pos_sentiment = tweets.pos_attitude_percentage(tweet_emotion_classifier)
        neg_sentiment = tweets.neg_attitude_percentage(tweet_emotion_classifier)
        overall_sentiment = tweets.overall_attitude(tweet_emotion_classifier)
        feature_rank_list = rank_feature_by_mentions_count(feature_statistics)
        # Div text must be a string, so fall back to a placeholder if no feature is reported.
        most_like = mobile_features_analyzer.most_liked_feature(tweet_texts) or 'N/A'
        # Init competing product object
        competing_product_finder = CompetingProductFinder(phone)
        most_dislike = mobile_features_analyzer.most_disliked_feature(tweet_texts) or 'N/A'

        # Compare against every known phone except the one being searched for.
        phoneTagCopy=copy.deepcopy(phoneTag)
        PhoneListCopy=copy.deepcopy(PhoneList)
        for index, ph in enumerate(PhoneList):
            if difflib.SequenceMatcher(None, ph.lower(),phone.lower()).quick_ratio() >= 0.8 or (ph.lower() in phone.lower()):
                # Only the first match is removed; both lists are index-aligned.
                del phoneTagCopy[index]
                del PhoneListCopy[index]
                break

        compete_result = competing_product_finder.compete_with_multiple_products(PhoneListCopy)
        TweetsDic = compete_result['tweets_count_by_day']
        sumAdDic = compete_result['advertisment_counts']
        sum7day = compete_result['total_tweets_counts']
        DateList = compete_result['date_list']
        time.sleep(3)

        # Display results: show the "Plotting results!" message (message2 was shown here by mistake).
        message3 = Div(text='<div align="center" style="display:block"><h2>Plotting results!</h2><iframe src="https://giphy.com/embed/52qtwCtj9OLTi" width="480" height="206" frameBorder="0"></iframe></div>', width=700)
        GUI.children[2] = column([spaceH,message3])
        time.sleep(3)
        # Replace the progress message with the result tabs.
        GUI.children[2] = plotTabs(race_stat, gender_stat, location_stat_rank, pos_sentiment,
                                   neg_sentiment, overall_sentiment, feature_rank_list, most_like, most_dislike,
                                   TweetsDic, sumAdDic,sum7day,DateList)

    def buildMenu():
        """Build the left-hand search menu (header, phone model input, search button)."""
        submit = Button(label='Search Phone', button_type='success')
        submit.on_click(update)
        # Named menu_header so it does not shadow the module-level phoneTag list.
        menu_header = Div(text='<h3>Phone Model Input</h3>', height=20)
        phone_model = TextInput(value=DEFAULT_PHONE, title='Phone Model:',sizing_mode='scale_width')
        H1 = Div(text='', height=200)
        H2 = Div(text='', height=200)
        # Child order matters: update() reads the TextInput at index 2.
        menu = widgetbox([H1,menu_header,phone_model,submit,H2], width=200)
        return menu

    # Initial dashboard and document
    seperator = Div(text='', sizing_mode='scale_height', width=75)
    spacing = Div(text='', sizing_mode='scale_width', height=50)
    intro = Div(text='<div align="center" style="display:block"><h2>Welcome to the MarketingResearch Tool!</h2><h3>Please use the controls in the left menu to begin.</h3></div>', width=700)
    p = column([spacing, intro])
    GUI = row([buildMenu(), seperator, p], width=900)
    doc.add_root(GUI)

In [7]:
# Reset Bokeh's output settings, then direct output to the notebook.
reset_output()
output_notebook()
# Wrap create_doc in a function handler and build the Bokeh application from it.
handler = FunctionHandler(create_doc)
app = Application(handler)
# Create a document from the application (this runs the handler once).
doc = app.create_document()

In [8]:
# Display the application; notebook_url is set to localhost:8888 -- NOTE(review): confirm this matches the running notebook server's address.
show(app, new='window',notebook_url="localhost:8888", notebook_handle=True)

8
