In [29]:
import tweepy
import pandas as pd
from googleapiclient import discovery
from langdetect import detect
import json,requests,re,os

In [81]:
twitter_keys_path = "/etc/twitter-api-key.csv"
google_api_keys_path = "google-api-key.csv"

In [104]:
## twitter
creds = pd.read_csv(twitter_keys_path)
consumer_key = creds['consumer_key'][0]
consumer_secret = creds['consumer_secret'][0]
access_key = creds['access_token'][0]
access_secret = creds['access_token_secret'][0]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

In [105]:
## google api
API_KEY = pd.read_csv(google_api_keys_path)['google-key'][0]

## Code for stringing tweets

In [91]:
TWEET_BATCH_NUM = 3
PERSPECTIVE_MODELS = ['TOXICITY', 'IDENTITY_ATTACK', 'INSULT', 'PROFANITY','THREAT','SEXUALLY_EXPLICIT', 'FLIRTATION']

In [92]:
models_setting_json = {}
for model in PERSPECTIVE_MODELS:
    print(model)
    models_setting_json[model] = {'scoreThreshold': '0'}

TOXICITY
IDENTITY_ATTACK
INSULT
PROFANITY
THREAT
SEXUALLY_EXPLICIT
FLIRTATION


In [93]:
def get_user_timeline(screenName, tweetCount):
    statuses = api.user_timeline(screen_name = screenName,count=tweetCount, tweet_mode="extended")
    tweet_info = {}
    for tweet in statuses:
        if hasattr(tweet, 'retweeted_status'):
            tweet_info[tweet.id] = {'text': tweet.retweeted_status.full_text, 'tweet_time': tweet.created_at}
        else:
            tweet_info[tweet.id] = {'text': tweet.full_text, 'tweet_time': tweet.created_at}

    return tweet_info

In [94]:
def clean_tweets(tweets):
    cleaned_tweets = []
    for tweet_id, tweet in tweets.items():
        try:
            if(detect(tweet['text']) == 'en'):
                cleaned_tweet = re.sub(r'(@\S+)|(http\S+)', " ", str(tweet['text']))
                if(cleaned_tweet and cleaned_tweet.strip()):
                    cleaned_tweets.append({'cleaned_tweet': cleaned_tweet, 
                                           'original_tweet': tweet['text'],
                                           'tweet_time': tweet['tweet_time']})
        except Exception as e:
#             print('Exception wheen cleaning- Tweet in response: ' + tweet['text'])
            print(e)
    print(len(cleaned_tweets))
    return cleaned_tweets

In [95]:
## string the tweets together!
def get_batched_tweets(tweets):
    cleaned_user_timeline_tweets = clean_tweets(tweets)
    batched_tweets = []
    if len(cleaned_user_timeline_tweets) > TWEET_BATCH_NUM:
        each_batch_size = int(len(cleaned_user_timeline_tweets)/TWEET_BATCH_NUM)
        for i in range(0, TWEET_BATCH_NUM):
            batched_tweets.append(cleaned_user_timeline_tweets[i*each_batch_size : (i+1)*each_batch_size])
    else:
        batched_tweets = cleaned_user_timeline_tweets
    return batched_tweets

In [96]:
def get_user_perspective_score(batched_tweets, models_setting_json, twitter_account):
    # need to add multiple keys
    service = discovery.build('commentanalyzer', 'v1alpha1', developerKey=API_KEY,  cache_discovery=False)
    tweets_with_perspective_scores = []
    tweet_count = 0

    # where we finally store the scores
    user_perspective_scores_json = {}
    temp_scores = {}
    for model in PERSPECTIVE_MODELS:
        temp_scores[model] = []

    for i in range(0, TWEET_BATCH_NUM):
        tweet_string = ''
        for tweet in batched_tweets[i]:
            tweet_string += tweet['cleaned_tweet'] + '\n'
            tweet_string = tweet_string.encode('utf-8')

        print('length: ' + str(len(tweet_string)))

        analyze_request = {
                        'comment': { 'text': tweet_string},
                        'requestedAttributes': models_setting_json}
        try:
            response = service.comments().analyze(body=analyze_request).execute()
            # print(response)
            if(response['attributeScores']):
                for model in PERSPECTIVE_MODELS:
                    if model in response['attributeScores']:
                        temp_scores[model].append(response['attributeScores'][model]['summaryScore']['value'])

        except Exception as e:
            print(e)
            print('Exception when getting perspective scores ' + twitter_account)
            print(len(tweet_string))

    for model in PERSPECTIVE_MODELS:
        if len(temp_scores[model]) > 0:
            print(temp_scores[model])
            user_perspective_scores_json[model] = sum(temp_scores[model])/len(temp_scores[model])
            print(user_perspective_scores_json[model])
        else:
            user_perspective_scores_json[model] = None


    return user_perspective_scores_json

In [106]:
twitter_account = 'im__jane'
TWEET_NUM = 200

tweets = get_user_timeline(twitter_account, TWEET_NUM)

In [107]:
batched_tweets = get_batched_tweets(tweets)

'ascii' codec can't encode character u'\u2019' in position 8: ordinal not in range(128)
'ascii' codec can't encode character u'\u2019' in position 235: ordinal not in range(128)
'ascii' codec can't encode characters in position 49-52: ordinal not in range(128)
'ascii' codec can't encode character u'\u2019' in position 76: ordinal not in range(128)
'ascii' codec can't encode characters in position 1-11: ordinal not in range(128)
'ascii' codec can't encode characters in position 111-112: ordinal not in range(128)
'ascii' codec can't encode characters in position 176-177: ordinal not in range(128)
'ascii' codec can't encode characters in position 68-69: ordinal not in range(128)
'ascii' codec can't encode character u'\u2019' in position 116: ordinal not in range(128)
'ascii' codec can't encode characters in position 27-28: ordinal not in range(128)
'ascii' codec can't encode characters in position 194-195: ordinal not in range(128)
'ascii' codec can't encode characters in position 184-185

In [108]:
get_user_perspective_score(batched_tweets, models_setting_json, twitter_account)

length: 3056
length: 3627
length: 4196
[0.18453915, 0.23787954, 0.3217358]
0.248051496667
[0.25080234, 0.35056716, 0.38586828]
0.32907926
[0.21692717, 0.25511935, 0.30610135]
0.259382623333
[0.1577357, 0.1489321, 0.22514658]
0.17727146
[0.212965, 0.2665316, 0.3757453]
0.285080633333
[0.3035996, 0.15463679, 0.31295457]
0.257063653333
[0.46974406, 0.47966555, 0.4480788]
0.46582947


{'FLIRTATION': 0.46582947,
 'IDENTITY_ATTACK': 0.32907926000000004,
 'INSULT': 0.25938262333333334,
 'PROFANITY': 0.17727146000000002,
 'SEXUALLY_EXPLICIT': 0.2570636533333333,
 'THREAT': 0.28508063333333333,
 'TOXICITY': 0.24805149666666668}