In [7]:
from itertools import cycle
import json
import requests
import sys
import time
import traceback
from TwitterAPI import TwitterAPI


# Status codes that Twitter.request() treats as "rate limited": on any of
# these it switches credentials and, if that does not help, sleeps and retries.
RATE_LIMIT_CODES = {88, 130, 420, 429}

class Twitter:
	"""Small client around TwitterAPI that rotates between credential sets.

	An instance loads every credential set from a file (one JSON object per
	line), builds a TwitterAPI object from the first one and switches to the
	next set whenever a request comes back with a status code listed in
	RATE_LIMIT_CODES or times out.

	get_twitter() and robust_request() take no `self`; they are plain helper
	functions that happen to live in the class namespace and are meant to be
	called on the class (Twitter.get_twitter(path)).
	"""

	def get_twitter(credential_file):
		""" Read the credential_file and construct an instance of TwitterAPI.
		Args:
		credential_file ... A JSON file holding a single object with the keys
		                    consumer_key, consumer_secret, access_token and
		                    token_secret.
		Returns:
		An instance of TwitterAPI.
		"""
		# Use the path we were given (it used to be ignored in favour of a
		# hardcoded, user-specific path) and close the file when done.
		with open(credential_file) as handle:
			tokens = json.load(handle)
		return TwitterAPI(
			tokens['consumer_key'],
			tokens['consumer_secret'],
			tokens['access_token'],
			tokens['token_secret'])

	def robust_request(twitter, resource, params, max_tries=5):
		""" If a Twitter request fails, sleep for 15 minutes.
		Do this at most max_tries times before quitting.
		Args:
		twitter .... A TwitterAPI object.
		resource ... A resource string to request.
		params ..... A parameter dictionary for the request.
		max_tries .. The maximum number of tries to attempt.
		Returns:
		A TwitterResponse object, or None if failed.
		"""
		for attempt in range(max_tries):
			response = twitter.request(resource, params)
			if response.status_code == 200:
				return response
			if attempt + 1 < max_tries:
				print('Got error: %s \nsleeping for 15 minutes.'
					% response.text)
				time.sleep(61 * 15)
			else:
				# Last attempt: sleeping again would only delay the None.
				print('Got error: %s \ngiving up.' % response.text)
		return None

	def __init__(self, credential_file):
		"""
		Params:
		  credential_file...path to a text file with one JSON object per line,
		  each containing the four required tokens: consumer_key,
		  consumer_secret, access_token, token_secret
		Raises:
		  ValueError if the file contains no credentials.
		"""
		with open(credential_file) as handle:
			# Blank (e.g. trailing) lines are skipped instead of crashing json.loads.
			self.credentials = [json.loads(line) for line in handle if line.strip()]
		if not self.credentials:
			raise ValueError('no credentials found in %s' % credential_file)
		self.credential_cycler = cycle(self.credentials)
		self.reinit_api(credential_file)

	def reinit_api(self, credential_file=None):
		"""Switch self.twapi to the next credential set in the cycle.

		Params:
		  credential_file...unused; accepted (and optional) only so that both
		  the call in __init__ and the argument-less calls in request() work.
		"""
		creds = next(self.credential_cycler)
		sys.stderr.write('switching creds to %s\n' % creds['consumer_key'])
		self.twapi = TwitterAPI(creds['consumer_key'],
								creds['consumer_secret'],
								creds['access_token'],
								creds['token_secret'])

	def request(self, endpoint, params):
		"""Issue a request, rotating credentials / sleeping while rate limited.

		Params:
		  endpoint...resource string passed to TwitterAPI.request
		  params.....parameter dictionary passed to TwitterAPI.request
		Returns:
		  The first response whose status code is not in RATE_LIMIT_CODES
		  (it may still be a non-200 error response).
		"""
		while True:
			try:
				response = self.twapi.request(endpoint, params)
				if response.status_code not in RATE_LIMIT_CODES:
					return response
				# Rate limited: give every other credential set one try.
				for _ in range(len(self.credentials) - 1):
					self.reinit_api()
					response = self.twapi.request(endpoint, params)
					if response.status_code not in RATE_LIMIT_CODES:
						return response
				sys.stderr.write('sleeping for 15 minutes...\n')
				time.sleep(910) # sleep for 15 minutes # FIXME: read the required wait time.
				# fall through: the while loop retries the request
			except requests.exceptions.Timeout:
				# Read timed out: wait briefly, switch credentials, retry.
				print("Timeout occurred. Retrying...")
				time.sleep(5)
				self.reinit_api()

	def followers_for_id(self, theid, limit=1e10):
		"""Return follower ids of the user with id `theid` (see _paged_request)."""
		return self._get_followers('user_id', theid, limit)

	def followers_for_screen_name(self, screen_name, limit=1e10):
		"""Return follower ids of the user `screen_name` (see _paged_request)."""
		return self._get_followers('screen_name', screen_name, limit)

	def _get_followers(self, identifier_field, identifier, limit=1e10):
		"""Page through 'followers/ids' for the user selected by identifier_field."""
		query = {identifier_field: identifier,
				 'count': 5000,
				 'stringify_ids': True}
		return self._paged_request('followers/ids', query, limit)

	def friends_for_id(self, theid, limit=1e10):
		"""Return friend ids of the user with id `theid` (see _paged_request)."""
		return self._get_friends('user_id', theid, limit)

	def friends_for_screen_name(self, screen_name, limit=1e10):
		"""Return friend ids of the user `screen_name` (see _paged_request)."""
		return self._get_friends('screen_name', screen_name, limit)

	def _get_friends(self, identifier_field, identifier, limit=1e10):
		"""Page through 'friends/ids' for the user selected by identifier_field."""
		query = {identifier_field: identifier,
				 'count': 5000,
				 'stringify_ids': True}
		return self._paged_request('friends/ids', query, limit)

	def _paged_request(self, endpoint, params, limit):
		"""Collect items from a cursored endpoint until `limit` is reached.

		Params:
		  endpoint...resource string
		  params.....request parameters; the dict is copied, not modified
		  limit......stop requesting pages once this many items are collected;
		             whole pages are appended, so the result can be longer
		Returns:
		  List of the items fetched so far. Errors are logged to stderr and
		  end the paging; whatever was collected is still returned.
		"""
		results = []
		# Work on a copy so the caller's dict does not pick up our cursor.
		params = dict(params)
		while len(results) < limit:
			try:
				response = self.request(endpoint, params)
				if response.status_code != 200:
					sys.stderr.write('Skipping bad request: %s\n' % response.text)
					return results
				payload = json.loads(response.text)
				items = [r for r in response]
				if len(items) == 0:
					return results
				sys.stderr.write('fetched %d more results for %s\n' %
					(len(items), params.get('screen_name', params.get('user_id'))))
				time.sleep(1)
				results.extend(items)
				next_cursor = payload['next_cursor']
				if not next_cursor:
					# A next_cursor of 0 marks the last page; don't ask again.
					return results
				params['cursor'] = next_cursor
			except Exception as e:
				sys.stderr.write('Error: %s\nskipping...\n' % e)
				sys.stderr.write(traceback.format_exc())
				return results
		return results

	def tweets_for_id(self, theid, limit=1e10):
		"""Return timeline tweets of the user with id `theid` (see _get_tweets)."""
		return self._get_tweets('user_id', theid, limit)

	def tweets_for_screen_name(self, screen_name, limit=1e10):
		"""Return timeline tweets of the user `screen_name` (see _get_tweets)."""
		return self._get_tweets('screen_name', screen_name, limit)

	def _get_tweets(self, identifier_field, identifier, limit=1e10):
		"""Fetch a user's timeline from 'statuses/user_timeline', newest first.

		Requests pages of up to 200 tweets, moving max_id below the smallest
		id seen so far, until `limit` tweets are collected or a page is empty.
		Whole pages are appended, so the result can be longer than `limit`.
		Errors are logged to stderr and end the paging; whatever was collected
		is still returned.
		"""
		max_id = None
		tweets = []
		while len(tweets) < limit:
			try:
				params = {identifier_field: identifier, 'count': 200,
						  'tweet_mode': 'extended', 'trim_user': 0}
				# Only send max_id once we have one; no null on the first page.
				if max_id is not None:
					params['max_id'] = max_id
				response = self.request('statuses/user_timeline', params)
				if response.status_code != 200:
					sys.stderr.write('Skipping bad user: %s\n' % response.text)
					return tweets
				items = [t for t in response]
				if len(items) == 0:
					return tweets
				sys.stderr.write('fetched %d more tweets for %s\n' % (len(items), identifier))
				tweets.extend(items)
				# Next page: everything strictly older than what we just got.
				max_id = min(t['id'] for t in items) - 1
			except Exception as e:
				sys.stderr.write('Error: %s\nskipping...\n' % e)
				sys.stderr.write(traceback.format_exc() + '\n')
				return tweets
		return tweets

In [26]:
#from ..mytwitter import Twitter
#from ..u import get_twitter_data, N_TWEETS
#from .. import credentials_path, clf_path
import pickle
import sys
import json
import numpy as np

from TwitterAPI import TwitterAPI
#twapi = Twitter(credentials_path)
# NOTE(review): absolute, user-specific paths; this cell only runs on the
# machine that owns them.
credentials_path = 'C:\\Users\\11977\\.osna\\credentials.json'
clf_path = 'C:\\Users\\11977\\.osna\\clf.pkl'
# SECURITY: unpickling can execute arbitrary code. Only load a clf.pkl that
# you produced yourself. The file is closed as soon as it has been read.
with open(clf_path, 'rb') as clf_file:
    clf, vec = pickle.load(clf_file)
print('read clf %s' % str(clf))
print('read vec %s' % str(vec))

result = None
# coef[0] is the negated weight vector of the classifier, coef[1] the weights
# as stored on the model.
coef = [-clf.coef_[0], clf.coef_[0]]
# NOTE(review): get_feature_names() was removed in scikit-learn 1.2 in favour
# of get_feature_names_out(); confirm the installed version before changing.
features = np.array(vec.get_feature_names())
# [negated weight, term] pairs, sorted ascending by the negated weight.
sort_coef = [[coef[0][i], features[i]] for i in range(len(coef[0]))]
myList = sorted(sort_coef, key=lambda x: x[0])

# Fetch up to one page of tweets for the account and classify each of them.
t = Twitter(credentials_path)
all_tweets = t._get_tweets('screen_name','pixy_qi',limit=200)
tweets = [words['full_text'] for words in all_tweets]
X = vec.transform(text for text in tweets)
y = clf.predict(X)
proba = clf.predict_proba(X)
ans = []

# For every tweet emit up to three lines, one per term with the largest
# negated weight among the terms that occur in the tweet. A tweet with no
# known terms contributes no line at all.
for i in range(0,len(tweets)):
    flag = '[non-hostile] ' if y[i]==0 else '[hostile] '
    # Probability of the predicted class; uses the label as column index.
    p='[probability='+str(proba[i,y[i]])+'] '
    # Column indices of the vocabulary terms present in tweet i (computed once).
    present = X[i].nonzero()[1]
    for j in (np.argsort(coef[0][present]))[::-1][:3]:
        idx = present[j]
        coef_text='[coef='+str(features[idx])+': '+str(coef[0][idx])+']'
        ans.append(flag+p+tweets[i]+coef_text)

read clf LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)
read vec CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
        tokenizer=None, vocabulary=None)


switching creds to QGRilWLkpzANIrEBSCpGeHQY0
fetched 16 more tweets for pixy_qi


In [27]:
# Display the annotated prediction strings collected in `ans` by the cell above.
ans

['[non-hostile] [probability=0.6701541240260448] PYTHON is harassment to me!!! https://t.co/vOr0zAGzlZ[coef=me: 0.26709623986324305]',
 '[non-hostile] [probability=0.6701541240260448] PYTHON is harassment to me!!! https://t.co/vOr0zAGzlZ[coef=https: 0.1651799175972774]',
 '[non-hostile] [probability=0.6701541240260448] PYTHON is harassment to me!!! https://t.co/vOr0zAGzlZ[coef=is: -0.027379374896934847]',
 '[non-hostile] [probability=0.6528847303626115] where am I https://t.co/Sde9NNHvTc[coef=am: 0.18088842326046406]',
 '[non-hostile] [probability=0.6528847303626115] where am I https://t.co/Sde9NNHvTc[coef=https: 0.1651799175972774]',
 '[non-hostile] [probability=0.6528847303626115] where am I https://t.co/Sde9NNHvTc[coef=co: -0.2431274061932191]',
 '[non-hostile] [probability=0.9258752011507644] gjl does not reply to my. sad[coef=reply: 1.0968302922849098]',
 '[non-hostile] [probability=0.9258752011507644] gjl does not reply to my. sad[coef=not: 0.342802414869398]',
 '[non-hostile] [p

In [16]:
# Negated classifier weights (coef[0]) for the vocabulary columns that are
# non-zero in the first row of X, i.e. the known terms of the first tweet.
# NOTE(review): this cell's execution counter (16) is lower than that of the
# cell defining coef and X (26), so the output may predate their current values.
coef[0][X[0].nonzero()[1]]

array([-0.24312741, -0.18871187,  0.16517992, -0.02737937,  0.26709624,
       -0.10736773])

In [21]:
# Display the [negated weight, term] pairs sorted ascending by negated weight.
# NOTE(review): this cell's execution counter (21) is lower than that of the
# cell building myList (26), so the output may predate its current value.
myList

[[-2.975670198444058, 'twat'],
 [-2.945656160678739, 'cunt'],
 [-2.2915728849916914, 'realdonaldtrump'],
 [-2.2590774744061366, 'retard'],
 [-1.7348355236414523, 'shut'],
 [-1.6805018924173742, 'stupid'],
 [-1.6765160610820757, 'pussy'],
 [-1.6012847870464584, 'faggot'],
 [-1.482944230072471, 'stfu'],
 [-1.4347425458495693, 'cum'],
 [-1.417957030928721, 'anal'],
 [-1.3546308724724327, 'slut'],
 [-1.3504399107523517, 'donaldjtrumpjr'],
 [-1.3404367708965732, 'dumbass'],
 [-1.2536157270005492, 'ass'],
 [-1.2519352020194368, 'dick'],
 [-1.2431481117794767, 'suck'],
 [-1.2020295332611934, 'thug'],
 [-1.1982446082070775, 'hole'],
 [-1.187747391375843, 'jihadi'],
 [-1.1784482260277562, 'mouth'],
 [-1.1775819479964906, 'trash'],
 [-1.1756952436383106, 'retarded'],
 [-1.174255583601365, 'penis'],
 [-1.1651815328379216, 'lying'],
 [-1.1502524636474438, 'ugly'],
 [-1.1382131086219944, 'fag'],
 [-1.1341039568084021, 'corny'],
 [-1.126798511833512, 'hoe'],
 [-1.123553859816339, 'plz'],
 [-1.111597