# tweepy

In [38]:
import pandas as pd
import numpy as np
import os
import tweepy

In [222]:
tweepy.__version__

'3.5.0'

## 認証

### key/token

In [6]:
!echo ".env" > .gitignore

In [10]:
%%bash
# Write the blank template only when .env is absent, so re-running the
# notebook never wipes credentials that were already filled in.
if [ ! -e .env ]; then
cat <<EOF > .env
consumer_key=
consumer_secret=
access_token=
access_token_secret=
EOF
fi

In [17]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [117]:
# Read the four OAuth credentials from the environment; a missing variable
# raises KeyError.
consumer_key, consumer_secret, access_token, access_token_secret = (
    os.environ[env_name]
    for env_name in (
        "consumer_key",
        "consumer_secret",
        "access_token",
        "access_token_secret",
    )
)

In [467]:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

In [119]:
type(api)

tweepy.api.API

### bash for python variable
<https://stackoverflow.com/questions/19579546/can-i-access-python-variables-within-a-bash-or-script-ipython-notebook-c>

In [603]:
var_for_bash = 111

In [605]:
!echo {var_for_bash}

111


In [609]:
%%bash -s $var_for_bash
echo $1
echo {var_for_bash}

111
{var_for_bash}


## 認証からアクセストークンとシークレットキーを取得

In [607]:
redirect_url = auth.get_authorization_url()

In [608]:
# Open the authorization page through the standard library instead of the
# macOS-only `open` command; this also keeps the URL (which carries a query
# string) from being interpreted by a shell.
import webbrowser

_ = webbrowser.open(redirect_url)

- ブラウザで「アプリ名」にアカウントの利用を許可しますか？の画面を開いたら、
- 認証をして
- https://設定したURL?oauth_token=xxx&oauth_verifier=yyy にリダイレクトする
- 自分の場合、https://twitter.com/ksomemo なので特に処理をすることなくURLを閲覧できる
- oauth_verifier の値を下記に渡す

In [601]:
verifier = input('oauth_verifier: ')

In [600]:
_ = auth.get_access_token(verifier)

## APIの呼び出し
1. API modelの各APIはpropertyを呼び出す
2. propertyからはbind_api関数を呼び出す
3. bind_api呼び出し時に内部でAPIMethodクラスを作成(インスタンス生成ではない)
4. APIMethodクラスには呼び出し時に指定した設定がクラスにbindされる
5. 最終的には上記クラスからインスタンスを作成しAPI実行メソッドを呼び出す関数(_call)を返す
6. property呼び出しなので、api.api_name(args)は_call(args, kwargs)の呼び出しとなる
7. kwargsにcreateが含まれその値がTrueと判断される場合は、APIを実行せずにAPIMethodクラスのインスタンスを作成して返す

In [237]:
api.get_status

<function tweepy.binder.bind_api.<locals>._call>

In [226]:
try:
    api.get_status(create=False)
except Exception as e:
    print(e)

[{'code': 144, 'message': 'No status found with that ID.'}]


In [238]:
api_method = api.get_status(create=True)
api_method

<tweepy.binder.bind_api.<locals>.APIMethod at 0x10fb37a58>

In [240]:
try:
    api_method.execute()
except Exception as e:
    print(e)

[{'code': 144, 'message': 'No status found with that ID.'}]


In [230]:
from tweepy.binder import bind_api
binded_api = bind_api(
    api=api,
    path='/my/api.json',
    method='POST',
    payload_type='user',
    allowed_param=['id', 'user_id', 'screen_name'],
    require_auth=True
)
binded_api

<function tweepy.binder.bind_api.<locals>._call>

In [233]:
my_api_method = binded_api(create=True)
my_api_method

<tweepy.binder.bind_api.<locals>.APIMethod at 0x10fb7ab38>

In [235]:
try:
    my_api_method.execute()
except Exception as e:
    print(e)

[{'code': 34, 'message': 'Sorry, that page does not exist'}]


In [236]:
try:
    binded_api()
except Exception as e:
    print(e)

[{'code': 34, 'message': 'Sorry, that page does not exist'}]


## API Limit

In [398]:
# Flatten the nested rate-limit payload into one row per dotted key, then keep
# only the "remaining" counters whose key mentions "search".
rate = api.rate_limit_status()
rate_flat = pd.io.json.json_normalize(rate).T
is_remaining = rate_flat.index.str.endswith("remaining")
mentions_search = rate_flat.index.str.contains("search")
rate_df = rate_flat[is_remaining & mentions_search]
rate_df

Unnamed: 0,0
resources.geo./geo/search.remaining,15
resources.saved_searches./saved_searches/destroy/:id.remaining,15
resources.saved_searches./saved_searches/list.remaining,15
resources.saved_searches./saved_searches/show/:id.remaining,15
resources.search./search/tweets.remaining,180
resources.users./users/search.remaining,900


## User model

In [22]:
me = api.me()
type(me)

tweepy.models.User

### Userのkey

In [136]:
user_keys = list(me._json.keys())
len(user_keys)

46

In [142]:
# Show the keys as a 5-column grid. The blank padding and the row count are
# derived from the number of keys, so the reshape keeps working if the API
# payload gains or loses fields (the hard-coded pad of 4 only fits 46 keys).
n_cols = 5
pad = [""] * (-len(user_keys) % n_cols)
pd.DataFrame(
    np.array(user_keys + pad).reshape(-1, n_cols)
)

Unnamed: 0,0,1,2,3,4
0,time_zone,location,name,url,profile_background_image_url_https
1,description,default_profile,is_translation_enabled,is_translator,profile_text_color
2,notifications,statuses_count,profile_background_tile,lang,needs_phone_verification
3,utc_offset,has_extended_profile,profile_use_background_image,suspended,id
4,favourites_count,status,protected,geo_enabled,profile_sidebar_border_color
5,created_at,profile_background_image_url,profile_image_url_https,verified,contributors_enabled
6,listed_count,profile_background_color,profile_link_color,following,profile_sidebar_fill_color
7,friends_count,translator_type,id_str,follow_request_sent,profile_banner_url
8,profile_image_url,entities,profile_location,default_profile_image,followers_count
9,screen_name,,,,


## 属性とjson化による違いがないか確認

In [45]:
attrs = [attr for attr in dir(me)
           if not callable(getattr(me, attr)) and not attr.startswith("_")]

In [68]:
me_series = pd.Series({attr: getattr(me, attr) for attr in attrs})

### keyごとの確認

In [115]:
me_series.index[:3]

Index(['contributors_enabled', 'created_at', 'default_profile'], dtype='object')

In [77]:
# NOTE(review): the cell that originally built `me_json2_series` is not in the
# notebook, so this comparison fails on a fresh kernel. It is reconstructed
# here from the raw JSON payload and sorted to line up with the
# attribute-based series - confirm this matches the original definition.
me_json2_series = pd.Series(me._json).sort_index()
(me_series.index == me_json2_series.index).all()

True

### 値ごとの確認

In [78]:
is_same = (me_series == me_json2_series)
is_same.value_counts() 

True     43
False     3
dtype: int64

In [84]:
diff = pd.concat([
    me_series[~ is_same],
    me_json2_series[~ is_same]
], axis=1)
diff

Unnamed: 0,0,1
created_at,2011-07-09 08:09:35,Sat Jul 09 08:09:35 +0000 2011
profile_location,,
status,"Status(id=896303711286206464, geo=None, coordi...","{'id': 896303711286206464, 'geo': None, 'coord..."


### ISOフォーマットでない日時を変換できるか比較

In [108]:
pd.to_datetime(diff.loc["created_at", 1])

Timestamp('2011-07-09 08:09:35')

### Null系の比較

In [112]:
(
    np.nan == np.nan,
    np.isnan(np.nan),
    pd.isnull(np.nan),
    pd.isnull(None),
    # np.isnan(None) # error
)

(False, True, True, True)

### Status modelとjsonの比較

In [186]:
user_status_model = diff.loc["status", 0]
type(user_status_model)

tweepy.models.Status

In [187]:
# https://stackoverflow.com/questions/4527942/comparing-two-dictionaries-in-python
set(user_status_model._json) == set(diff.loc["status", 1])

True

## statusのkey

In [189]:
user_status_keys = list(user_status_model._json.keys())
len(user_status_keys)

24

In [190]:
# Show the keys as a 5-column grid. The blank padding and the row count are
# derived from the number of keys, so the reshape keeps working if the status
# payload gains or loses fields (the hard-coded pad of 1 only fits 24 keys).
n_cols = 5
pad = [""] * (-len(user_status_keys) % n_cols)
pd.DataFrame(
    np.array(user_status_keys + pad).reshape(-1, n_cols)
)

Unnamed: 0,0,1,2,3,4
0,id,geo,coordinates,retweeted_status,in_reply_to_user_id_str
1,text,contributors,in_reply_to_status_id,favorited,created_at
2,source,retweeted,retweet_count,is_quote_status,place
3,in_reply_to_status_id_str,in_reply_to_screen_name,entities,possibly_sensitive,id_str
4,in_reply_to_user_id,truncated,lang,favorite_count,


## tweet
- from api function
    - <http://docs.tweepy.org/en/v3.5.0/api.html>
- from status method
    - <https://github.com/tweepy/tweepy/blob/v3.5.0/tweepy/models.py>

In [122]:
status = api.update_status(status='1, Updating using OAuth authentication via Tweepy!')
type(status)

tweepy.models.Status

### tweet status

In [191]:
status_keys = list(status._json.keys())
len(status_keys)

24

In [192]:
# Show the keys as a 5-column grid. The blank padding and the row count are
# derived from the number of keys, so the reshape keeps working if the status
# payload gains or loses fields (the hard-coded pad of 1 only fits 24 keys).
n_cols = 5
pad = [""] * (-len(status_keys) % n_cols)
pd.DataFrame(
    np.array(status_keys + pad).reshape(-1, n_cols)
)

Unnamed: 0,0,1,2,3,4
0,id,geo,coordinates,retweeted_status,in_reply_to_user_id_str
1,text,contributors,in_reply_to_status_id,favorited,created_at
2,source,retweeted,retweet_count,is_quote_status,place
3,in_reply_to_status_id_str,in_reply_to_screen_name,entities,id_str,in_reply_to_user_id
4,user,truncated,lang,favorite_count,


In [194]:
set(user_status_keys) - set(status_keys)

{'possibly_sensitive'}

In [195]:
set(status_keys) - set(user_status_keys)

{'user'}

In [197]:
user_status_model.possibly_sensitive

False

In [145]:
status.retweeted, status.favorited

(False, False)

In [177]:
type(status.user)

tweepy.models.User

### retweet

In [148]:
status = status.retweet()
status.retweeted, status.retweet_count

(True, 1)

### retweets

- 自分の場合は含まれない?
- <https://dev.twitter.com/rest/reference/get/statuses/retweets/id>

In [149]:
retweets = status.retweets()
retweets

[]

### get status

In [207]:
deep_learning_tweet_id = 839463666059599872

In [208]:
deep_learning_tweet1 = api.get_status(str(deep_learning_tweet_id))
type(deep_learning_tweet1)

tweepy.models.Status

In [209]:
tweets = api.statuses_lookup([str(deep_learning_tweet_id), deep_learning_tweet_id])
deep_learning_tweet2 = tweets[0]

In [206]:
len(tweets)

1

In [170]:
print(deep_learning_tweet1.text)

楽しかった＼深層学習！／

いっぱい走った＼GPU！／

見つからなかった＼hyperparameter！／

徐々に消えていく＼勾配！／

影で糸を引く＼NVIDIA！／

突如現れた＼論文！／

一夜にして崩れ去る＼僕の研究！／

遂に目覚めた＼AIの力！／ True


In [199]:
# `deep_learning_tweet` is never defined in this notebook; use the Status
# object fetched with api.get_status instead.
retweets = deep_learning_tweet1.retweets()
len(retweets)

16

In [162]:
type(retweets), type(retweets[0])

(tweepy.models.ResultSet, tweepy.models.Status)

In [202]:
retweets_status_keys = list(retweets[0]._json.keys())
len(retweets_status_keys)

24

In [203]:
# Show the keys as a 5-column grid. The blank padding and the row count are
# derived from the number of keys, so the reshape keeps working if the status
# payload gains or loses fields (the hard-coded pad of 1 only fits 24 keys).
n_cols = 5
pad = [""] * (-len(retweets_status_keys) % n_cols)
pd.DataFrame(
    np.array(retweets_status_keys + pad).reshape(-1, n_cols)
)

Unnamed: 0,0,1,2,3,4
0,id,geo,coordinates,retweeted_status,in_reply_to_user_id_str
1,text,contributors,in_reply_to_status_id,favorited,created_at
2,source,retweeted,retweet_count,is_quote_status,place
3,in_reply_to_status_id_str,in_reply_to_screen_name,entities,id_str,in_reply_to_user_id
4,user,truncated,lang,favorite_count,


In [204]:
set(retweets_status_keys) - set(status_keys)

set()

In [180]:
[(s.id, s.user.name, s.user.screen_name) for s in retweets]

[(884439627511103492, 'Takehiko Hashimoto', 'takenotabi'),
 (884418026216370177, 'yasunori1978', 'yasunori1978'),
 (884417309548216320, 'Tomoya', 'BbugreportT'),
 (847843890283520000, 'アキ', 'Aki_Ta_24'),
 (844154189580124160, 'しゃちきゅん@まよい', 'KHsessu'),
 (843971883640938496, '加藤真也', '0x19f'),
 (843969645824507904, 'カエデbot', 'ahahsak'),
 (843966508841615361, '開発室Graph\uf8ff', 'studio_graph'),
 (843966492777447425, '矢場泰斗', 'sntaito'),
 (843966415677804544, 'ねこ', 'takayu_k'),
 (843965829469224960, 'nooyosh', 'nooyosh'),
 (843726975608930305, 'たかとー 🌸ΩΣ', 'takatoh1'),
 (843706790864801792, '太い樹', 'futoiki'),
 (843702128216481793, 'しゅんしんくん', 'koryor'),
 (843701970976169984, 'Masahiro Yamada', 'myamadakg'),
 (843696208904241152, 'ᴉnʞnℲ ᴉɥsoɹᴉH', 'Catechine0125')]

In [214]:
retweets_100 = api.retweets(deep_learning_tweet_id, count=100)
len(retweets_100)

87

<https://dev.twitter.com/rest/reference/get/statuses/retweeters/ids> is not implemented

## retweets_of_me
- RTされたもの
- <https://dev.twitter.com/rest/reference/get/statuses/retweets_of_me>

In [217]:
retweets_of_me = api.retweets_of_me()

In [218]:
[(s.user.name, s.text) for s in retweets_of_me[:5]]

[('○○めも(時間の浪費)',
  'Facebookで育ったので、人間関係は逃げる場所がなく、受け入れてもらうスキルを磨かないと生きていけなかった。息がつまる日々だった。その点Twitterは付き合う人を選べるだけの懐の深さがあって、息がしやすい。'),
 ('○○めも(時間の浪費)', '1, Updating using OAuth authentication via Tweepy!'),
 ('○○めも(時間の浪費)',
  '“本書は HTTP に関する内容を一冊に集めることを目的として企画されていて、「WebAPI: The Good Parts」と「ハイパフォーマンスブラウザネットワーキング」の間におさまる本として書かれている” / “「Real…” https://t.co/9ruqej0sqD'),
 ('○○めも(時間の浪費)',
  '“ドキュメント自動検査（校正）ツール RedPen に，バージョン 1.9.0 から待望の reST パーサーが入りました。” / “[Tips] Travis CI + RedPen で reST ドキュメントを継続的検査する…” https://t.co/3tIGsClMDF'),
 ('○○めも(時間の浪費)',
  '“1章：数値流体解析やりなおしのための流体力学超入門 2章：Riemann 幾何学ユーザーのための情報幾何学入門” / “技術者のためのやさしい数学 / DLmarket” https://t.co/lqvty3TqX3')]

### favorite

In [129]:
status = status.favorite()
status.favorited, status.favorite_count

(True, 1)

In [130]:
status = api.destroy_favorite(status.id)
status.favorited, status.favorite_count

(False, 0)

## favorites
- お気に入り一覧
- <https://dev.twitter.com/rest/reference/get/favorites/list>

In [219]:
favorites = api.favorites()

In [220]:
len(favorites)

20

In [259]:
[(f.user.name, f.text[:15].replace("\n", " ")) for f in favorites]

[('松本 勇気', '若者は他人のこと気にせずコード'),
 ('Mr. ベイエリア', '日本人、何かを始めるにはそれに'),
 ('t_yano', 'core.asyncでサーバ書'),
 ('統計たん@青の子可愛い', '統計的仮説検定は有意差があるな'),
 ('Mamoru Komachi', '8月9日の日記。毎年少しずつ更'),
 ('とーます', 'SMBCから完全に資金を引き揚'),
 ('Kentaro Matsuura', 'Stanマニュアルの日本語訳の'),
 ('眼力 玉壱號', 'ハチミツの代わりに浅田飴の水飴'),
 ('Mr.Unadon', 'もうすこしで、夏の仕事の一つが'),
 ('🐈🐈🐈🐈🐈🐈🐈🐈', '他人のメアドを勝手に教える輩、'),
 ('TJO', 'Googleマップの駐車場混雑'),
 ('⭕️', '「CRFをResNetみたいに'),
 ('しましま', '「しましまのKDD2017まと'),
 ('しましま', 'それにしても，lassoまわり'),
 ('しましま', '特徴選択のチュートリアル ht'),
 ('Sebastian Raschka', 'Just put up a t'),
 ('Mr. ベイエリア', '書いてある文章をありのままに理'),
 ('いしたー', '人工知能本当に範囲が広くて神経'),
 ('超循環評価器', 'Welcome to A/B '),
 ('Amazon Web Services', 'Create a Python')]

## Cursor
- <https://github.com/tweepy/tweepy/blob/v3.5.0/tweepy/binder.py#L247>
- <https://github.com/tweepy/tweepy/blob/v3.5.0/tweepy/cursor.py>
- prev?

In [275]:
class MyIterator(object):
    """Minimal forward-only iterator over an indexable, sized collection.

    The instance is its own iterator: ``__iter__`` returns ``self`` and
    ``__next__`` hands out ``values[0]``, ``values[1]``, ... until the
    position reaches the end, after which it raises ``StopIteration``.
    """

    def __init__(self, values):
        # `values` must support len() and integer indexing.
        self.values = values
        self.index = 0

    def __iter__(self):
        return self

    def __next__(self):
        # `>=` rather than `==`: if `values` shrinks while iterating, the
        # position can jump past the end, and an equality check would let an
        # IndexError escape instead of ending the iteration cleanly.
        if self.index >= len(self.values):
            raise StopIteration()
        value = self.values[self.index]
        self.index += 1
        return value
    
#     def prev(next):
#         pass

In [276]:
my_it = MyIterator([1,2,3])
for v in my_it:
    print(v)

1
2
3


## user_timeline with Cursor

### limit count

In [254]:
# Print one tab-separated row per tweet for the first 25 timeline items:
# author name, the `retweeted` flag, whether the text contains "RT @",
# the `is_quote_status` flag, and a 20-character single-line text preview.
timeline_items = tweepy.Cursor(api.user_timeline, screen_name="ksomemo").items(25)
for tweet in timeline_items:
    has_rt_marker = "RT @" in tweet.text
    preview = tweet.text[:20].replace("\n", " ")
    row = (
        tweet.user.name,
        tweet.retweeted,
        has_rt_marker,
        tweet.is_quote_status,
        preview,
    )
    print(*row, sep="\t")


○○めも(時間の浪費)	False	False	True	今の https://t.co/VqAl
○○めも(時間の浪費)	False	True	False	RT @ksomemo 昔のあれテスト
○○めも(時間の浪費)	False	False	False	“Quote Tweet API - R
○○めも(時間の浪費)	False	False	False	Githubの草データ使ってランダムフォ
○○めも(時間の浪費)	True	True	False	RT @TrsNium: 最近濃ゆい h
○○めも(時間の浪費)	True	True	False	RT @nagamyu: フェネック お
○○めも(時間の浪費)	False	False	False	絵のタイプが変わってる
○○めも(時間の浪費)	True	True	False	RT @Sakine3901: ムサシの
○○めも(時間の浪費)	True	True	False	RT @7shi: 四元数に興味を持って
○○めも(時間の浪費)	False	False	False	競技プログラミングでCython使えない
○○めも(時間の浪費)	False	False	False	油系と、クリーム系と、スープ系スパゲティ
○○めも(時間の浪費)	True	True	False	RT @simizut22: 日経コンピ
○○めも(時間の浪費)	True	True	False	RT @todesking: "Clas
○○めも(時間の浪費)	False	False	False	真の乱数はproductionで使うとし
○○めも(時間の浪費)	True	True	True	RT @TomoyukiMorimae:
○○めも(時間の浪費)	True	True	False	RT @OE_uia: OSS活動で役立
○○めも(時間の浪費)	False	False	False	なんで使っているか気になっている部分の解
○○めも(時間の浪費)	False	False	False	“DP (Dynamic Program
○○めも(時間の浪費)	False	False	False	あのときの自分へにさすがに含まれてなかっ
○○めも(時間の浪費)	True	True	False	RT @inao: WEB+DB PRE
○○めも(時間の浪費)

### limit page

In [257]:
# Only iterate through the first 2 pages of the home timeline.
# Pages are numbered from 1 for display.
for page_no, page in enumerate(tweepy.Cursor(api.home_timeline).pages(2), 1):
    print("page:", page_no, "-" * 10)
    for s in page:
        # Author names of one page are printed on a single comma-separated line.
        print(s.user.name, end=", ")
    print("")

page: 1 ----------
Chris Birchall, EdSurge, 英語たん@8年目,ツイート改訂なう, 厄年, Analytics Vidhya, platypus, MIT Tech Review, Yuya Takeyama, Machine Learning, フェリン, Manning Publications, フェリン, Gogengo! - 英単語 語源, dj patil, One R Tip a Day, Amazon Web Services, 切り取り線, platypus, きんいろモザイクで英語bot, Ken, 
page: 2 ----------
Yuya Takeyama, トデス子'\, Trs, 吉田光男, Ph.D.; bot, 値, フェリン, Kazuki Yoshida, ざら速(ザラ場速報), MURAOKA Taro, 💦, Kaggle Datasets, まーす, ざら速(ザラ場速報), ぽんこつ@ボドゲ会忘れてた奴, Ken, kosakkun, #BarelyProgrammer, Takami Sato, Hideyuki Tanaka, ○○めも(時間の浪費), 


## 本題のRT/fav削除

- twitter api more than 3200 tweets (user timeline)
- twitter api more than 7 days (search)
- twitter search api more than 100


### RT

In [289]:
import time
import datetime

In [287]:
max_id = None

In [481]:
max_id

855444549098905601

In [480]:
# Walk the user timeline and delete retweets, remembering in `max_id` the id of
# the last tweet processed so that a later attempt can resume from that point.
# NOTE: `max_id` must already exist; it is initialised in an earlier cell.
tweet_count = 0
destroy_count = 0
for i in range(10):
    print(i, "start:", datetime.datetime.now())
    for s in tweepy.Cursor(api.user_timeline,
                           screen_name="ksomemo", count=100,
                           max_id=max_id).items():
        try:
            tweet_count += 1
            # A tweet counts as a retweet when the API flags it or its text starts with "RT @".
            if s.retweeted or s.text.startswith("RT @"):
                s.destroy()
                destroy_count += 1
                # Short pause after each delete.
                time.sleep(0.2)
            max_id = s.id
        except Exception as e:
            print(e)
            max_id = s.id
            print(max_id)
            print("err:", datetime.datetime.now())
            # Leaves only the inner loop; the outer loop then builds a new Cursor from `max_id`.
            break
print(tweet_count, destroy_count)

0 start: 2017-08-19 23:43:17.794501
1 start: 2017-08-19 23:43:18.331707
2 start: 2017-08-19 23:43:18.706040
3 start: 2017-08-19 23:43:19.087822
4 start: 2017-08-19 23:43:19.485019
5 start: 2017-08-19 23:43:20.148726
6 start: 2017-08-19 23:43:20.532701
7 start: 2017-08-19 23:43:20.945410
8 start: 2017-08-19 23:43:21.305765
9 start: 2017-08-19 23:43:21.688944
0 0


user_timelineは3200件までなので、RT以外の自分のツイートがあるためすべてを取得できない

### search API
- <https://dev.twitter.com/rest/public/search>
- <https://dev.twitter.com/rest/reference/get/search/tweets>

In [316]:
search_query_table = pd.read_html("https://dev.twitter.com/rest/public/search")[0]
search_query_table

Unnamed: 0,Operator,Finds Tweets...
0,watching now,containing both “watching” and “now”. This is ...
1,“happy hour”,containing the exact phrase “happy hour”.
2,love OR hate,containing either “love” or “hate” (or both).
3,beer -root,containing “beer” but not “root”.
4,#haiku,containing the hashtag “haiku”.
5,from:interior,sent from Twitter account “interior”.
6,list:NASA/astronauts-in-space-now,sent from a Twitter account in the NASA list a...
7,to:NASA,a Tweet authored in reply to Twitter account “...
8,@NASA,mentioning Twitter account “NASA”.
9,politics filter:safe,containing “politics” with Tweets marked as po...


## Twilogから公式RTのstatus idを見つけて対応
- 全ツイートJSONのダウンロードを忘れていた
- そのため、めんどうだがスクレイピングで対応していた

In [541]:
from bs4 import BeautifulSoup
import requests

In [571]:
error_ids = []


In [542]:
start_ym = "2012-08"


In [572]:
start_ym

'2013-10'

In [586]:
# Walk the Twilog monthly RT archive pages and delete every status linked there.
# `start_ym` is advanced as each month begins, so re-running the cell after an
# interruption resumes from the month it stopped in. Ids that failed for a
# reason other than "already gone" are collected in `error_ids`.
NO_STATUS_FOUND = 144  # Twitter error code: "No status found with that ID."
MAX_PAGES_PER_MONTH = 20

twilog_month_list = pd.date_range(start_ym, "2017-08", freq="MS")
url_format = "http://twilog.org/ksomemo/month-{ym}/rt-{page}"
for first_date in twilog_month_list:
    print(first_date)
    start_ym = first_date.strftime("%Y-%m")
    ym = first_date.strftime("%y%m")

    for page in range(1, MAX_PAGES_PER_MONTH + 1):
        res = requests.get(url_format.format(ym=ym, page=page))
        bs = BeautifulSoup(res.content, "lxml")
        elements = bs.select("#content > section > article > p.tl-time > a")
        if not elements:
            # No matching links on this page: stop paging this month.
            break

        for el in elements:
            # The status id is the last path segment of the link.
            status_id = el.attrs.get("href").split("/")[-1]
            try:
                api.destroy_status(status_id)
                time.sleep(0.5)
            except tweepy.RateLimitError:
                # Must come before TweepError: RateLimitError is a subclass of
                # it, so in the opposite order this handler is never reached
                # and the loop keeps sending requests while rate limited.
                print("-" * 10)
                raise
            except tweepy.TweepError as e:
                # Compare the structured API error code rather than searching
                # the message text for the substring "144".
                if e.api_code != NO_STATUS_FOUND:
                    error_ids.append(status_id)
                    print(status_id, e)
            except Exception as e:
                print(e, type(e))


2013-11-01 00:00:00
398319878345461760 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
2013-12-01 00:00:00
416235918228324353 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
408400980493889536 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
407037072793292800 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
2014-01-01 00:00:00
427092163680157696 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
426707097057632258 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
421971269819449344 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
2014-02-01 00:00:00
2014-03-01 00:00:00
447754683856064513 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
446652346227896321 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
446302665052389376 Failed to send request: ("bad hands

### 削除漏れを削除

In [590]:
log="""
2017-06-01 00:00:00
871348835120889856 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
2017-07-01 00:00:00
891834192413077504 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
890164457325838336 Failed to send request: ("bad handshake: SysCallError(54, 'ECONNRESET')",)
2017-08-01 00:00:00
"""

In [591]:
import re

# Retry deletion for every status id whose log line contains "bad" (the
# handshake failures); other lines, such as the month headers, are skipped.
failed_lines = (line for line in log.splitlines() if re.search("bad", line))
for line in failed_lines:
    status_id = line.split()[0]
    try:
        api.destroy_status(status_id)
    except Exception as e:
        print(status_id, e)
        

871348835120889856 [{'code': 144, 'message': 'No status found with that ID.'}]
891834192413077504 [{'code': 144, 'message': 'No status found with that ID.'}]
890164457325838336 [{'code': 144, 'message': 'No status found with that ID.'}]


## max_idを利用した直近ログの検索

In [471]:
# Page backwards through recent search results, at most 10 requests, feeding
# the max_id reported by each result set into the next request. Result sets
# are stored in `tweets`, keyed by that max_id.
query = "from:ksomemo"
max_id = -1
tweets = {}
for _ in range(10):
    search_result = api.search(q=query, result_type="recent", max_id=max_id, count=100)
    print(len(search_result))

    next_max_id = search_result.max_id
    if next_max_id is None:
        # The result set reports no max_id: stop paging.
        break

    max_id = next_max_id
    first, last = search_result[0], search_result[-1]
    print("max_id:", max_id)
    print("result id(first, last):", (first.id, last.id))
    print("=" * 10)
    tweets[max_id] = search_result
    time.sleep(0.5)
    

86
max_id: 895793799024136191
result id(first, last): (898914663458459648, 895793799024136192)
0


In [464]:
for k, v in tweets.items():
    print(k, len(v))

895793799024136191 34
897464355540697087 50


### Favorite

In [308]:
# Remove every favorite, pausing briefly after each successful call; stop at
# the first failure and log when it happened.
print("start:", datetime.datetime.now())
favorite_items = tweepy.Cursor(api.favorites).items()
for favorite in favorite_items:
    try:
        api.destroy_favorite(favorite.id)
    except Exception as exc:
        print(exc)
        print("err:", datetime.datetime.now())
        break
    else:
        time.sleep(0.2)


start: 2017-08-18 07:36:24.536214


## Stream
- <http://docs.tweepy.org/en/v3.5.0/streaming_how_to.html>
- <https://github.com/tweepy/tweepy/blob/v3.5.0/tweepy/streaming.py#L30-L136>
- <https://dev.twitter.com/streaming/overview>

### run
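A stream is started from one of the methods below. Internally each of them calls `_start` -> `_run`; when the stream is started asynchronously, `_run` is executed in a `threading.Thread`.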

- filter
- sitestream
- userstream
- sample
- retweet
- firehose


In [265]:
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that prints connection events, tweet texts and error codes."""

    def on_connect(self):
        """Report that the stream connection has been established."""
        print("connected")

    def on_status(self, status):
        """Print the text of each received status."""
        print(status.text)

    def on_error(self, status_code):
        """Print the HTTP status code of a stream error.

        Returns False for 420 (rate limited) so that tweepy stops the stream;
        any other code falls through and returns None.

        Status code reference:
        https://dev.twitter.com/overview/api/response-codes
        """
        print(status_code)
        if status_code == 420:
            return False

In [614]:
listener = MyStreamListener()
stream = tweepy.Stream(auth=api.auth, listener=listener)

In [615]:
stream.disconnect()

In [None]:
try:
    stream.filter(track=["python"])
except Exception as e:
    print(e)

### HTTP Status Codes

In [260]:
tables = pd.read_html("https://dev.twitter.com/overview/api/response-codes")

In [261]:
len(tables)

2

In [262]:
tables[0]

Unnamed: 0,Code,Text,Description
0,200,OK,Success!
1,304,Not Modified,There was no new data to return.
2,400,Bad Request,The request was invalid or cannot be otherwise...
3,401,Unauthorized,Missing or incorrect authentication credential...
4,403,Forbidden,"The request is understood, but it has been ref..."
5,404,Not Found,The URI requested is invalid or the resource r...
6,406,Not Acceptable,Returned when an invalid format is specified i...
7,410,Gone,This resource is gone. Used to indicate that a...
8,420,Enhance Your Calm,Returned when an application is being rate lim...
9,422,Unprocessable Entity,Returned when an image uploaded to POST accoun...


### Error Codes

In [263]:
tables[1]

Unnamed: 0,Code,Text,Description
0,32,Could not authenticate you,Your call could not be completed as dialed.
1,34,"Sorry, that page does not exist",Corresponds with HTTP 404 - the specified reso...
2,36,You cannot report yourself for spam.,Corresponds with HTTP 403. You cannot use your...
3,44,attachment_url parameter is invalid,Corresponds with HTTP 400. The URL value provi...
4,50,User not found.,Corresponds with HTTP 404. The user is not found.
5,63,User has been suspended.,The user account has been suspended and inform...
6,64,Your account is suspended and is not permitted...,Corresponds with HTTP 403 — the access token b...
7,68,The Twitter REST API v1 is no longer active. P...,Corresponds to a HTTP request to a retired v1-...
8,87,Client is not permitted to perform this action.,Corresponds with HTTP 403. The endpoint called...
9,88,Rate limit exceeded,The request limit for this resource has been r...
