-
Notifications
You must be signed in to change notification settings - Fork 2
/
mbot.py
103 lines (88 loc) · 2.98 KB
/
mbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import tweepy
import os
import time
import pandas as pd
import numpy as np
from collections import Counter
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
import random
from sklearn.feature_extraction import stop_words
# Twitter API credentials used by get_api().
# Each value is taken from the environment when the variable is set and
# otherwise falls back to the empty placeholder string, so secrets do not
# have to be written into this file.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_secret = os.environ.get("TWITTER_ACCESS_SECRET", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
def get_api():
    """Return a Tweepy API client authenticated with the module-level credentials.

    The OAuth handler is built from ``consumer_key`` / ``consumer_secret`` and
    then given ``access_token`` / ``access_secret``.
    """
    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_secret)
    # A rate-limit-aware variant (wait_on_rate_limit=True,
    # wait_on_rate_limit_notify=True) was left disabled here.
    return tweepy.API(handler)
def get_tweets():
    """Fetch recent tweets from the ``realDonaldTrump`` timeline.

    Requests up to 200 tweets in extended mode, prints how many were
    retrieved along with the text of the five most recent ones, and removes
    every ``"RT"`` substring from each tweet's ``full_text`` in place.

    Returns:
        The collection of status objects returned by ``api.user_timeline``
        (possibly empty), so callers can process the tweets further.
    """
    api = get_api()
    tweets = api.user_timeline(
        screen_name="realDonaldTrump", count=200, tweet_mode="extended"
    )
    print("Number of tweets extracted: {}.\n".format(len(tweets)))
    print("5 recent tweets:\n")
    for tweet in tweets[:5]:
        print(tweet.full_text)

    # Strip retweet markers from the stored text. No tweets[0] access is made,
    # so an empty timeline no longer raises IndexError.
    for tweet in tweets[:200]:
        tweet.full_text = tweet.full_text.replace("RT", "")
    return tweets
def _clean_words(text):
    """Split *text* on whitespace and drop unwanted tokens.

    A token is discarded when it contains ``"http"``, ``"@"``, ``"&"`` or
    ``"RT"``, or when it is one of sklearn's English stop words.
    """
    markers = ("http", "@", "&", "RT")
    return [
        word
        for word in text.split()
        if not any(marker in word for marker in markers)
        and word not in stop_words.ENGLISH_STOP_WORDS
    ]


def _split_into_fifths(words):
    """Cut *words* into five consecutive phrases: begin, three middles, end.

    The first four phrases each span ``len(words) // 5 - 1`` words and the
    last phrase takes everything that remains. The span is clamped at zero so
    short inputs give empty leading phrases instead of overlapping slices.
    """
    n = max(len(words) // 5 - 1, 0)
    return (
        " ".join(words[:n]),
        " ".join(words[n:2 * n]),
        " ".join(words[2 * n:3 * n]),
        " ".join(words[3 * n:4 * n]),
        " ".join(words[4 * n:]),
    )


# --- Script: remix fetched tweets into new ones and report word counts ---

# `or []` keeps the script running when get_tweets() yields nothing
# (None or an empty timeline).
tweets = get_tweets() or []

split_it = []  # every kept word across all tweets, for the frequency count
begin = []
mid = []
mid2 = []
mid3 = []
end = []

for tweet in tweets:
    words = _clean_words(tweet.full_text)
    split_it.extend(words)
    first, second, third, fourth, last = _split_into_fifths(words)
    begin.append(first)
    mid.append(second)
    mid2.append(third)
    mid3.append(fourth)
    end.append(last)

# All three middle pools are interchangeable when composing a tweet.
mid = mid2 + mid3 + mid
print(len(mid))

contweet = []
if begin and mid and end:
    for _ in range(60):
        # random.choice adapts to however many tweets were actually fetched.
        contweet.append(
            " ".join(
                [
                    random.choice(begin),
                    random.choice(mid),
                    random.choice(mid),
                    random.choice(mid),
                    random.choice(end),
                ]
            )
        )

# Explicit UTF-8 so non-ASCII tweet text (e.g. emoji) can always be written.
with open("trtweets.txt", "w+", encoding="utf-8") as f:
    for line in contweet:
        # Collapse the repeated spaces left behind by empty phrases.
        # api.update_status(line) / time.sleep(20) would post the tweet here.
        f.write(" ".join(line.split()) + "\n\n")

# Count how often each kept word occurs and show the 50 most common.
word_counts = Counter(split_it)
most_occur = word_counts.most_common(50)
print(most_occur)