/
twitter_streamR_scraper.R
executable file
·35 lines (26 loc) · 1.23 KB
/
twitter_streamR_scraper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#install.packages("streamR")
#install.packages("ROAuth")
library(ROAuth)
library(streamR)
#sets your working directory
setwd('/home/stats/data')
#create your OAuth credential
credential <- OAuthFactory$new(consumerKey='***CONSUMER KEY***',
consumerSecret='***CONSUMER SECRET KEY***',
requestURL='https://api.twitter.com/oauth/request_token',
accessURL='https://api.twitter.com/oauth/access_token',
authURL='https://api.twitter.com/oauth/authorize')
#authentication process
options(RCurlOptions = list(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl")))
download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
credential$handshake(cainfo="cacert.pem")
#function to actually scrape Twitter
filterStream( file.name="tweets_test.json",
track="twitter", tweets=1000, oauth=credential, timeout=10, lang='en' )
#Parses the tweets
tweet_df <- parseTweets(tweets='tweets_test.json')
#using the Twitter dataframe
tweet_df$created_at
tweet_df$text
plot(tweet_df$friends_count, tweet_df$followers_count) #plots scatterplot
cor(tweet_df$friends_count, tweet_df$followers_count) #returns the correlation coefficient