In [1]:
# Installing requirements 
# !pip install pyquery
# !pip install -r './assets/got_requirements.txt'
# !pip install langdetect

In [1]:
import pandas as pd
import time
import GetOldTweets3 as got
from langdetect import detect
from facebook_scraper import get_posts
import pickle

## Create function to get tweets from multiple users

In [2]:
def get_tweets(users, since_date, until_date, num_tweets):
    '''
    Pulls tweets from multiple users and returns a single pandas dataframe.
    Index: date
    Columns: text, user, language
    ---
    users: List of usernames without the '@' sign
    since_date: Date to start pulling tweets from. Input as string "YYYY-MM-DD"
    until_date: Date that tweets go up to. Input as string "YYYY-MM-DD"
    num_tweets: Int. Maximum number of tweets to pull per user
    ---
    Returns: dataframe with one row per tweet. "language" holds the code
    returned by langdetect's detect(), or None when detection fails for
    that tweet.
    '''
    # Local import: langdetect is already a dependency of this notebook, and
    # importing here keeps this cell independent of the import cell.
    from langdetect.lang_detect_exception import LangDetectException

    def detect_or_none(text):
        # detect() raises when it cannot extract any language features from
        # the text; one such tweet should not abort the whole pull.
        try:
            return detect(text)
        except LangDetectException:
            return None

    # Collect one frame per user and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for user in users:
        tweetCriteria = (
            got.manager.TweetCriteria()
            .setUsername(user)
            .setSince(since_date)
            .setUntil(until_date)
            .setMaxTweets(num_tweets)
        )
        tweets = got.manager.TweetManager.getTweets(tweetCriteria)
        frames.append(tweets_to_df(tweets))

    # No users requested: return an empty frame with the documented layout
    # (pd.concat refuses an empty list).
    if not frames:
        return pd.DataFrame(columns=["text", "user", "language"]).rename_axis("date")

    all_tweets = pd.concat(frames)
    all_tweets["language"] = all_tweets["text"].apply(detect_or_none)

    return all_tweets
        

## Create function to convert tweets to dataframe

In [3]:
# This code was adapted from Ritchie Kwan's project
# https://github.com/rkkwan/disaster-rapid-alert/blob/master/code/01-Gathering-Data.ipynb
def tweets_to_df(tweets):
    '''
    Converts tweets acquired using GOT into a Pandas dataframe sorted by date.
    Index: date (ascending)
    Columns: text, user
    ---
    tweets: Iterable of tweet objects exposing .date, .text and .username
    '''
    records = [
        {"date": t.date, "text": t.text, "user": t.username}
        for t in tweets
    ]

    # Naming the columns up front keeps the frame well-formed (and the
    # set_index below valid) even when no tweets were returned.
    tweets_df = pd.DataFrame(records, columns=["date", "text", "user"])

    # convert to time series; sort_index returns a new frame, so the result
    # has to be kept. mergesort is stable, so tweets sharing a timestamp
    # keep their incoming order.
    tweets_df = tweets_df.set_index("date").sort_index(ascending=True, kind="mergesort")

    return tweets_df[["text", "user"]]

## Get tweets

In [4]:
# Twitter accounts to sample (handles without the '@')
users = [
    "CountyofSonoma",
    "create_self",
]
# up to 50 tweets per account between the two dates
sample_tweets = get_tweets(
    users,
    since_date="2019-10-23",
    until_date="2019-11-06",
    num_tweets=50,
)

In [5]:
# keep only the rows whose detected language code is "en"
english_tweets = sample_tweets.loc[sample_tweets["language"] == "en"]

## Pull Facebook Posts

In [109]:
# Facebook page names to scrape, one per line
fb_users = [
    "PetalumaHealthCenter",
    "reachforhome",
    "sonomaovernightsupport",
    "westcountycommunityservices",
    "CatholicCharitiesSantaRosa",
    "CommunityActionNapaValley",
    "Christian-Help-Center",
    "meemashouse",
    "saysc",
]

## Create function to get Facebook posts from multiple pages

In [120]:
def get_fb(users, num_pages):
    '''
    Collects Facebook posts for every page in `users` into one pandas dataframe.
    Columns: user, plus the fields of each post yielded by get_posts
    (listed by the original author as post_id, text, post_text, shared_text,
    time, image, likes, comments, shares, post_url, link)
    ---
    users: List of page names to scrape
    num_pages: Int. Forwarded to get_posts as `pages` for each page name
    '''
    # "user" is written first, so a post field with the same key would
    # overwrite the page name.
    rows = [
        {"user": page, **post}
        for page in users
        for post in get_posts(page, pages=num_pages)
    ]
    return pd.DataFrame(rows)
        

In [None]:
# scrape every page in fb_users, requesting 50 pages of posts from each
fb = get_fb(users=fb_users, num_pages=50)

In [113]:
# keep posts timestamped strictly between 2019-10-23 and 2019-12-31,
# the window around the Kincade fire
kincade = fb.loc[
    (fb["time"] > "2019-10-23")
    & (fb["time"] < "2019-12-31")
]

In [116]:
# filter for posts whose text mentions "fire"
# - case=False so "Fire" / "FIRE" are matched as well as "fire"
# - na=False so posts with missing text count as non-matching rather than
#   putting NaN into the boolean mask (which boolean indexing rejects)
# - regex=False because "fire" is a literal substring, not a pattern
kincade_fire = kincade[
    kincade["text"].str.contains("fire", case=False, na=False, regex=False)
]

## Pickle for later use

In [119]:
# Use a context manager so the file handle is flushed and closed even if
# pickling fails part-way through.
with open("../Data/kincade_fire_fb.pkl", "wb") as pkl_file:
    pickle.dump(kincade_fire, pkl_file)