# Intro

This notebook pulls the latest comments on Facebook relating to a page that you manage, and updates a running list in Google Sheets.

Note that this script only pulls the latest records, so we suggest running it regularly, e.g. once a day or once a week. A next step would be to extend it so that Facebook/Google Sheets authentication is automated.

# Code

To run all: Runtime/Cell > Run all

Note that you will need to create a new access token at https://developers.facebook.com/tools/explorer and paste it into Step 1, along with the Facebook page ID and name. The Google Sheets name and worksheet name are set in Step 3.

In [None]:
!pip install facebook-sdk
!pip install vaderSentiment

In [None]:
import pandas as pd
import facebook
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# Step 1 - Get Facebook posts
# Fill in the three placeholders below before running this cell.
PAGE_ID = "YOUR_PAGE_ID"
PAGE_NAME = "YOUR_PAGE_NAME"  # Step 2 compares comment authors against this name
TOKEN = "YOUR_TOKEN"

graph = facebook.GraphAPI(access_token=TOKEN, version="3.0")
posts = graph.get_object(id=PAGE_ID, fields="posts")

# Collect the id of every post in the response. The lookups are defensive:
# if the response carries no 'posts' entry, indexing it directly would raise
# KeyError, so an empty list is produced instead and the later steps simply
# find nothing to do.
post_ids = [post['id'] for post in posts.get('posts', {}).get('data', [])]

In [None]:
# Step 2 - Load comments into data frame, add sentiment. Exclude comments from page owner
# VADER analyser used to score the text of each comment.
analyser = SentimentIntensityAnalyzer()

# Accumulator that get_comments() fills one row at a time. The "date" column
# is a helper used for duplicate detection and is removed once loading is done.
comments_df = pd.DataFrame(
    columns=[
        "post_id",
        "comment_id",
        "created_time",
        "sentiment",
        "message",
        "date",
    ],
)

def get_comments(object_id, root_post_id=None):
    """Add the comments under ``object_id``, and all nested replies, to ``comments_df``.

    One Graph API request is made for the object's comments. Each returned
    comment is appended to the module-level ``comments_df`` together with its
    VADER compound sentiment score, unless its message is empty, it was
    written by ``PAGE_NAME``, or the same message is already stored for the
    same date. The function then recurses into every comment to pick up its
    replies.

    Args:
        object_id: Id of the post or comment whose comments should be read.
        root_post_id: Id written to the ``post_id`` column. Leave as ``None``
            when calling with a post id; the recursive calls pass it along so
            replies are filed under the post they belong to.

    Returns:
        None. Rows are added to ``comments_df`` in place.

    Note:
        ``comments_df`` must still contain its ``date`` column when this is
        called. Only the data returned by the single request is processed;
        no further result pages are followed.
    """
    # At the top level the object *is* the post. Passing the id down explicitly
    # avoids depending on a global loop variable named `post_id`.
    if root_post_id is None:
        root_post_id = object_id

    response = graph.get_object(id=object_id, fields="comments{created_time,id,message,from}")

    # Nothing to do when the response has no 'comments' entry.
    if 'comments' not in response:
        return

    for comment in response['comments']['data']:
        # Treat a comment without a 'message' field like an empty one.
        message = comment.get('message', "")
        date = comment['created_time'].split('T')[0]
        from_page_owner = 'from' in comment and comment['from']['name'] == PAGE_NAME

        # Add comment if not a duplicate for same day and not posted by page owner.
        # The cheap checks come first so the data frame scan is skipped when possible.
        is_new = (
            message != ""
            and not from_page_owner
            and not ((comments_df['date'] == date) & (comments_df['message'] == message)).any()
        )
        if is_new:
            sentiment = analyser.polarity_scores(message)['compound']
            comments_df.loc[len(comments_df)] = [
                root_post_id,
                comment['id'],
                comment['created_time'],
                sentiment,
                message,
                date,
            ]

        # Call function recursively in case there are replies to *this* comment
        get_comments(comment['id'], root_post_id)
  
# Walk every post id gathered in Step 1; get_comments() adds rows to comments_df.
for post_id in post_ids:
    get_comments(post_id)

# The helper column was only needed while loading, so remove it before the preview.
comments_df.drop(columns=['date'], inplace=True)
comments_df.head(1)

In [None]:
# Step 3 - Open Google Sheet and get existing comments
import gspread
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Name of the spreadsheet, and of the worksheet (tab) inside it, to read and update.
SHEET_NAME = "YOUR_SHEET_NAME"
WORKSHEET_NAME = "YOUR_WORKSHEET_NAME"

# Authenticate the Colab user first; the default credentials are looked up afterwards.
auth.authenticate_user()
# NOTE(review): oauth2client is deprecated upstream - confirm the installed
# gspread version still accepts these credentials.
gc = gspread.authorize(GoogleCredentials.get_application_default())

worksheet = gc.open(SHEET_NAME).worksheet(WORKSHEET_NAME)

rows = worksheet.get_all_values()

# The first row of the sheet is the header. A blank worksheet has no header
# row: indexing rows[0] would then raise IndexError, or give a frame without
# the 'comment_id' column that Step 4 needs. In that case fall back to the
# columns this notebook writes.
if rows and rows[0]:
    existing_df = pd.DataFrame.from_records(rows[1:], columns=rows[0])
else:
    existing_df = pd.DataFrame(
        columns=["post_id", "comment_id", "created_time", "sentiment", "message"]
    )
existing_df.head(1)

In [None]:
# Step 4 - Identify new comments (not in existing)
# A comment counts as new when its id does not yet appear in the sheet's
# 'comment_id' column.
new_comments_df = comments_df.loc[
    ~comments_df['comment_id'].isin(existing_df['comment_id'])
]
new_comments_df

In [None]:
# Step 5 - Write all comments to the sheet
from gspread_dataframe import set_with_dataframe

# DataFrame.append was removed in pandas 2.0. pd.concat produces the same
# frame: the existing rows followed by the new ones, original indexes kept.
all_comments = pd.concat([existing_df, new_comments_df], sort=False)
# Order by created_time, descending, before writing back.
all_comments = all_comments.sort_values(['created_time'], ascending=[False])
set_with_dataframe(worksheet, all_comments)
print("Done: added %d new comments to sheet" % len(new_comments_df))