## Facebook Analytics
This notebook pulls posts, comments and analytics from a specific Facebook page and enriches the comments with sentiment analysis.

In [None]:
import requests
import json
from datetime import datetime, timedelta
from pytz import timezone
from azure.cosmosdb.table.tableservice import TableService
from azure.cosmosdb.table.models import Entity
from notebookutils import mssparkutils

# Upload the requirements.txt packages to the Spark pool before running this notebook for the first time.

### Setup Variables

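Variables are read from Azure Key Vault secrets through the `KeyVaultLinkedService` linked service (see the next cell); `DAYS_LIMIT` is set directly in the notebook.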

| Variable | Description |
| --- | :---|
| SENTIMENT_KEY | Text Analytics Key |
| SENTIMENT_URL | Endpoint URL for the Text Analytics API |
| STORAGE_ACCOUNT | Name of the Storage account for the Table Storage |
| STORAGE_ACCOUNT_KEY | Key for the Storage Account |
| PAGE_ID | Facebook Page ID for the page we are analyzing |
| APP_ID | Facebook Dev Application APP_ID |
| APP_SECRET | App Secret for the App |
| DAYS_LIMIT | We will collect the last DAYS_LIMIT days worth of Posts |
| PAGE_TOKEN | This is a permanent Page Token for the Facebook Page we are analysing. |

The `PAGE_TOKEN` is a permanent token. It is obtained by calling `fb.get_permanent_page_token` with a temporary user token.


In [None]:
# Prerequisites:
#  * the Synapse workspace needs GET and LIST permissions in Key Vault >> Access Policies
#  * the Key Vault Linked Service created under "Manage" must be named exactly as used
#    below, i.e. KeyVaultLinkedService

keyvault_name = 'smademokeyvault' # Input the Key Vault Name Here


def _read_secret(secret_name):
    """Return the value of `secret_name` from the Key Vault named by `keyvault_name`."""
    return TokenLibrary.getSecret(keyvault_name, secret_name, "KeyVaultLinkedService")


# Storage account used for the Table Storage output
STORAGE_ACCOUNT = _read_secret("STORAGEACCOUNT")
STORAGE_ACCOUNT_KEY = _read_secret("STORAGEACCOUNTKEY")
# Text Analytics credentials
SENTIMENT_KEY = _read_secret("TEXTANALYTICSKEY")
SENTIMENT_URL = _read_secret("TEXTANALYTICSENDPOINT")
# Facebook application and page credentials
APP_ID = _read_secret("FBAPPID")
APP_SECRET = _read_secret("FBAPPSECRET")
PAGE_TOKEN = _read_secret("FBPAGETOKEN")
PAGE_ID = _read_secret("FBPAGEID")
# Only posts created within the last DAYS_LIMIT days are collected (1095 = 3 x 365)
DAYS_LIMIT = 1095

#### Include sentiment library with helper functions

In [None]:
import uuid, json, time
from ratelimit import limits, sleep_and_retry

class textAnalytics(object):
  """Minimal client for the Text Analytics v3.0 REST endpoints used in this notebook.

  Every public helper sends exactly one document per request and returns a
  plain Python value. When the service reply contains no usable document
  (for example an error payload) the helper returns its fallback value
  ("error", "" or "unk") instead of raising.
  """

  # Text longer than this many characters is truncated before it is sent.
  MAX_TEXT_LENGTH = 5120

  def __init__(self, endpoint, key):
    """Remember the service endpoint and subscription key.

    endpoint -- URL prefix; the relative API path is appended to it verbatim,
                so it is expected to end with "/".
    key      -- value sent in the Ocp-Apim-Subscription-Key header.
    """
    self.endpoint = endpoint
    self.key = key
    # Creation time and request counter, kept for throughput diagnostics.
    self.start_time = time.time()
    self.calls_count = 0

  @sleep_and_retry
  @limits(calls=3, period=1)
  def get_post_response(self, url, headers, json):
    """POST `json` to `self.endpoint + url` and return the decoded JSON reply.

    The decorators throttle this method to 3 calls per 1-second period;
    calls over the limit sleep and are retried rather than failing.
    """
    self.calls_count += 1
    response = requests.post(self.endpoint + url, headers=headers, json=json)
    return response.json()

  def _headers(self, with_trace_id=False):
    """Build the request headers, optionally adding a random X-ClientTraceId."""
    headers = {
      'Ocp-Apim-Subscription-Key': self.key,
      'Content-type': 'application/json'
    }
    if with_trace_id:
      headers['X-ClientTraceId'] = str(uuid.uuid4())
    return headers

  def _single_document_body(self, input_text, **extra_fields):
    """Build a request body holding one document (id '1') with truncated text."""
    document = {'id': '1', 'text': input_text[:self.MAX_TEXT_LENGTH]}
    document.update(extra_fields)
    # The API accepts several documents per request; only one is ever sent here.
    return {'documents': [document]}

  def _first_document(self, responsejson):
    """Return the first entry of the reply's 'documents' list, or None.

    Uses short-circuit logic so that a reply without a 'documents' key
    (such as an error payload) yields None instead of raising KeyError.
    """
    if not isinstance(responsejson, dict):
      return None
    documents = responsejson.get('documents')
    if not documents:
      return None
    return documents[0]

  #######################################
  # Helper function to get the sentiment
  # Calls the Sentiment Webservice API
  def get_sentiment(self, input_text, input_language):
    """Return the sentiment label the service assigns to `input_text`.

    Returns "error" for empty input or when the reply has no document.
    """
    if not input_text:
      return "error"

    body = self._single_document_body(input_text, language=input_language)
    responsejson = self.get_post_response(
      "text/analytics/v3.0/sentiment", headers=self._headers(), json=body)
    document = self._first_document(responsejson)
    if document is None:
      return "error"
    return document['sentiment']

  def get_keyphrase(self, input_text, input_language, return_list = False):
    """Return the key phrases extracted from `input_text`.

    With return_list=False (default) the phrases are joined with ", " into a
    single string; with return_list=True the list from the service is
    returned as-is. Empty input or a reply without a document yields ""
    in both modes.
    """
    if not input_text:
      return ""

    body = self._single_document_body(input_text, language=input_language)
    responsejson = self.get_post_response(
      "text/analytics/v3.0/keyphrases",
      headers=self._headers(with_trace_id=True), json=body)
    document = self._first_document(responsejson)
    if document is None:
      return ""
    phrases = document['keyPhrases']
    if return_list:
      return phrases
    return ", ".join(phrases)

  def get_language(self, input_text):
    """Detect the language of `input_text`.

    Returns a tuple (iso6391 code, language name, score):
      * detection with confidence above 0.6 -> (code, name, confidence)
      * detection with lower confidence     -> ('unk', 'Unknown', 1 - confidence)
      * empty input or no document in reply -> ('unk', 'Unknown', 0)
    """
    if not input_text:
      return "unk", 'Unknown', 0

    body = self._single_document_body(input_text)
    responsejson = self.get_post_response(
      "text/analytics/v3.0/languages", headers=self._headers(), json=body)
    document = self._first_document(responsejson)
    if document is None:
      return "unk", 'Unknown', 0

    detected = document['detectedLanguage']
    confidence = float(detected['confidenceScore'])
    if confidence > 0.6:
      return detected['iso6391Name'], detected['name'], confidence
    return 'unk', 'Unknown', 1 - confidence

### Facebook class - primary supporting abstractions
https://developers.facebook.com/docs/graph-api/reference/v9.0/insights
https://developers.facebook.com/docs/pages/mentions/

In [None]:
class facebook(object):
  """Facebook Graph API reader and Table Storage writer for a single page.

  The read methods return the decoded JSON of the Graph API reply unchanged
  (including error payloads); the save_* methods write one entity per item
  through the `table_service` object they are given.
  """

  def __init__(self, page_id, page_token, app_id, app_secret):
    """Store the page id, page access token and application credentials."""
    self.page_id = page_id
    self.page_token = page_token
    self.app_id = app_id
    self.app_secret = app_secret

  def _get_json(self, url, payload):
    """GET `url` with `payload` as query parameters and return the decoded JSON."""
    response = requests.get(url, params=payload)
    return response.json()

  def _get_next_page(self, current):
    """Follow `current['paging']['next']` and return that page, or [] if absent.

    A reply without a 'paging' key (or one that is not a dict at all) is
    treated as "no further page" instead of raising.
    """
    paging = current.get('paging') if isinstance(current, dict) else None
    if not paging or 'next' not in paging:
      return []
    return self._get_json(paging['next'], {'access_token': self.page_token})

  def get_app_access_token(self):
    """Request an application access token with the client-credentials grant.

    The reply is deliberately not printed: it contains the token itself.
    Raises KeyError if the reply has no 'access_token' field.
    """
    url = 'https://graph.facebook.com/oauth/access_token'
    payload = {
        'grant_type': 'client_credentials',
        'client_id': self.app_id,
        'client_secret': self.app_secret
    }
    return self._get_json(url, payload)['access_token']

  # This is just a utility function to get a permanent page token, just provide a page id and user token
  def get_permanent_page_token(self, page_id, user_token):
    """Exchange a short-lived user token for the access token of `page_id`.

    Raises KeyError if either reply has no 'access_token' field.
    """
    # First convert the user token (short) into a long token
    url = "https://graph.facebook.com/oauth/access_token"
    payload = {
        'grant_type': 'fb_exchange_token',
        'client_id': self.app_id,
        'client_secret': self.app_secret,
        'fb_exchange_token': user_token
    }
    long_token = self._get_json(url, payload)['access_token']

    # Use the long token to get a permanent page token
    url = f"https://graph.facebook.com/{page_id}"
    payload = {
        'fields': 'access_token',
        'access_token': long_token
    }
    return self._get_json(url, payload)['access_token']

  # Return the feed with additional metric data
  def get_feed(self):
    """Return the first page (up to 50 posts) of the page feed."""
    url = f"https://graph.facebook.com/{self.page_id}/feed"
    payload = {
        'access_token': self.page_token,
        'fields': 'created_time,full_picture,permalink_url,message,from,shares,reactions.summary(total_count),comments.summary(total_count)',
        'limit': '50'
    }
    return self._get_json(url, payload)

  # Get next feed page
  def next_feed(self, feed):
    """Return the page that follows `feed`, or [] when there is none."""
    return self._get_next_page(feed)

  # Return comments and supporting Analytics
  def get_comments(self, feed_id):
    """Return the first page (up to 50 entries) of comments on `feed_id`."""
    url = f"https://graph.facebook.com/{feed_id}/comments"
    payload = {
        'access_token': self.page_token,
        'fields': 'id,created_time,like_count,message,comment_count,permalink_url,likes,user_likes',
        'limit': '50'
    }
    return self._get_json(url, payload)

  # Return attachment information
  def get_attachments(self, feed_id):
    """Return the attachments edge of `feed_id`."""
    url = f"https://graph.facebook.com/{feed_id}/attachments"
    payload = {
        'access_token': self.page_token
    }
    return self._get_json(url, payload)

  # Return reaction metrics
  def get_reactions(self, post_id):
    """Return the reactions edge of `post_id`."""
    url = f"https://graph.facebook.com/{post_id}/reactions"
    payload = {
        'access_token': self.page_token,
        'fields': 'reactions.summary(total_count)'
    }
    return self._get_json(url, payload)

  def get_post_reaction(self, post_id, reaction_type):
    """Return the number of reactions of `reaction_type` on `post_id`.

    Returns the string '0' when the reply has no reactions summary.
    """
    # reaction_type should be one of : NONE, LIKE, LOVE, WOW, HAHA, SAD, ANGRY, THANKFUL, PRIDE, CARE
    url = f"https://graph.facebook.com/{post_id}"
    payload = {
        'access_token': self.page_token,
        'fields': 'reactions.type(' + reaction_type + ').limit(0).summary(total_count)'
    }
    result_json = self._get_json(url, payload)

    # Validate and return the result, else return '0'
    # NOTE(review): the fallback is a string while a real count comes back from
    # the API as-is (presumably an int) - confirm which type the table consumers expect.
    if 'reactions' in result_json and 'summary' in result_json['reactions'] and 'total_count' in result_json['reactions']['summary']:
      return result_json['reactions']['summary']['total_count']
    return '0'

  # Helper function to get Post Text
  def get_post_text(self, post):
    """Return the best available text for `post`.

    Preference order: 'message', then 'story', then attachment text. For
    attachments the last one wins and its 'title', else 'description', else
    'type' is used; with no attachments the permalink URL is returned.
    """
    if 'message' in post:
      return post['message']
    if 'story' in post:
      return post['story']

    # It's probably an attachment
    post_text = post['permalink_url']
    # Use this instance (not a notebook-level global) to fetch the attachments,
    # and tolerate replies without a 'data' list.
    attachments = self.get_attachments(post['id'])
    for attachment in attachments.get('data', []):
      if 'title' in attachment:
        post_text = attachment['title']
      elif 'description' in attachment:
        post_text = attachment['description']
      else:
        post_text = attachment['type']
    return post_text

  def add_post_reactions_detail(self, post_detail):
    """Add one per-reaction-type total to `post_detail` and return it.

    `post_detail['RowKey']` is used as the post id; one API request is made
    per reaction type. The dict is modified in place.
    """
    # First object is for the API, second is the field that will be generated
    reaction_list = [['LIKE', 'post_reactions_like_total'],
                     ['LOVE', 'post_reactions_love_total'],
                     ['WOW', 'post_reactions_wow_total'],
                     ['HAHA', 'post_reactions_haha_total'],
                     ['SAD', 'post_reactions_sorry_total'],
                     ['ANGRY', 'post_reactions_anger_total']]
    for reaction_type, field_name in reaction_list:
      post_detail[field_name] = self.get_post_reaction(post_detail['RowKey'], reaction_type)
    return post_detail

  # Save the post to a table
  def save_post(self, table_service, table_name, post, post_text):
    """Write `post` (with `post_text` and reaction totals) to `table_name`.

    The post id is used as both PartitionKey and RowKey, and the entity is
    written with insert_or_replace_entity.
    """
    # Sometimes shares are not there, so make it zero
    # NOTE(review): the fallback is the string '0', a real count is whatever the API returned.
    share_count = post['shares']['count'] if 'shares' in post else '0'
    full_picture = post.get('full_picture', "")
    post_from = post['from']['name'] if 'from' in post else 'Facebook User'

    # Build the Post save
    postsave = {
        'PartitionKey': post['id'], 'RowKey' : post['id'],
        'post': post_text, 'post_time' : post['created_time'], 'from': post_from,
        'shares': share_count, 'reactions': post['reactions']['summary']['total_count'],
        'comments': post['comments']['summary']['total_count'],
        'permalink_url': post['permalink_url'],
        # NOTE(review): the trailing dot in this key looks unintended; it is kept
        # because renaming it would change the stored column name.
        'full_picture.': full_picture
    }
    # Get reactions
    postsave = self.add_post_reactions_detail(postsave)

    table_service.insert_or_replace_entity(table_name, postsave)

  # Save the comment to a table
  def save_comment(self, table_service, table_name, post_id, comment, sentiment, keyphrase, language, confidence):
    """Write one enriched comment to `table_name`.

    PartitionKey is `post_id` and RowKey is the comment id. Raises KeyError
    if `comment` lacks any of the fields read below.
    """
    sentcomment = {
        'PartitionKey': post_id, 'RowKey' : comment['id'],
        'language' : language, 'lang_confidence' : confidence,
        'sentiment' : sentiment,
        'comment' : comment['message'],
        'like_count' : comment['like_count'], 'comment_count': comment['comment_count'],
        'permalink_url': comment['permalink_url'], 'keyphrase': keyphrase,
        'comment_time' : comment['created_time']
    }
    table_service.insert_or_replace_entity(table_name, sentcomment)

  def get_page_metrics(self):
    """Return the first page of the four page insights metrics requested below."""
    url = f"https://graph.facebook.com/{self.page_id}/insights"
    payload = {
        'access_token': self.page_token,
        'metric': 'page_content_activity_by_city_unique,page_fans_gender_age,page_impressions_frequency_distribution,page_website_clicks_by_site_logged_in_unique'
    }
    return self._get_json(url, payload)

  def get_page_metrics_next(self, metrics):
    """Return the page that follows `metrics`, or [] when there is none."""
    return self._get_next_page(metrics)

  def save_page_metrics(self, table_service, page_metrics_table, page_metrics_value_table, page_metrics):
    """Store metric definitions and their broken-down values.

    For every metric in `page_metrics['data']` one entity describing the
    metric goes to `page_metrics_table`, and one entity per key of each
    value dict goes to `page_metrics_value_table` (PartitionKey = metric
    name, RowKey = breakdown key). Entities are written with
    insert_or_replace_entity, so a later value with the same keys replaces
    an earlier one. Value entries that carry no dict under 'value' are skipped.
    """
    for metric in page_metrics['data']:
      metric_name_save = {
        'PartitionKey': 'Facebook', 'RowKey' : metric['name'],
        'Title': metric['title'], 'Description': metric['description'], 'Period': metric['period'],
      }
      table_service.insert_or_replace_entity(page_metrics_table, metric_name_save)
      for metric_value in metric['values']:
        breakdown = metric_value.get('value')
        if not isinstance(breakdown, dict):
          # Nothing to break down (missing or scalar value): no RowKey can be formed.
          continue
        for timeslice_key, timeslice_value in breakdown.items():
          metric_save = {
            'PartitionKey': metric['name'], 'RowKey' : timeslice_key,
            'Value': timeslice_value, 'metric_time': metric_value['end_time']
          }
          table_service.insert_or_replace_entity(page_metrics_value_table, metric_save)

### Initialize and Create Tables
An error may be reported if a table already exists; it can safely be ignored.

In [None]:
# Connect to Table Storage and create, in this order, the four tables the notebook writes to.
table_service = TableService(account_name=STORAGE_ACCOUNT, account_key=STORAGE_ACCOUNT_KEY)
for _table_name in ('fbcomments', 'fbposts', 'fbpagemetricname', 'fbpagemetricvalue'):
    table_service.create_table(_table_name)

### Create an instance of the Facebook and Text Analytics class

In [None]:
# Facebook Graph API client for the page being analysed
fb = facebook(
    page_id=PAGE_ID,
    page_token=PAGE_TOKEN,
    app_id=APP_ID,
    app_secret=APP_SECRET,
)
# Text Analytics client used to enrich the comments
ta = textAnalytics(endpoint=SENTIMENT_URL, key=SENTIMENT_KEY)

In [None]:
#If running for the first time, uncomment the below line to exchange your temporary token for a permanent one, then copy the result and update the temporary token in Azure Key Vault
#fb.get_permanent_page_token(PAGE_ID, PAGE_TOKEN)

### Save Page Metrics

In [None]:
# Fetch the page-level insights, then persist the metric names and their values.
page_metrics = fb.get_page_metrics()
fb.save_page_metrics(table_service, 'fbpagemetricname', 'fbpagemetricvalue', page_metrics)

### Main Loop - pull posts, comments, metrics and enrich with AI

In [None]:
# Walk the page feed, saving every post newer than the cutoff and every comment
# on it (all comment pages, not just the first), enriched with language,
# sentiment and key phrases.
page = 1      # number of the feed page currently being processed
counter = 0   # number of comments saved so far
post_new_enough = True
time_limit = datetime.now(timezone('UTC')) - timedelta(days = DAYS_LIMIT)
feed = fb.get_feed()

# Loop until there is no more data or the posts are too old
while ('data' in feed) and post_new_enough:
    for post in feed['data']:
        post_time = datetime.strptime(post['created_time'], "%Y-%m-%dT%H:%M:%S%z")
        # Check if the post is too old, if so, we exit the loop
        if post_time < time_limit:
            post_new_enough = False
            break

        # Save the post
        post_text = fb.get_post_text(post)
        fb.save_post(table_service, 'fbposts', post, post_text)

        # if there are comments, collect them
        if int(post['comments']['summary']['total_count']) > 0:
            print(f"-----------------------------------\n{post_text[:100]}")
            # Process the comments, one page (up to 50) at a time
            comments = fb.get_comments(post['id'])
            while 'data' in comments:
                for comment in comments['data']:
                    comment_text = comment['message']
                    # Figure out the Language
                    language, langname, confidence = ta.get_language(comment_text)
                    if language == 'unk':
                        # Sentiment is skipped for unknown languages; key phrases
                        # are still extracted using 'en-ZA' as the language.
                        sentiment = "unknown"
                        keyphrase = ta.get_keyphrase(comment_text, 'en-ZA')
                    else:
                        # Call the Sentiment API
                        sentiment = ta.get_sentiment(comment_text, language)
                        # Call keyphrase extraction api
                        keyphrase = ta.get_keyphrase(comment_text, language)

                    # Save the comment to table
                    fb.save_comment(table_service, 'fbcomments', post['id'], comment, sentiment, keyphrase, langname, confidence)
                    print(f"==> {sentiment} | {comment['message']}")
                    counter += 1

                # Only follow the paging link when the reply actually carries one
                if 'next' not in comments.get('paging', {}):
                    break
                comments = fb.next_feed(comments)

    # Stop without another request once the cutoff was hit or the feed is exhausted
    if not post_new_enough or 'next' not in feed.get('paging', {}):
        break
    feed = fb.next_feed(feed)
    page += 1

In [None]:
# Display how many Text Analytics requests `ta` has issued so far.
ta.calls_count