# Using the SDK to help you do stuff beyond what it was made for

### First import everything we need - notice how we also set logging to critical at this point

In [None]:
#bw stuff
from bwresources import BWQueries
from bwproject import BWUser, BWProject

import logging
logger = logging.getLogger("bwapi")
# Disable logging
logger.setLevel(logging.CRITICAL)

#other stuff
from getpass import getpass
import csv
import re
import pandas as pd

from datetime import datetime

import mysecret

import requests
from requests.auth import HTTPBasicAuth

In [None]:
##### Log in using the details in mysecret.py

project = BWProject(username=mysecret.username, password=mysecret.password, project=mysecret.project)

## Background

* What is the SDK actually meant to do?
    * The SDK is written to **simplify the manipulation of queries, groups, mentions, rules, categories, tags, author lists, location lists, site lists and signals**.  
    * It also now handles **data requests**, i.e. anything that you could export to csv from a dashboard component.
    * Notice this doesn't say anything about dashboards!
    
## Problem
* For Iris, Billiejoe wants a load of example mentions that are used in reports

## Solution
* Make a script that downloads a dashboard, and then extracts all the links for this dashboard

## Difficult bit / bit that's relevant to this workshop: Getting data from the dashboard
* This presents us with various options
    * Replicate network activity using our internet browser
        * In Chrome, Inspect > Network
        * Advantages
            * Doesn't require any fancy tools
            * You know it should be possible
        * Disadvantages
            * Can be complicated to actually figure out network activity
            * The frontend doesn't always follow the same structure and naming conventions as the API
                * e.g. 'project' might sometimes be called 'projects'
    * Somehow get the data directly from a tool someone at Brandwatch has made
        * Advantages
            * Should really be easier, once you know what you're doing
            * Should be better for reusing across clients, projects and users
        * Disadvantages
            * You've got to work out who actually knows what this thing is, and how to use it        

#### An example request from Chris Skilton

` curl 'https://api.brandwatch.com/projects/[project id]/dashboards/[dashboard id]' -H 'Authorization: bearer [redacted]' -H 'Content-Type: application/json' `

From that, we know the basic structure of the request:
* We know the request url (although we'll need to fill in the project id and dashboard id)
* We know that there are going to be two headers to the request:
    * Authorization, which is going to need a token
    * Content-Type, which is going to be `application/json`
    
So really, the only info in that request we don't have is the token. And the SDK can get that for us!

In [None]:
project.token

# So, filling in the details...

`curl 'https://api.brandwatch.com/projects/1998163843/dashboards/549448' -H 'Authorization: bearer [redacted]' -H 'Content-Type: application/json'`

^ **This works!**

* But we can make things even more efficient! Let's try using project.get.
* Why? Because that makes a project level get request, therefore handles authentication
* How do we know this? Docstrings!

**Note**: You need to get the dashboard ID from this dashboard's url...
**... if the url is `app.brandwatch.com/project/1998216779/dashboards/541706`...**
**... then the dashboard ID is `541706`**

In [None]:
help(project.get)

In [None]:
user_dash_ID = "549448"

In [None]:
# So let's put this all together in a simple function!

def get_dash_response(dash_id_f):
    """Fetch one dashboard's definition with a project-level GET request.

    Args:
        dash_id_f: Dashboard ID, i.e. the last path segment of the
            dashboard's URL. May be given as a string or an integer.

    Returns:
        Whatever ``project.get`` returns for ``dashboards/<id>``.
    """
    # Formatting (rather than "dashboards/" + id) means an integer ID no
    # longer raises TypeError.
    dash_url_string = f"dashboards/{dash_id_f}"
    print(f"we are going to do project.get on this url: {dash_url_string}")
    # Uses the module-level `project` object created earlier in the notebook.
    return project.get(dash_url_string)
dash_response = get_dash_response(user_dash_ID)

But what does that response actually give us?

In [None]:
print(type(dash_response))
print(dash_response.keys())

In [None]:
dash_response["tabs"]

So it looks like we have some kind of dictionary, and the keys of that dictionary contain lots of info about the dashboard.

Let's get a quick visual overview of that dictionary

In [None]:
dash_response

## The rest should be easy, thanks to Google :)

In [None]:
def get_notes(dash_resp_f):
    """Collect every "note" component of a dashboard response.

    Walks ``dash_resp_f["tabs"]``; tabs without a "components" key are
    reported and skipped. Each component whose "type" is "note" is stored
    under the key ``"component <n>: <tab name>: <component title>"``, where
    ``<n>`` counts notes across the whole dashboard starting at 1.

    Returns:
        dict mapping that key to ``{"content": ..., "dash_name": ...}``.
    """
    collected = {}
    note_count = 0

    for tab in dash_resp_f["tabs"]:
        if "components" not in tab:
            print("skipping tab, as no components found:", tab["name"])
            continue

        print("tab:", tab["name"])
        for comp in tab["components"]:
            if comp["type"] != "note":
                continue

            note_count += 1
            print("\t", "component:", comp["title"])
            label = "".join(
                ["component ", str(note_count), ": ", tab["name"], ": ", comp["title"]]
            )
            collected[label] = {
                "content": comp["settings"]["content"],
                "dash_name": dash_resp_f["name"],
            }

    return collected

notes_dict = get_notes(dash_response)

In [None]:
# Pattern for URLs that start with http(s):// or www. and run up to the next
# whitespace character. Written as a raw string so the regex escapes
# (\/, \., \s) are not treated as (invalid) Python string escapes, which
# emit a SyntaxWarning on recent Python versions. The resulting pattern text
# is unchanged.
url_regex = r"(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9]\.[^\s]{2,})"

In [None]:
def extract_links_and_context_sentences(notes_dict_f, url_pattern=None):
    """Split each note into sentences and record the markdown links in each.

    Args:
        notes_dict_f: dict mapping a component key to a dict holding at
            least "content" (the note text) and "dash_name".
        url_pattern: regex source used to recognise the URL inside a
            markdown link ``[name](url)``. Defaults to the module-level
            ``url_regex``.

    Returns:
        dict keyed by a running sentence number (starting at 1, counted
        across all components). Every entry has "text", "link count",
        "links" and "component"; entries with at least one link also get
        the sentence in "text" and a "dash_name". Sentences without links
        keep ``text == ""`` and ``"link count" == 0``.
    """
    if url_pattern is None:
        url_pattern = url_regex

    # Raw strings keep the regex escapes away from Python's own string
    # escape processing; both patterns are compiled once, outside the loops.
    name_regex = r"[^]]+"
    markup_re = re.compile(
        r"\[(" + name_regex + r")]\(\s*(" + url_pattern + r")\s*\)"
    )
    sentence_splitter = re.compile(r"\. |\n")

    sentence_dict = {}
    sentence_index = 0

    for component, note in notes_dict_f.items():
        for sentence in sentence_splitter.split(note["content"]):
            sentence_index += 1
            # Each match is a tuple of groups; the first two are the link
            # name and the URL (further groups come from url_pattern itself).
            links = [list(match)[:2] for match in markup_re.findall(sentence)]
            entry = {
                "text": "",
                "link count": 0,
                "links": links,
                "component": component,
            }
            if links:
                entry["text"] = sentence
                entry["link count"] = len(links)
                entry["dash_name"] = note["dash_name"]
            sentence_dict[sentence_index] = entry

    return sentence_dict
            
sentences_info = extract_links_and_context_sentences(notes_dict)

In [None]:
# Keep only the sentences in which at least one link was found.
sentences_with_links = {
    sentence_id: details
    for sentence_id, details in sentences_info.items()
    if details["link count"] > 0
}

In [None]:
def add_non_url_key(sentences_with_links_f, url_pattern=None):
    """Add a "sentence_minus_url" entry with every URL cut out of "text".

    For each URL match, the match itself and the single character just
    before it are removed (in a markdown link that character is the
    opening parenthesis). All URLs in a sentence are removed, a URL at the
    very start of the text is handled, and text without any URL is copied
    unchanged.

    Args:
        sentences_with_links_f: dict of sentence dicts, each with a "text"
            string. It is updated in place.
        url_pattern: regex source used to find URLs. Defaults to the
            module-level ``url_regex``.

    Returns:
        The same dict object that was passed in.
    """
    compiled = re.compile(url_regex if url_pattern is None else url_pattern)

    for info in sentences_with_links_f.values():
        text = info["text"]
        kept_pieces = []
        cursor = 0  # index of the first character not yet copied or dropped
        for match in compiled.finditer(text):
            # Also drop the character before the URL, but never reach back
            # past the start of the text or into a span already handled.
            cut_from = max(match.start() - 1, cursor)
            kept_pieces.append(text[cursor:cut_from])
            cursor = match.end()
        kept_pieces.append(text[cursor:])
        info["sentence_minus_url"] = "".join(kept_pieces)

    return sentences_with_links_f
    
sentences_with_links = add_non_url_key(sentences_with_links)

In [None]:
def extract_analysis_info(sentences_with_links_f):
    """Annotate each sentence with any date and quoted topics it mentions.

    Reads ``"sentence_minus_url"`` from every sentence dict and, in place:

    * if a date can be fuzzily parsed from the sentence, sets "datetime"
      (the parsed value) and "date_string" (formatted as ``"%B %d"``);
    * if the sentence contains one of the topic phrases, sets
      "topics_list" to the text found between quote characters.

    Args:
        sentences_with_links_f: dict of sentence dicts, updated in place.

    Returns:
        The same dict object that was passed in.
    """
    # `dparser` was never imported anywhere in this file, so the lookup
    # raised NameError on every sentence and the former bare `except: pass`
    # hid it - no date was ever extracted. dateutil is installed alongside
    # pandas, which this notebook already depends on.
    from dateutil import parser as dparser

    # The plain "topic" entry already covers the two longer phrases; all
    # three are kept as originally listed.
    topic_phrases = ("trending topic", "fading topic", "topic")
    quoted_text = re.compile(r"['\"](.*?)['\"]")

    for info in sentences_with_links_f.values():
        sentence_l = info["sentence_minus_url"]

        # Only "no usable date in this text" is ignored; any other error
        # now surfaces instead of being silently dropped.
        try:
            parsed_date = dparser.parse(sentence_l, fuzzy=True)
        except (ValueError, OverflowError):
            parsed_date = None

        if parsed_date is not None:
            info["datetime"] = parsed_date
            info["date_string"] = parsed_date.strftime("%B %d")

        if any(phrase in sentence_l for phrase in topic_phrases):
            info["topics_list"] = quoted_text.findall(sentence_l)

    return sentences_with_links_f

sentences_with_links = extract_analysis_info(sentences_with_links)

# Outfile work

In [None]:
# One row per sentence: from_dict puts the sentence ids in the columns, so
# transpose before writing.
df = pd.DataFrame.from_dict(sentences_with_links)
df_transpose = df.T
# Use the writer as a context manager: it saves and closes the workbook on
# exit, and ExcelWriter.save() no longer exists in pandas 2.x.
with pd.ExcelWriter("dash_notes_info_sentences.xlsx") as writer:
    df_transpose.to_excel(writer, sheet_name='Sheet1')

# Example 2: Trigger a data download

# Problem: We need to trigger a data download automatically

## Why would we want to do this?
* Because I want to get Facebook data using a special client that has Facebook data download restrictions disabled
* But there might be other reasons - e.g. you want to export over 5,000 mentions for a large number of queries
    * (It's only possible to export up to 5,000 posts in a get_mentions call, which is equivalent to a Mentions & Search / Mention List component export)

In [None]:
import mysecret2 # Let's load the details of a project in a playground account...
#... rather than triggering loads of downloads for an actual client!
import json
from datetime import datetime, timedelta

In [None]:
project = BWProject(username=mysecret2.username, password=mysecret2.password, project=mysecret2.project) #Create the project object

In [None]:
user = BWUser(username=mysecret2.username, password=mysecret2.password) #Create the user object

# Side note, is there a better way of doing this? Probably.

## Now let's have a little look at those queries

In [None]:
print("loading queries...")
queries = BWQueries(project)
print("queries loaded")
queries.get()

In [None]:
# Quick function to turn a datetime into an iso format string
def iso_format(date):
    """Format a date/datetime as ``YYYY-MM-DDTHH:MM:SSZ``.

    The trailing "Z" denotes UTC, so timezone-aware datetimes are converted
    to UTC before formatting. Naive values carry no offset to convert from
    and are formatted as given.

    Args:
        date: a ``datetime`` (aware or naive) or ``date`` object.

    Returns:
        The formatted string.
    """
    from datetime import timezone

    # `date` objects have no tzinfo attribute, hence getattr.
    if getattr(date, "tzinfo", None) is not None and date.utcoffset() is not None:
        date = date.astimezone(timezone.utc)
    return date.strftime("%Y-%m-%dT%H:%M:%SZ")

Now let's make datetimes for today and yesterday (this will be our start date and end date).

Of course, we could choose any dates, but since we're triggering data downloads which could be quite strenuous, let's keep the date range quite short.

In [None]:
# Local import: the imports above only bring in datetime and timedelta.
from datetime import timezone

# Take "now" in UTC rather than local time: iso_format stamps its output
# with a "Z" (UTC) suffix, so a local-time value would be mislabelled.
today = datetime.now(timezone.utc)
print(f"Today is: {today}")

yesterday = today - timedelta(days=1)

print(f"Yesterday was: {yesterday}")

Now let's get the query ID of the first query (in this case it's actually going to be a Twitter channel* called `Adaboval`)

\* Broadly, channels are treated like queries, although this relationship can be complex sometimes 

## Since we're going to specify which query we want to download through an ID, let's just get any old ID

In this case, it's going to be `Adaboval`

In [None]:
query_id = queries.get()[0]['id']
print(query_id)

The request url is going to include the project ID (hint: look at the data download page in your browser - you're inside a particular project, that all your queries belong to), so let's get this

In [None]:
project_id = project.get_self()['id']
print(f"Project ID is: {project_id}")

In [None]:
project.get_self()

The request is also going to involve some information, like which query we want to download and what the start and end dates are.

We can have another look in the browser to confirm this workflow!

In [None]:
# Request body for the data download: which query, and the date window.
start_date = iso_format(yesterday)
end_date = iso_format(today)
payload = dict(
    queryId=query_id,
    startDate=start_date,
    endDate=end_date,
    additionalColumns=[],
)

# The request needs the body as a JSON string rather than a Python dict.
serialized_payload = json.dumps(payload)

In [None]:
serialized_payload

## Hang on!

We created a payload and then serialized it. What does that actually mean?

In [None]:
print(f"Payload type: {type(payload)}")
print(f"Serialized payload type: {type(serialized_payload)}")

In [None]:
serialized_payload

In [None]:
# Send the serialized payload as a POST to this project's "datadownload"
# endpoint, going through the user object's request helper.
# NOTE(review): this reads project.project_id rather than the project_id
# variable fetched earlier via project.get_self() - presumably the same
# value; confirm.
trigger_response = user.request(requests.post, f"projects/{project.project_id}/datadownload", data=serialized_payload)

In [None]:
trigger_response