In [None]:
# === ANALYSIS 1 ===
# read ads from ~/Desktop/bad-ads.txt, understand how sparse these advertisers' distribution is
# how are these harmful ads targeted?
# does the same advertiser change its targeting strategy over time (e.g. by sometimes changing the targeted interests)?

In [None]:
# === ANALYSIS 2 ===
# related (on sensitive ads):
# what about the distribution of Lark and Noom ads? are they saturated for the same people in our dataset?
# how were they targeted anyway? does FB simply infer that a user is overweight and trying to lose weight?

# pick a subset of ads targeted with healthcare;potentially harmful

In [None]:
# === ANALYSIS 3 ===
# which ads do the users think are the absolute worst?
# which code is most often described as causing discomfort?
# is there an information-theoretic way of finding out, e.g. which code has the highest mutual information with "discomfort"?

In [2]:
import sys
sys.path.insert(0, '../../db-processing/')
import db_utils

CONFIG_FILE = '../../db-processing/config.reader.ini'
# db_utils.connect is a project helper: besides the DB connection it returns two
# more values, bound here as img_path / obs_path (presumably image and
# observation directories -- TODO confirm in db_utils).
conn, img_path, obs_path = db_utils.connect(CONFIG_FILE)
cursor = conn.cursor()
# Make unqualified table names in later cells resolve against the `observations` schema.
# (Plain string: the original f-prefix had no placeholders to interpolate.)
cursor.execute("SET search_path TO 'observations';")

In [17]:
from collections import defaultdict
import json
import numpy as np
import pandas as pd

In [3]:
# Hand-picked advertisers whose ads were flagged as bad.
bad_advertisers = [
    'Art traveling',
    'World music',
    'HealthyWage',
    'RouFus-WU',
    'Scholly',
    'Floristero',
    'StraightFix',
]

# Hand-picked seed ad IDs (kept as strings) used to look these advertisers up in the DB.
bad_ads = [
    '23849337677840511',
    '23849408099440416',
    '23849317710470219',
    '23849772514710031',
    '23849856046490748',
    '23850361476580459',
    '23849488842910416',
]

In [29]:
# Look up the advertiser behind each hand-picked seed ad in `bad_ads`.
# NOTE: this rebinds `bad_advertisers` (initially a hand-written list) to the
# names found in the DB; a name appears once per matching ad row, so it can repeat.
cursor.execute("SELECT advertiser FROM ads WHERE id IN %s", (tuple(bad_ads), ))
res = cursor.fetchall()
bad_advertisers = [r[0] for r in res]

# pull out all AD IDs from these advertisers
cursor.execute("SELECT advertiser, id FROM ads WHERE advertiser IN %s", (tuple(bad_advertisers), ))
res = cursor.fetchall()
# Rows are (advertiser, id): the ad ID is the SECOND column. Taking r[0] here
# would collect advertiser names and break the later `id in %s` filter on pid_adid.
allbad = [ad_id for _, ad_id in res]
# also prepare bad advertiser -> ad mapping
advertiser_ads = defaultdict(set)
for advertiser, aid in res:
    advertiser_ads[advertiser].add(aid)

In [14]:
# Fetch every pid_adid row whose ad ID belongs to one of the bad advertisers.
bad_id_filter = (tuple(allbad), )
cursor.execute("select * from pid_adid where id in %s;", bad_id_filter)
res = cursor.fetchall()

# Group the ad IDs by pid; repeated rows are kept, so a list may contain the same ad more than once.
badfreqs = defaultdict(list)
for row in res:
    # each row must unpack into exactly two values: (pid, ad id)
    pid, adid = row
    badfreqs[pid].append(adid)

In [19]:
# load targetings
# Each non-empty line of the TSV is "<ad_id>\t<JSON targeting payload>";
# ad_targetings maps the ad ID (as a string) to the parsed payload.
ad_targetings = {}
with open('../../db-processing/ad-targetings.tsv', 'r') as fh:
    for line in fh:
        if not line.strip():
            # tolerate blank lines instead of failing on the tuple unpacking below
            continue
        # Split on the first tab only: a tab is legal whitespace inside JSON, so an
        # unbounded split could yield more than two fields and raise ValueError.
        ad_id, targeting = line.split('\t', 1)
        targeting = json.loads(targeting.strip())
        ad_targetings[ad_id] = targeting
        
# ad id -> owner name of the custom audience used to target it (e.g. 'Klarna');
# filled in as a side effect of get_targeting().
ca_advertisers = {}

def get_targeting(ut, aid):
    """Condense one ad's raw targeting payload into a small summary dict.

    Args:
        ut: parsed targeting payload; its entries are read from
            ut['data']['waist_targeting_data'] and dispatched on '__typename'.
        aid: ad identifier, stored under 'id' and used as the key into
            the module-level `ca_advertisers` dict.

    Returns:
        dict that always has 'id' and, depending on the entries present,
        'interests' (set of names), 'custom' (True), 'location'
        ({'loc', 'gran'}) and 'age-gender' ({'age_min', 'age_max', 'gender'}).
        Entries of any other type are ignored.

    Side effects:
        Records the custom-audience owner name in `ca_advertisers[aid]`
        whenever a custom-audience entry is seen.
    """
    summary = {'id': aid}
    for entry in ut['data']['waist_targeting_data']:
        kind = entry['__typename']
        if kind == 'WAISTUIInterestsType':
            summary['interests'] = {interest['name'] for interest in entry['interests']}
        elif kind == 'WAISTUICustomAudienceType':
            ca_advertisers[aid] = entry['dfca_data']['ca_owner_name']
            summary['custom'] = True
        elif kind == 'WAISTUILocationType':
            # granularity is nested inside a JSON-encoded string field
            granularity = json.loads(entry['serialized_data'])['location_granularity']
            place = entry['location_name']
            summary['location'] = {'loc': place, 'gran': granularity}
        elif kind == 'WAISTUIAgeGenderType':
            summary['age-gender'] = {
                field: entry[field] for field in ('age_min', 'age_max', 'gender')
            }

    return summary

In [20]:
# Display the pid -> list-of-ad-IDs mapping built from the pid_adid query.
badfreqs

defaultdict(list,
            {'470998': [23849337677840511,
              23849653159700080,
              23849408099440416],
             '606147': [23849772514710031, 23849856046490748],
             '5a349be43e523d000194399e': [23849720183600080,
              23849720183600080],
             '525028': [23849720183600080, 23849358200850219],
             '993453': [23849488842910416],
             '441420': [23849245300540219,
              23849396883360219,
              23849543534630219,
              23849376739290219,
              23849459219030219,
              23849503371640219,
              23849459219030219],
             '588623': [23849317710470219,
              23849396883360219,
              23849318067870219],
             '233362': [23849494349500524],
             '5e5ab88a43f33029b119a48c': [23849352724070219],
             '111226': [23849358155160219, 23850361476580459],
             '277428': [23849276863510009,
              23849495771130009,
          

In [32]:
# Show the advertiser names resolved from the DB for the seed ads in `bad_ads`
# (one entry per matching ad row, so the same name can appear more than once).
print(bad_advertisers)

['HealthyWage', 'Art traveling', 'World music', 'World music', 'RouFus-WU', 'Floristero', 'StraightFix']


In [38]:
# Print the condensed targeting of every HealthyWage ad that has an entry in ad_targetings.
for aid in advertiser_ads['HealthyWage']:
    aid_key = str(aid)  # ad_targetings is keyed by the ad ID as a string
    if aid_key not in ad_targetings:
        continue
    print(get_targeting(ad_targetings[aid_key], aid), '\n')

{'id': 23849318067870219, 'age-gender': {'age_min': 10, 'age_max': 48, 'gender': 'FEMALE'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849358639060219, 'age-gender': {'age_min': 10, 'age_max': 48, 'gender': 'MALE'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849503371640219, 'age-gender': {'age_min': 10, 'age_max': 53, 'gender': 'ANY'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849358155160219, 'age-gender': {'age_min': 10, 'age_max': 48, 'gender': 'FEMALE'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849358200850219, 'age-gender': {'age_min': 13, 'age_max': 53, 'gender': 'ANY'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849440577170219, 'age-gender': {'age_min': 10, 'age_max': 48, 'gender': 'FEMALE'}, 'location': {'loc': 'the United States', 'gran': 'country'}} 

{'id': 23849357510370219, 'custom': True, 'age-gender': {'age_min': 10, 'age_m