In [1]:
import os
import csv
from io import StringIO
from zipfile import ZipFile
from os.path import join
from collections import defaultdict
from glob import glob

In [2]:
# Base directory of the report data; it is joined with a country code to
# locate that country's report folders.
REPORTS = '../data/facebook/reports'

# Two-letter country codes to process, listed alphabetically.
COUNTRIES = [
    'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI',
    'FR', 'GB', 'GR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV',
    'MT', 'NL', 'PL', 'PT', 'RO', 'SE', 'SI', 'SK', 'US',
]

In [6]:
# Finding unique advertisers and the countries they operate in


def _latest_lifelong_zip(reports_dir, country):
    """Return the path of the '*_lifelong.zip' archive in a country's newest folder.

    Args:
        reports_dir: directory that contains one sub-folder per country code.
        country: country code naming the sub-folder to inspect.

    Returns:
        Path of a '*_lifelong.zip' file inside the last sub-folder of
        ``reports_dir/country`` when the sub-folder names are sorted.

    Raises:
        FileNotFoundError: if the country folder is missing, has no
            sub-folders, or its last sub-folder holds no '*_lifelong.zip'.
    """
    country_dir = join(reports_dir, country)

    # Look at direct children only. A recursive walk would let a directory
    # nested inside a dated folder (or the country folder itself, when it has
    # no children) win the sort and be mistaken for the latest date.
    # NOTE(review): assumes the sub-folder names sort chronologically
    # (e.g. ISO dates) -- confirm against the data layout.
    dated_dirs = sorted(
        name for name in os.listdir(country_dir)
        if os.path.isdir(join(country_dir, name))
    )
    if not dated_dirs:
        raise FileNotFoundError('no report folders found in ' + country_dir)

    latest_dir = join(country_dir, dated_dirs[-1])
    # Sorting only makes the pick deterministic if several archives match.
    matches = sorted(glob(join(latest_dir, '*_lifelong.zip')))
    if not matches:
        raise FileNotFoundError('no *_lifelong.zip archive in ' + latest_dir)
    return matches[0]


# Maps each advertiser's page name to the set of country codes whose report
# lists it. Rebuilt from scratch so re-running the cell is idempotent.
ADVERTISERS = defaultdict(set)

for country in COUNTRIES:
    # `archive` rather than `zip`, so the builtin stays usable in later cells.
    with ZipFile(_latest_lifelong_zip(REPORTS, country)) as archive:
        csv_name = next(
            (name for name in archive.namelist() if name.endswith('.csv')),
            None,
        )
        if csv_name is None:
            raise FileNotFoundError(
                'no .csv entry in the lifelong archive for ' + country
            )

        with archive.open(csv_name) as csv_f:
            # 'utf-8-sig' strips a leading BOM so the first header is clean.
            csv_io = StringIO(csv_f.read().decode('utf-8-sig'))

    for row in csv.DictReader(csv_io):
        ADVERTISERS[row['Page Name']].add(country)


In [7]:
# Print every advertiser whose page name shows up in more than one country.
# Each value in ADVERTISERS is a set of country codes, hence the plural name;
# it also avoids rebinding the `country` variable used by the loading loop.
for advertiser, countries in ADVERTISERS.items():
    if len(countries) > 1:
        print(advertiser, countries)

SPÖ {'AT', 'DE'}
European Parliament {'SI', 'DK', 'US', 'FR', 'CY', 'DE', 'SK', 'HR', 'LT', 'BE', 'RO', 'EE', 'LV', 'IT', 'GB', 'MT', 'HU', 'BG', 'PT', 'ES', 'FI', 'PL', 'SE', 'AT', 'GR', 'IE', 'LU', 'NL', 'CZ'}
Andreas Schieder {'AT', 'DE', 'GB'}
HC Strache {'AT', 'DE'}
Pamela Rendi-Wagner {'AT', 'DE'}
European Greens {'SI', 'DK', 'CY', 'DE', 'SK', 'HR', 'LT', 'BE', 'RO', 'EE', 'LV', 'IT', 'GB', 'MT', 'HU', 'BG', 'PT', 'ES', 'FI', 'PL', 'SE', 'AT', 'GR', 'IE', 'LU', 'NL', 'CZ'}
Karoline Edtstadler {'SI', 'US', 'FR', 'DE', 'SK', 'HR', 'LT', 'BE', 'RO', 'EE', 'LV', 'IT', 'MT', 'HU', 'BG', 'PT', 'ES', 'PL', 'AT', 'GR', 'IE', 'LU', 'NL', 'CZ'}
Johann Gudenus {'AT', 'DE'}
Sozialdemokratische Partei Europas {'AT', 'DE'}
Grüner Fisher Investments {'AT', 'DE'}
EU Environment {'AT', 'GR', 'IE', 'LU', 'RO', 'BG', 'SI', 'NL', 'IT', 'FR', 'CY', 'FI', 'PL', 'DE', 'SK', 'MT', 'HR', 'LT', 'BE'}
Tierschutzvolksbegehren {'AT', 'DE'}
Sportnahrung {'AT', 'DE'}
Socialists and Democrats Group in the Europ

Guyana Petroleum Digest {'CY', 'US'}
Quizways {'CY', 'GB'}
DUDUflix {'CY', 'RO'}
The Pier {'GR', 'ES', 'IT', 'FR', 'CY', 'DE'}
Unexplained Mysteries {'LU', 'SI', 'EE', 'LV', 'CY', 'MT', 'HR', 'LT'}
IronStone Legal {'CY', 'GR', 'IE', 'ES'}
Чудеса случаются {'EE', 'LV', 'ES', 'IT', 'CY', 'DE', 'SK', 'LT'}
Harvard Business Review - Россия {'LV', 'CY', 'SI'}
Boobam {'CY', 'GR'}
Roar LK {'SI', 'PT', 'IT', 'GB', 'CY', 'LT'}
George Satsidis (Γιώργος Σατσίδης) {'CY', 'GR', 'IT', 'PT'}
Friends of the Earth International {'NL', 'ES', 'CY', 'FI', 'HR'}
World Design Organization {'CY', 'LU', 'DE'}
The Golf Clearance Outlet {'DK', 'PT', 'NL', 'ES', 'IT', 'FR', 'CY', 'FI', 'SE'}
Όθων Καίσαρης- Υποψήφιος Ευρωβουλευτής, "Το Ποτάμι" {'CY', 'GR'}
Diplomacy24 {'CY', 'GR'}
The Libra Group {'GR', 'IE', 'RO', 'US', 'LV', 'ES', 'IT', 'GB', 'CY', 'DE'}
Global Network for Advanced Management {'GR', 'RO', 'PT', 'NL', 'ES', 'CY', 'SK', 'HR'}
Furious NBA Kobe Bryant's Fanbase {'CY', 'GR'}
البنك الدولي {'CY', 'ES'