# LinkedIn Ad Library Analysis: Airbnb in Amsterdam

This notebook walks through the process of querying the LinkedIn Ad Library API to analyze Airbnb's advertising campaigns in Amsterdam, as described in the Medium post [How to query LinkedIn's ad database](https://medium.com/@poorolive51/how-to-query-linkedins-ad-database-example-with-airbnb-policy-ads-in-amsterdam-7a6e0471e4bb).

## 1. Setup

First, let's install the necessary libraries.

In [None]:
!pip install -r requirements.txt

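Next, you'll need to add your LinkedIn Developer Application credentials to a `.env` file. You can copy the `.env.example` file to `.env` and fill in your `LINKEDIN_CLIENT_ID` and `LINKEDIN_CLIENT_SECRET`. The authentication step below redirects the browser to `http://localhost:8000/callback`, so make sure that URL is listed among your app's authorized redirect URLs in the LinkedIn Developer Portal.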

## 2. Authentication

Now, let's get an access token from LinkedIn using the 3-legged OAuth 2.0 flow. This will open a browser window for you to log in and authorize the application.

In [None]:
"""
Get a LinkedIn OAuth access token and save it to .env as LI_ACCESS_TOKEN.
Opens the LinkedIn login page, captures the authorization code, exchanges it for an access token, and stores it locally.
"""

import os
import webbrowser
import threading
import requests
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import urlparse, parse_qs, urlencode

# The setup step stores the credentials in a local .env file, so load it before
# reading them; otherwise os.getenv only sees variables exported in the shell.
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is unavailable: fall back to the process environment as-is.
    pass
else:
    load_dotenv()

# Local port the temporary OAuth callback server listens on.
PORT = 8000
CLIENT_ID = os.getenv("LINKEDIN_CLIENT_ID")
CLIENT_SECRET = os.getenv("LINKEDIN_CLIENT_SECRET")
# Address LinkedIn redirects the browser to after login; it points at the
# local callback server on PORT.
REDIRECT_URI = f"http://localhost:{PORT}/callback"

# One-element list used as a mutable slot: the request handler stores the
# authorization code in auth_code[0] for the code after the server to read.
auth_code = [None]


class CallbackHandler(BaseHTTPRequestHandler):
    """Handle the OAuth redirect on the local callback server.

    A GET to ``/callback`` carrying a ``code`` query parameter stores that
    code in ``auth_code[0]``. Every request receives an HTTP response, and any
    request to ``/callback`` (with or without a code) asks the server to stop
    so that a blocking ``serve_forever()`` call can return.
    """

    def log_message(self, format, *args):
        """Suppress the default per-request log output."""
        pass

    def _reply(self, status, html):
        """Send a complete HTML response with the given status code and body."""
        self.send_response(status)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(html)

    def do_GET(self):
        """Capture the authorization code from the redirect, then stop the server."""
        parsed = urlparse(self.path)
        if parsed.path != '/callback':
            # Unrelated request (for example a favicon lookup): answer it and
            # keep waiting for the real redirect.
            self._reply(404, b"<h1>Not found.</h1>")
            return

        params = parse_qs(parsed.query)
        if 'code' in params:
            auth_code[0] = params['code'][0]
            self._reply(200, b"<h1>Authentication complete. You can close this window.</h1>")
        else:
            # The redirect arrived without a code (e.g. authorization was
            # refused). Leave auth_code[0] as None but still answer and stop,
            # so the notebook cell does not wait forever.
            self._reply(400, b"<h1>Authentication failed: no authorization code received.</h1>")

        # shutdown() blocks until serve_forever() has exited, so it must be
        # called from a thread other than the one handling this request.
        threading.Thread(target=self.server.shutdown, daemon=True).start()

# Fail early with a clear message instead of opening a login page for
# client_id=None.
if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("Missing LINKEDIN_CLIENT_ID or LINKEDIN_CLIENT_SECRET in .env file.")

# Creating the server binds and listens on the port right away, so the
# browser redirect can connect even before serve_forever() starts below.
server = HTTPServer(('localhost', PORT), CallbackHandler)

auth_url = "https://www.linkedin.com/oauth/v2/authorization?" + urlencode({
    "response_type": "code",
    "client_id": CLIENT_ID,
    "redirect_uri": REDIRECT_URI,
    "scope": "profile",
    "state": "tutorial_state"
})

try:
    webbrowser.open(auth_url)
    # Run exactly one serve loop, on this thread. It blocks until the request
    # handler calls server.shutdown(). Running a second loop in a background
    # thread would race with this one, because shutdown() stops only one loop.
    server.serve_forever()
finally:
    # Release the port so the cell can be re-run without "address in use".
    server.server_close()

if auth_code[0] is None:
    raise RuntimeError("No authorization code was received from LinkedIn.")

# Exchange the one-time authorization code for an access token.
token_url = "https://www.linkedin.com/oauth/v2/accessToken"
token_data = {
    "grant_type": "authorization_code",
    "code": auth_code[0],
    "redirect_uri": REDIRECT_URI,
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET
}

response = requests.post(token_url, data=token_data, timeout=30)
response.raise_for_status()
access_token = response.json().get("access_token")
if not access_token:
    raise RuntimeError("The token response did not contain an access_token.")

# Replace any previous LI_ACCESS_TOKEN line rather than appending blindly:
# repeated runs would otherwise pile up duplicate entries, and an existing
# file without a trailing newline would get the token glued onto its last line.
env_path = ".env"
try:
    with open(env_path, "r", encoding="utf-8") as f:
        env_lines = f.read().splitlines()
except FileNotFoundError:
    env_lines = []

env_lines = [line for line in env_lines if not line.startswith("LI_ACCESS_TOKEN=")]
env_lines.append(f"LI_ACCESS_TOKEN={access_token}")

with open(env_path, "w", encoding="utf-8") as f:
    f.write("\n".join(env_lines) + "\n")

## 3. Fetching Ad Data

Now that we have an access token, we can query the LinkedIn Ad Library API to fetch Airbnb's ads.

In [None]:
"""
Fetches all LinkedIn Ad Library entries for a given advertiser and saves them to a JSON file.
Handles pagination, basic retry logic, and rate limiting.
"""

import os
import time
import json
import requests
from dotenv import load_dotenv

# override=True so that a token newly written to .env replaces an older
# LI_ACCESS_TOKEN that may already be set in this kernel's environment.
load_dotenv(override=True)

ACCESS_TOKEN = os.getenv('LI_ACCESS_TOKEN')
if not ACCESS_TOKEN:
    raise ValueError("Missing LI_ACCESS_TOKEN in .env file.")

BASE_URL = "https://api.linkedin.com/rest/adLibrary"
ADVERTISER_NAME = "airbnb"  # Change this to target a different advertiser

headers = {
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "LinkedIn-Version": "202507",
    "X-Restli-Protocol-Version": "2.0.0",
    "Content-Type": "application/json"
}

start = 0            # offset of the next page to request
count = 25           # page size sent with every request
all_data = []        # accumulated "elements" from every page
max_retries = 5      # retries allowed per page after HTTP 429
stop_fetching = False

while not stop_fetching:
    params = {
        "advertiser": ADVERTISER_NAME,
        "q": "criteria",
        "start": start,
        "count": count,
    }

    retries = 0
    while retries <= max_retries:
        try:
            response = requests.get(BASE_URL, headers=headers, params=params, timeout=30)
        except requests.RequestException as err:
            # Network failure: stop paging but keep what was collected so far.
            print(f"Request at start={start} failed: {err}")
            stop_fetching = True
            break

        if response.status_code == 200:
            data = response.json()
            elements = data.get("elements", [])

            if not elements:
                # An empty page marks the end of the results.
                stop_fetching = True
                break

            all_data.extend(elements)
            start += count
            time.sleep(5)  # Respect rate limits
            break

        elif response.status_code == 429:  # Rate limited
            wait_time = 2 ** retries  # exponential backoff: 1, 2, 4, ... seconds
            print(f"Rate limited at start={start}; retrying in {wait_time}s.")
            time.sleep(wait_time)
            retries += 1
        else:  # Any other error
            print(f"Request at start={start} failed with HTTP {response.status_code}: {response.text[:500]}")
            stop_fetching = True
            break
    else:
        # The retry loop ran out without a break: every attempt was rate limited.
        print(f"Giving up at start={start} after {max_retries} rate-limit retries.")
        break

# Whatever was collected is saved, even if paging stopped early on an error.
with open("airbnb_all_ads.json", "w", encoding="utf-8") as f:
    json.dump(all_data, f, ensure_ascii=False, indent=4)

print(f"Saved {len(all_data)} ads to airbnb_all_ads.json")


## 4. Analysis and Plotting

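Finally, let's analyze the ad data and plot the estimated daily impressions for ads targeting Amsterdam versus the rest of the Netherlands. Each ad's total impressions are given as a range, so the daily figure is an estimate: the midpoint of that range spread evenly over the days the ad ran. Only ads with more than 50% of their impressions in the Netherlands are included.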

In [None]:
"""
Visualizes Airbnb's LinkedIn ad impressions in Amsterdam vs. the rest of the Netherlands.
Parses saved ad data, calculates daily impressions, and plots them over time with annotations for key policy events.
"""

import json
import pandas as pd
from datetime import datetime, timedelta
import plotly.graph_objects as go
from collections import defaultdict

# Right-hand end of the plotted date range.
CURRENT_DATE = datetime.now().date()
DATA_FILE = "airbnb_all_ads.json"

# Load saved ad data. The file is written as UTF-8 with non-ASCII characters
# kept verbatim, so it must be read back as UTF-8 regardless of the platform's
# default encoding.
try:
    with open(DATA_FILE, "r", encoding="utf-8") as f:
        data = json.load(f)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down rather than just stopping this cell.
    raise FileNotFoundError(
        f"Error: {DATA_FILE} not found. Run the data-fetching step first."
    ) from err

ads = []

# Process ads and keep those whose impressions are mostly in the Netherlands
for ad in data:
    details = ad.get("details", {})
    stats = details.get("adStatistics", {})
    if not stats or "firstImpressionAt" not in stats or "latestImpressionAt" not in stats:
        continue

    # Share of impressions attributed to the Netherlands (0 when not listed).
    dist = stats.get("impressionsDistributionByCountry", [])
    nl_pct = next(
        (c.get("impressionPercentage", 0) for c in dist if c.get("country") == "urn:li:country:NL"),
        0
    )

    imp_range = stats.get("totalImpressions")
    if nl_pct <= 50 or not imp_range:
        continue

    # Total impressions come as a {"from", "to"} range; use its midpoint.
    # NOTE(review): if only one bound is present, that bound is used as the
    # estimate rather than failing - confirm this is acceptable for open-ended
    # ranges.
    bounds = [b for b in (imp_range.get("from"), imp_range.get("to")) if b is not None]
    if not bounds:
        continue
    impr_mid = sum(bounds) / len(bounds)

    # Timestamps are divided by 1000 before conversion (treated as milliseconds).
    start_dt = datetime.fromtimestamp(stats["firstImpressionAt"] / 1000)
    end_dt = datetime.fromtimestamp(stats["latestImpressionAt"] / 1000)

    # Count calendar dates from first to last impression, inclusive. The daily
    # aggregation later adds "Impr Per Day" once per calendar date in that
    # span, so dividing by elapsed 24-hour periods instead would over-count
    # ads whose run crosses midnight.
    duration_days = (end_dt.date() - start_dt.date()).days + 1
    impr_per_day = impr_mid / duration_days if duration_days > 0 else 0

    # Included segments of every "Location" targeting facet.
    targeting = details.get("adTargeting", [])
    location_segments = [
        seg
        for t in targeting if t.get("facetName") == "Location"
        for seg in t.get("includedSegments", [])
    ]
    is_amsterdam = any("amsterdam" in seg.lower() for seg in location_segments)

    ads.append({
        "Ad URL": ad.get("adUrl", "Unknown URL"),
        "Impr Mid": impr_mid,
        "Start": start_dt,
        "End": end_dt,
        "NL %": round(nl_pct, 2),
        "Impr Per Day": impr_per_day,
        "Is Amsterdam": is_amsterdam,
        "Location Segments": location_segments
    })

def _daily_frame(daily, dates):
    """Turn a {date: impressions} mapping into a frame with one row per date in *dates*."""
    series = pd.Series(daily, dtype=float)
    # The keys are datetime.date objects; convert them explicitly so they line
    # up with the Timestamp-based date range when reindexing.
    series.index = pd.to_datetime(series.index)
    return (
        series.reindex(dates, fill_value=0)
        .rename_axis("Date")
        .reset_index(name="Total Impressions")
    )


# Without any qualifying ads the frame would have no columns and the steps
# below would fail with an unrelated-looking KeyError.
if not ads:
    raise ValueError("No ads to analyze: none of the loaded ads passed the filter.")

df = pd.DataFrame(ads)
df["Duration (days)"] = (df["End"] - df["Start"]).dt.days + 1
df = df[df["Duration (days)"] > 0]
if df.empty:
    raise ValueError("No ads to analyze: every ad has a non-positive duration.")

# Aggregate daily impressions: each ad contributes its per-day estimate to
# every calendar date from its start to its end, inclusive.
daily_amsterdam = defaultdict(float)
daily_other_nl = defaultdict(float)

for first_seen, last_seen, per_day, in_amsterdam in zip(
    df["Start"], df["End"], df["Impr Per Day"], df["Is Amsterdam"]
):
    bucket = daily_amsterdam if in_amsterdam else daily_other_nl
    day = first_seen.date()
    last_day = last_seen.date()
    while day <= last_day:
        bucket[day] += per_day
        day += timedelta(days=1)

# Create DataFrames with all dates filled (days without ads get 0)
all_dates = pd.date_range(start=df["Start"].min().date(), end=CURRENT_DATE)
amsterdam_df = _daily_frame(daily_amsterdam, all_dates)
other_nl_df = _daily_frame(daily_other_nl, all_dates)

# Marker DataFrames for non-zero days
amsterdam_markers = amsterdam_df[amsterdam_df["Total Impressions"] > 0]
other_nl_markers = other_nl_df[other_nl_df["Total Impressions"] > 0]

# Series colors
amsterdam_color = "#FF5A5F"
other_nl_color = "rgb(189, 195, 199)"

fig = go.Figure()

# Each series is drawn twice: a dotted line over every date (hidden from the
# legend) and markers on the non-zero days only (carrying the hover text).
series_specs = [
    ("Amsterdam", amsterdam_df, amsterdam_markers, amsterdam_color),
    ("Other NL", other_nl_df, other_nl_markers, other_nl_color),
]
for label, full_frame, nonzero_frame, color in series_specs:
    trace_name = label + " Impressions"
    hover_text = (
        "<b>Date:</b> %{x|%Y-%m-%d}<br><b>Impressions ("
        + label
        + "):</b> %{y:,.0f}<extra></extra>"
    )
    fig.add_trace(go.Scatter(
        x=full_frame["Date"],
        y=full_frame["Total Impressions"],
        mode="lines",
        name=trace_name,
        line=dict(color=color, width=2, dash="dot"),
        showlegend=False,
    ))
    fig.add_trace(go.Scatter(
        x=nonzero_frame["Date"],
        y=nonzero_frame["Total Impressions"],
        mode="markers",
        name=trace_name,
        marker=dict(color=color, size=8),
        hovertemplate=hover_text,
    ))

# Annotations for key events.
# NOTE(review): the x positions are fixed day offsets from the first date in
# the data, so they match the dates named in the labels only for the dataset
# this was tuned against - confirm after re-fetching.
first_day = amsterdam_df["Date"].min()
box_style = dict(font=dict(size=12), bgcolor="white", borderwidth=2, borderpad=4, opacity=0.8)
event_color = "#1e8cd0"

fig.add_annotation(
    x=first_day + timedelta(days=190),
    y=5,
    text="March 19: Airbnb policy paper to government",
    showarrow=True, arrowhead=1, ax=0, ay=-200,
    bordercolor=amsterdam_color,
    **box_style,
)
fig.add_annotation(
    x=first_day + timedelta(days=180),
    y=0.2,
    text="March 11: Amsterdam city govt announces plans to restrict Airbnb rentals",
    showarrow=True, arrowhead=1, ax=0, ay=-100,
    bordercolor=event_color,
    **box_style,
)
fig.add_annotation(
    x=first_day + timedelta(days=270),
    y=35000,
    text="May 23: Public consultation period starts...",
    showarrow=False,
    bordercolor=event_color,
    **box_style,
)

# Layout: centered title, horizontal legend above the plot, unified hover
fig.update_layout(
    title=dict(text="Airbnb Ads in Amsterdam vs. Other NL", x=0.5, xanchor="center"),
    yaxis_title="Daily Impressions",
    template="plotly_white",
    hovermode="x unified",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

fig.show()
