In [9]:
# Loading the API Response from the source
import json
import pandas as pd
from datetime import datetime

with open("google_ads_api.json", "r") as f:
    payload = json.load(f)


In [2]:
# Converting JSON to Dataframes using Pandas
rows = []

for campaign in payload["campaigns"]:
    for metrics in campaign["daily_metrics"]:
        rows.append({
            "campaign_id": campaign["campaign_id"],
            "campaign_name": campaign["campaign_name"],
            "campaign_type": campaign["campaign_type"],
            "status": campaign["status"],
            "date": metrics["date"],
            "impressions": metrics["impressions"],
            "clicks": metrics["clicks"],
            "cost_micros": metrics["cost_micros"],
            "conversions": metrics["conversions"],
            "conversion_value": metrics["conversion_value"],
        })

googledf = pd.DataFrame(rows)
googledf.head(20)


Unnamed: 0,campaign_id,campaign_name,campaign_type,status,date,impressions,clicks,cost_micros,conversions,conversion_value
0,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-01,45230,892,125340000,23,2875.5
1,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-02,48120,956,134560000,28,3520.0
2,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-03,42890,834,118230000,19,2280.0
3,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-04,51230,1023,145670000,31,3875.25
4,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-05,38920,756,108450000,17,1955.0
5,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-06,29340,567,78230000,12,1440.0
6,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-07,31250,612,85670000,14,1680.0
7,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-08,47890,945,132450000,26,3120.0
8,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-09,52340,1067,149870000,33,4125.75
9,goog_camp_001,Brand Awareness - Q1,DISPLAY,ENABLED,2024-01-10,49120,978,138920000,29,3625.0


In [12]:
# Null check for all the columns
googledf.isnull().sum()


campaign_id         0
campaign_name       0
campaign_type       0
status              0
date                0
impressions         0
clicks              0
cost_micros         0
conversions         0
conversion_value    0
dtype: int64

In [3]:
# Check for the Required Columns and creation of invalid columns
required_cols = ["campaign_id", "date", "impressions", "clicks", "cost_micros","conversion_value"]

invalid_missing = googledf[googledf[required_cols].isnull().any(axis=1)].copy()

invalid_missing["_dq_reason"] = "missing_required"
invalid_missing["_failed_rule"] = "required_fields_not_null"

len(invalid_missing)


0

In [6]:
# Negative values check:
metric_cols = ["impressions", "clicks", "cost_micros", "conversions", "conversion_value"]

invalid_negative = googledf[(googledf[metric_cols] < 0).any(axis=1)].copy()

invalid_negative["_dq_reason"] = "invalid_value"
invalid_negative["_failed_rule"] = "non_negative_metrics"

len(invalid_negative)


0

In [4]:
# Duplicate check:
duplicate_mask = googledf.duplicated(subset=["campaign_id", "date"],keep=False)

invalid_duplicates = googledf[duplicate_mask].copy()
invalid_duplicates["_dq_reason"] = "duplicate"
invalid_duplicates["_failed_rule"] = "campaign_date_unique"

len(invalid_duplicates)


0

In [7]:
dq_issues = pd.concat([invalid_missing, invalid_negative, invalid_duplicates],ignore_index=True).drop_duplicates()

dq_issues


Unnamed: 0,campaign_id,campaign_name,campaign_type,status,date,impressions,clicks,cost_micros,conversions,conversion_value,_dq_reason,_failed_rule


In [8]:
# Valid Google Ads:
valid_google = googledf[~googledf.index.isin(dq_issues.index)].copy()

len(valid_google)


45

In [23]:
# Final Sanity check
print("Total rows:", len(googledf))
print("Invalid rows:", len(dq_issues))
print("Valid rows:", len(valid_google))


Total rows: 45
Invalid rows: 0
Valid rows: 45


In [10]:
valid_google["source_system"] = "google"
valid_google["ingested_at"] = datetime.utcnow()


In [11]:
print(valid_google)

      campaign_id                  campaign_name campaign_type   status  \
0   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
1   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
2   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
3   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
4   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
5   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
6   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
7   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
8   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
9   goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
10  goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
11  goog_camp_001           Brand Awareness - Q1       DISPLAY  ENABLED   
12  goog_camp_001        