Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added date_window property for reports_email_activity stream #61

Merged
merged 10 commits into from Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# Changelog

## 1.2.0
* Date Window Implementation for reports_email_activity [#61](https://github.com/singer-io/tap-mailchimp/pull/61)

## 1.1.3
* Request Timeout Implementation [#43](https://github.com/singer-io/tap-mailchimp/pull/43)
## 1.1.2
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -31,6 +31,7 @@ Config properties:
| `start_date` | Y | "2010-01-01T00:00:00Z" | The default start date to use for date modified replication, when available. |
| `user_agent` | N | "Vandelay Industries ETL Runner" | The user agent to send on every request. |
| `request_timeout` | N | 300 | Time for which request should wait to get response. |
| `email_activity_date_window` | N | 30 | Used to fetch campaigns that were sent in the last `x` days in order to retrieve the `reports_email_activity` stream. |

## Usage

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

setup(name='tap-mailchimp',
version='1.1.3',
version='1.2.0',
description='Singer.io tap for extracting data from the Mailchimp API',
author='Stitch',
url='https://singer.io',
Expand Down
10 changes: 10 additions & 0 deletions tap_mailchimp/client.py
Expand Up @@ -13,6 +13,7 @@ class ClientRateLimitError(Exception):
class Server5xxError(Exception):
pass

# pylint: disable=R0902
class MailchimpClient:
def __init__(self, config):
self.__user_agent = config.get('user_agent')
Expand All @@ -22,6 +23,15 @@ def __init__(self, config):
self.__base_url = None
self.page_size = int(config.get('page_size', '1000'))

# performs date-window calculation for fetching campaigns
try:
date_window_duration = int(config.get('email_activity_date_window', 0))
self.adjusted_start_date = False if date_window_duration == 0 else \
(singer.utils.now().date() - singer.utils.datetime.timedelta(days = date_window_duration))
except ValueError:
LOGGER.info("Invalid Value: %s, for date windowing", config.get('email_activity_date_window', 0))
LOGGER.critical("Date windowing disabled")
self.adjusted_start_date = False
Vi6hal marked this conversation as resolved.
Show resolved Hide resolved
# Set request timeout to config param `request_timeout` value.
# If value is 0,"0","" or not passed then it set default to 300 seconds.
config_request_timeout = config.get('request_timeout')
Expand Down
50 changes: 34 additions & 16 deletions tap_mailchimp/sync.py
Expand Up @@ -452,6 +452,39 @@ def check_and_resume_email_activity_batch(client, catalog, state, start_date):
campaigns = [] # Don't need a list of campaigns if resuming
sync_email_activity(client, catalog, state, start_date, campaigns, batch_id)

def fetch_recent_campaigns(client, catalog, state, campaigns_config):
    """Run ``sync_endpoint`` for the campaigns endpoint from the adjusted start date.

    Args:
        client: Client object; its ``adjusted_start_date`` attribute is passed
            to ``sync_endpoint`` as the start date.
        catalog: Passed through to ``sync_endpoint`` unchanged.
        state: Passed through to ``sync_endpoint`` unchanged.
        campaigns_config: Mapping describing the campaigns endpoint. The keys
            ``path``, ``data_path`` (default ``"campaigns"``) and ``params``
            (default ``{}``) are read from it.

    Returns:
        Whatever ``sync_endpoint`` returns. The caller treats it as a
        collection of campaign ids -- presumably a list; confirm against
        ``sync_endpoint``.
    """
    window_start = client.adjusted_start_date  # adjusted start date
    endpoint_path = campaigns_config.get('path')
    data_key = campaigns_config.get('data_path', "campaigns")
    static_params = campaigns_config.get('params', {})

    # Persist flag is False: only the campaign ids are wanted here.
    persist_records = False

    recent_campaign_ids = sync_endpoint(
        client,
        catalog,
        state,
        window_start,
        "campaigns",
        persist_records,
        endpoint_path,
        data_key,
        static_params,
        ["campaigns"],
        "since_send_time",  # new bookmark_query_field
        None,
    )
    return recent_campaign_ids

def sync_reports_email_activity(streams_to_sync, id_bag, client, catalog, state, start_date, campaign_config):
    """Sync the ``reports_email_activity`` stream in bookmarked campaign chunks.

    The campaign ids to sync come from one of two places:

    * when ``client.adjusted_start_date`` is truthy (date windowing enabled),
      they are fetched with ``fetch_recent_campaigns``;
    * otherwise the ids already collected in ``id_bag['campaigns']`` are used.

    Args:
        streams_to_sync: Passed to ``should_sync_stream`` to decide whether
            ``reports_email_activity`` is selected.
        id_bag: Mapping; ``id_bag.get('campaigns')`` supplies the campaign ids
            when date windowing is disabled.
        client: Client object exposing ``adjusted_start_date``.
        catalog: Passed through to the sync helpers.
        state: Bookmark state, read and updated through the bookmark helpers.
        start_date: Passed through to the email-activity sync helpers.
        campaign_config: Endpoint configuration for campaigns, forwarded to
            ``fetch_recent_campaigns``.

    Returns:
        None.
    """
    should_stream, _ = should_sync_stream(
        streams_to_sync, [], 'reports_email_activity')
    if not should_stream:
        # The stream is not selected, so skip the campaign lookup entirely
        # instead of fetching ids that would never be used.
        return

    if client.adjusted_start_date:
        LOGGER.info("Fetching Campaigns since %s for email activity", client.adjusted_start_date)
        campaign_ids = fetch_recent_campaigns(client, catalog, state, campaign_config)
    else:
        campaign_ids = id_bag.get('campaigns')

    if not campaign_ids:
        # No campaigns means no email activity to request.
        return

    # Resume previous batch, if necessary
    check_and_resume_email_activity_batch(
        client, catalog, state, start_date)

    # Chunk batch_ids, bookmarking the chunk number. Sorting keeps the chunk
    # boundaries stable so a stored chunk number stays meaningful.
    sorted_campaigns = sorted(campaign_ids)
    chunk_bookmark = int(get_bookmark(
        state, ['reports_email_activity_next_chunk'], 0))
    for i, campaign_chunk in enumerate(chunk_campaigns(sorted_campaigns, chunk_bookmark)):
        write_email_activity_chunk_bookmark(
            state, chunk_bookmark, i, sorted_campaigns)
        sync_email_activity(client, catalog, state,
                            start_date, campaign_chunk)

    # Start from the beginning next time
    write_bookmark(state, ['reports_email_activity_next_chunk'], 0)
## TODO: is current_stream being updated?

def sync(client, catalog, state, start_date):
Expand Down Expand Up @@ -523,19 +556,4 @@ def sync(client, catalog, state, start_date):
stream_name,
endpoint_config)

should_stream, _ = should_sync_stream(streams_to_sync,
[],
'reports_email_activity')
campaign_ids = id_bag.get('campaigns')
if should_stream and campaign_ids:
# Resume previous batch, if necessary
check_and_resume_email_activity_batch(client, catalog, state, start_date)
# Chunk batch_ids, bookmarking the chunk number
sorted_campaigns = sorted(campaign_ids)
chunk_bookmark = int(get_bookmark(state, ['reports_email_activity_next_chunk'], 0))
for i, campaign_chunk in enumerate(chunk_campaigns(sorted_campaigns, chunk_bookmark)):
write_email_activity_chunk_bookmark(state, chunk_bookmark, i, sorted_campaigns)
sync_email_activity(client, catalog, state, start_date, campaign_chunk)

# Start from the beginning next time
write_bookmark(state, ['reports_email_activity_next_chunk'], 0)
sync_reports_email_activity(streams_to_sync, id_bag, client, catalog, state, start_date, endpoints["campaigns"])
51 changes: 51 additions & 0 deletions tests/unittests/test_date_window.py
@@ -0,0 +1,51 @@
import unittest
from datetime import datetime,timedelta
from tap_mailchimp.client import MailchimpClient

class TestDateWindowConfig(unittest.TestCase):
    """Checks how MailchimpClient derives ``adjusted_start_date`` from the
    ``email_activity_date_window`` config property."""

    def test_datewindow_disabled_no_val(self):
        """
        Verify date windowing is disabled if no value is passed
        """
        # Initialize MailchimpClient object
        client = MailchimpClient({'access_token': 'TOKEN'})

        self.assertIs(client.adjusted_start_date, False)

    def test_datewindow_disabled_empty_str(self):
        """
        Verify date windowing is disabled if an empty string value is passed
        Verify no Exception is raised for the str to int typecasting error
        """
        # Initialize MailchimpClient object.
        # The key must be `email_activity_date_window`; any other key is simply
        # ignored by the client and the empty-string path is never exercised.
        client = MailchimpClient({'access_token': 'TOKEN', "email_activity_date_window": ""})

        self.assertIs(client.adjusted_start_date, False)

    def test_datewindow_disabled_bool_val(self):
        """
        Verify date windowing is disabled if a bool (False) value is passed
        """
        # Initialize MailchimpClient object
        client = MailchimpClient({'access_token': 'TOKEN', "email_activity_date_window": False})

        self.assertIs(client.adjusted_start_date, False)

    def test_datewindow_disabled_num_val(self):
        """
        Verify date windowing is disabled if 0 value is passed
        """
        # Initialize MailchimpClient object
        client = MailchimpClient({'access_token': 'TOKEN', "email_activity_date_window": 0})

        self.assertIs(client.adjusted_start_date, False)

    def test_datewindow_enabled_num_val(self):
        """
        Verify date windowing is enabled if a positive number is passed
        """
        # Local import keeps this test self-contained with respect to the
        # module-level imports.
        from datetime import timezone

        # Initialize MailchimpClient object
        client = MailchimpClient({'access_token': 'TOKEN', "email_activity_date_window": 3})

        # The client derives its date from singer.utils.now(), which is in UTC,
        # so compare against today's UTC date rather than the local date.
        # Otherwise the difference can be off by a day near midnight on
        # machines whose timezone is not UTC.
        today_utc = datetime.now(timezone.utc).date()
        time_diff = today_utc - client.adjusted_start_date

        self.assertEqual(timedelta(days=3), time_diff)
Vi6hal marked this conversation as resolved.
Show resolved Hide resolved