In [1]:
import time
import requests
import json
import pandas as pd
from datetime import datetime, timedelta

# Maximum number of attempts get_event_data makes for a single query
MAX_RETRIES = 10
RETRY_DELAY = 90  # Base delay in seconds; multiplied by the attempt number before each retry

# Reporting window: from 3 days before now (inclusive) up to, but excluding, now
end_date = datetime.now()
start_date = end_date - timedelta(days=3)

# Event names queried for every day in the window
events = ["PageViewed"]

# Excel workbook that receives the per-day results
output_file = r'C:\Users\praveen\Platform(Web).xlsx'


# Maps a lowercase channel-name fragment to the country label used in the report.
# Several fragments may share one label (e.g. 'rexmonster' is reported as India).
mapping = {
    'india': 'India',
    'philippines': 'Philippines',
    'malaysia': 'Malaysia',
    'singapore': 'Singapore',
    'gulf': 'Middle East',
    'hongkong': 'Hongkong',
    'thailand': 'Thailand',
    'indonesia': 'Indonesia',
    'vietnam': 'Vietnam',
    'rexmonster': 'India',
}

# Country labels that must appear in every day's output; any label without
# data for the day is added with a count of 0
required_columns = [
    'India',
    'Philippines',
    'Malaysia',
    'Singapore',
    'Middle East',
    'Hongkong',
    'Thailand',
    'Indonesia',
    'Vietnam',
]

# Function to validate and parse date format
def validate_date_format(date_str):
    """Parse a ``DD/MM/YYYY`` string into a ``datetime``.

    Args:
        date_str: Candidate date text, expected in ``%d/%m/%Y`` format.

    Returns:
        The parsed ``datetime``, or ``None`` when ``date_str`` is malformed
        or is not a string at all (e.g. ``None`` or a number).
    """
    try:
        return datetime.strptime(date_str, '%d/%m/%Y')
    except (ValueError, TypeError):
        # ValueError: text does not match the format.
        # TypeError: strptime was handed a non-string value.
        return None


# Function to get event data for an event
def _country_for_channel(channel_name):
    """Return the country label for a raw ``channel_name`` value, or None if it maps to nothing."""
    # A profile may carry several channel names; treat them as one string.
    if isinstance(channel_name, list):
        channel_name = ' '.join(str(part) for part in channel_name)
    if not isinstance(channel_name, str):
        return None

    channel_name = channel_name.lower().strip()
    if not channel_name:
        # An empty string is a substring of every label, so without this
        # guard every profile lacking a channel would land on the first entry.
        return None

    for mapped_key, mapped_value in mapping.items():
        # Both sides are lowercase so the substring test is case-insensitive.
        if mapped_key in channel_name or channel_name in mapped_value.lower():
            return mapped_value
    return None


def _count_page(records, target_date, unique_users, count_country):
    """Add one page of event records to ``count_country``, counting each identity once."""
    for record in records:
        event_props = record.get('event_props') or {}
        if event_props.get('platform') != 'Desktop':
            continue

        profile = record.get('profile') or {}
        profile_data = profile.get('profileData') or {}

        # Only keep profiles whose last update falls on the requested day.
        last_updated = profile_data.get('profile_last_updated', '')
        parsed = validate_date_format(last_updated) if isinstance(last_updated, str) else None
        if parsed is None or parsed.date() != target_date:
            continue

        user_id = profile.get('identity')
        if not user_id or user_id in unique_users:
            continue
        unique_users.add(user_id)

        country = _country_for_channel(profile_data.get('channel_name', ''))
        if country is not None:
            count_country[country] = count_country.get(country, 0) + 1


def get_event_data(event_name, date_integer):
    """Fetch one day of events from CleverTap and count unique users per country.

    Posts a query for ``event_name`` on ``date_integer``, then follows the
    returned cursor page by page. Every record is re-checked locally (platform
    is ``Desktop`` and ``profile_last_updated`` falls on the requested day);
    each previously unseen identity is counted under the country that its
    ``channel_name`` maps to.

    The whole query is retried, with a delay of ``RETRY_DELAY * attempt``
    seconds, when the request fails at the transport level, when the API
    answers "still in progress" (fail / code 2), or when no record could be
    counted.

    Args:
        event_name: Name of the CleverTap event to query.
        date_integer: Day to query, as a ``YYYYMMDD`` integer.

    Returns:
        A dict mapping country label (a value of ``mapping``) to the number of
        unique identities counted. An empty dict is returned when the API
        reports any other failure or when ``MAX_RETRIES`` attempts are used up.
    """
    url = "https://in1.api.clevertap.com/1/events.json?batch_size=5000"
    payload = json.dumps({
        "event_name": event_name,
        "from": date_integer,
        "to": date_integer,
        "common_profile_properties": {
            "event_properties": [
                {
                    "name": "platform",
                    "operator": "contains",
                    "value": "Desktop"
                }
            ],
            "reachability": [{
                "name": "has_email",
                "value": "True"
            }],
            "user_properties": [
                {"name": "profile_last_updated", "operator": "equals", "value": [date_integer]},
                {"name": "channel_name", "operator": "contains", "value": list(mapping.keys())}
            ]
        }
    })

    # NOTE(review): credentials are embedded in the source; consider loading
    # them from the environment or a secrets store instead.
    headers = {
        'X-CleverTap-Account-Id': '*******',
        'X-CleverTap-Passcode': '********',
        'Content-Type': 'application/json'
    }

    # Parsed once here instead of once per record.
    target_date = datetime.strptime(str(date_integer), '%Y%m%d').date()
    # Without a timeout a stalled connection would block the run forever.
    request_timeout = 60

    retry_count = 0
    while retry_count < MAX_RETRIES:
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=request_timeout)
            response_json = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure or a non-JSON body: treat as a retryable attempt.
            retry_count += 1
            print(f"Request failed ({exc}). Retrying... Attempt {retry_count}")
            time.sleep(RETRY_DELAY * retry_count)
            continue

        if response_json.get('status') == 'fail' and response_json.get('code') == 2:
            retry_count += 1
            print(f"Request still in progress, please retry later. Retrying... Attempt {retry_count}")
            time.sleep(RETRY_DELAY * retry_count)
            continue
        elif response_json.get('status') == 'fail':
            print(f"Failed request with error: {response_json.get('error')}")
            return {}

        # Fresh tallies per attempt so a retry never builds on a partial run.
        count_country = {}
        unique_users = set()

        cursor = response_json.get('cursor')
        while cursor:
            # The cursor is interpolated verbatim, exactly as the API returned it.
            next_url = f"https://in1.api.clevertap.com/1/events.json?cursor={cursor}"
            try:
                response = requests.get(next_url, headers=headers, timeout=request_timeout)
            except requests.RequestException as exc:
                print(f"Error fetching page: {exc}")
                break

            try:
                page = response.json()
            except ValueError:
                print(f"Error decoding JSON: {response.text}")
                break

            # A missing next_cursor ends the pagination loop.
            cursor = page.get('next_cursor')
            _count_page(page.get('records') or [], target_date, unique_users, count_country)

        if count_country:
            return count_country

        # Nothing counted (e.g. the export was not ready yet): try again later.
        retry_count += 1
        print(f"Retrying... Attempt {retry_count}")
        time.sleep(RETRY_DELAY * retry_count)

    print("Max retries reached. Unable to fetch data.")
    return {}

def _build_daily_frame(event_data, day):
    """Return the report rows for one day: fetched countries first, then zero-filled required ones."""
    labels = list(event_data)
    labels.extend(column for column in required_columns if column not in event_data)
    return pd.DataFrame({
        'Date': day,
        'Metric Name': 'Platform - WEB',
        'channel_name': labels,
        'count': [event_data.get(label, 0) for label in labels],
    })


def _save_to_workbook(df, path, sheet_name='Platform_web'):
    """Merge ``df`` into ``sheet_name`` of the workbook at ``path``, creating the file if needed.

    Raises:
        KeyError: If the existing sheet has no ``Date`` column.
    """
    try:
        # 'replace' rewrites the sheet from scratch, so no stale rows are left
        # behind when the merged data is shorter than what was there before.
        with pd.ExcelWriter(path, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
            if sheet_name in writer.book.sheetnames:
                existing_data = pd.read_excel(path, sheet_name=sheet_name)
                if 'Date' not in existing_data.columns:
                    raise KeyError("'Date' column is missing in the existing data")

                # Excel dates are read back as Timestamps while new rows hold
                # plain dates; normalise so equal days compare equal.
                existing_data['Date'] = pd.to_datetime(existing_data['Date']).dt.date

                # De-duplicate AFTER combining, keeping the newest row, so
                # re-running a day replaces its rows instead of repeating them.
                updated_data = pd.concat([existing_data, df], ignore_index=True)
                updated_data = updated_data.drop_duplicates(
                    subset=['Date', 'channel_name'], keep='last'
                ).reset_index(drop=True)

                updated_data.to_excel(writer, sheet_name=sheet_name, index=False)
            else:
                df.to_excel(writer, sheet_name=sheet_name, index=False)
    except FileNotFoundError:
        # Append mode needs an existing workbook; create it on the first run.
        with pd.ExcelWriter(path, mode='w', engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)


# Main logic to iterate over dates and fetch data
current_date = start_date
while current_date < end_date:
    date_integer = int(current_date.strftime('%Y%m%d'))
    for event in events:
        start_time = time.time()  # Start timer
        event_data = get_event_data(event, date_integer)
        if not event_data:
            print(f"No data for event {event} on {current_date}")
            # Only skip this event. The date is advanced once, at the bottom
            # of the while loop; advancing it here as well would skip a day.
            continue

        df = _build_daily_frame(event_data, current_date.date())

        execution_time = time.time() - start_time

        print(df)
        print(f"Execution time for {event} on {current_date}: {execution_time:.2f} seconds")

        _save_to_workbook(df, output_file)

    current_date += timedelta(days=1)

         Date     Metric Name channel_name  count
0  2024-08-27  Platform - WEB  Middle East    448
1  2024-08-27  Platform - WEB        India   4068
2  2024-08-27  Platform - WEB    Singapore    276
3  2024-08-27  Platform - WEB     Malaysia    158
4  2024-08-27  Platform - WEB  Philippines    188
5  2024-08-27  Platform - WEB    Indonesia      4
6  2024-08-27  Platform - WEB     Thailand     10
7  2024-08-27  Platform - WEB     Hongkong      1
8  2024-08-27  Platform - WEB      Vietnam      4
Execution time for PageViewed on 2024-08-27 16:27:48.684817: 120.75 seconds
         Date     Metric Name channel_name  count
0  2024-08-28  Platform - WEB        India   3695
1  2024-08-28  Platform - WEB     Malaysia    156
2  2024-08-28  Platform - WEB  Middle East    435
3  2024-08-28  Platform - WEB    Singapore    310
4  2024-08-28  Platform - WEB  Philippines    213
5  2024-08-28  Platform - WEB     Hongkong      8
6  2024-08-28  Platform - WEB    Indonesia      6
7  2024-08-28  Platform 