In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import requests
import json
import pandas as pd
from datetime import datetime
import pytz
import os

# Initialize the WebDriver
def initialize_driver():
    """Start a Chrome WebDriver session and open the login site.

    The driver executable path comes from ``ChromeDriverManager().install()``
    and is handed to Selenium through a ``Service`` object.

    Returns:
        The ``webdriver.Chrome`` instance, already navigated to the login URL.
        The caller owns the session and is responsible for calling ``quit()``.
    """
    driver_path = ChromeDriverManager().install()
    browser = webdriver.Chrome(service=Service(driver_path))
    browser.get("https://prod-bop.securiti.xyz/")
    return browser

# Wait for user interaction in the browser
def wait_for_user():
    """Pause the script until the operator presses Enter.

    Prints a reminder to log in through the open browser window, blocks on
    ``input()``, then prints a confirmation that the script is resuming.
    Returns nothing.
    """
    login_notice = "Browser is open. Please log in to the site if required."
    resume_prompt = "Press Enter in the terminal when you are ready to continue..."

    print(login_notice)
    input(resume_prompt)  # blocks here; the typed text itself is ignored
    print("Continuing the script...")

# Retrieve cookies from the browser session
def get_cookies(driver):
    """Flatten the browser session's cookies into a ``{name: value}`` dict.

    Args:
        driver: Object exposing ``get_cookies()``, which must return an
            iterable of mappings that each contain ``'name'`` and ``'value'``.

    Returns:
        dict mapping each cookie name to its value. When a name occurs more
        than once, the last occurrence wins.
    """
    jar = {}
    for entry in driver.get_cookies():
        jar[entry['name']] = entry['value']
    return jar

# Fetch data from API and save as JSON
def fetch_and_save_data(api_url, filename, cookies, timeout=30):
    """Download one API resource and store its payload as a JSON file.

    Failures (network errors, HTTP error statuses, a body that is not valid
    JSON, or a file that cannot be written) are reported on stdout and the
    function returns without raising, so one bad endpoint does not abort
    the whole run.

    Args:
        api_url: URL to request with HTTP GET.
        filename: Path of the JSON file to write; also used in log messages.
        cookies: ``{name: value}`` dict sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        None.
    """
    try:
        response = requests.get(api_url, cookies=cookies, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while fetching {filename}: {e}")
        return
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON body.
        print(f"An error occurred while fetching {filename}: invalid JSON response ({e})")
        return

    # Responses are normally an object wrapping the records under "data".
    # A top-level list (or other non-object) has no .get(), so keep it as-is
    # instead of crashing.
    if isinstance(payload, dict):
        data = payload.get("data", [])
    else:
        data = payload

    try:
        with open(filename, "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4)
    except OSError as e:
        print(f"An error occurred while saving {filename}: {e}")
        return

    print(f"Data successfully saved to {filename}")

# Convert epoch time to human-readable format
def convert_epoch_to_human(epoch_time, timezone='UTC'):
    """Format an epoch timestamp given in milliseconds as ``MM-DD-YYYY``.

    Args:
        epoch_time: Milliseconds since the Unix epoch.
        timezone: Name of the timezone (as understood by ``pytz.timezone``)
            in which the calendar date is evaluated. Defaults to ``'UTC'``.

    Returns:
        The date as a ``'%m-%d-%Y'`` string; the time of day is dropped.
    """
    seconds = epoch_time / 1000
    target_zone = pytz.timezone(timezone)
    # Build an aware UTC datetime first, then shift it into the target zone.
    utc_moment = datetime.fromtimestamp(seconds, tz=pytz.utc)
    local_moment = utc_moment.astimezone(target_zone)
    return f"{local_moment:%m-%d-%Y}"

# Process JSON files and save data to an Excel file
def process_json_to_excel():
    """Collect the numbered JSON exports into one Excel workbook.

    For each index 1-7 the current working directory is searched for a file
    named ``<index>-<anything>.json`` (for example ``1-pod.json`` or
    ``2-data_systems.json``). If several files share an index, the
    alphabetically first one is used. Each file found is loaded into a
    DataFrame, epoch-millisecond values in timestamp-like columns are
    rewritten as ``MM-DD-YYYY`` strings, and the frame is written to sheet
    ``Sheet<index>`` of ``processed_data.xlsx``.

    Files that are missing are skipped; files that fail to load or write are
    reported on stdout and skipped. If no file could be loaded, no workbook
    is written.

    Returns:
        None.
    """
    # Substrings that mark a column as holding timestamps.
    keywords = ['created_at', 'modified_at', 'timestamp', 'last_scan_timestamp', 'next_scan_timestamp', 'published_at']
    timezone = 'UTC'
    output_path = 'processed_data.xlsx'
    # Values above this are treated as epoch milliseconds (about Sep 2001 onward).
    epoch_ms_floor = 1_000_000_000_000

    def to_human(value):
        """Convert one cell if it looks like an epoch-millisecond number."""
        if isinstance(value, (int, float)) and value > epoch_ms_floor:
            return convert_epoch_to_human(value, timezone)
        return value

    # Sorted so the choice is deterministic when several files share an index.
    candidates = sorted(name for name in os.listdir('.') if name.endswith('.json'))

    frames = []
    for i in range(1, 8):
        prefix = f"{i}-"
        matches = [name for name in candidates if name.startswith(prefix)]
        if not matches:
            continue
        filename = matches[0]
        try:
            # convert_dates=False: by default read_json parses columns such as
            # "*_at" or "timestamp*" into datetimes itself, which would make
            # the epoch check below never match and leave mixed formats.
            data = pd.read_json(filename, convert_dates=False)

            for column in data.columns:
                # Column labels may be non-strings (e.g. integers).
                label = str(column).lower()
                if any(keyword in label for keyword in keywords):
                    data[column] = data[column].apply(to_human)

            frames.append((i, filename, data))
        except Exception as e:
            print(f"Error processing {filename}: {e}")

    if not frames:
        # A workbook without any sheet cannot be saved.
        print("No JSON files could be processed; 'processed_data.xlsx' was not written.")
        return

    # The context manager saves and closes the workbook, even on error.
    with pd.ExcelWriter(output_path, engine='openpyxl') as excel_writer:
        for i, filename, data in frames:
            try:
                data.to_excel(excel_writer, sheet_name=f"Sheet{i}", index=False)
                print(f"Processed {filename} and saved to Excel.")
            except Exception as e:
                print(f"Error processing {filename}: {e}")

    print("Data processed and saved to 'processed_data.xlsx'.")

# Main script
def main():
    """Run the export workflow end to end.

    Steps:
        1. Open Chrome on the login site and wait for the operator to log in.
        2. Navigate to the tenant site and read the session cookies.
        3. Request each API endpoint with those cookies and save the
           response to its JSON file.
        4. Close the browser, then build the Excel workbook from the JSON
           files.

    The browser is closed in a ``finally`` block so the Chrome and driver
    processes do not linger when any step raises.
    """
    site_tenant = "https://app.securiti.ai/#/"

    # API endpoints and the file each response is saved to.
    endpoints = [
        ("https://app.securiti.ai/core/v1/admin/appliance", "1-pod.json"),
        ("https://app.securiti.ai/privaci/v1/admin/datasources?sort=name&p=1&limit=35&ds_connector_type=cloud&ds_connector_type=onprem", "2-data_systems.json"),
        ("https://app.securiti.ai/privaci/v1/admin/customer_storage", "3-private_cloud_storage.json"),
        ("https://app.securiti.ai/privaci/v1/admin/tenants/configs/setting_export_compressed_csv_format", "4-csv_export.json"),
        ("https://app.securiti.ai/core/v1/admin/tenant/security", "5-mfa_password.json"),
        ("https://app.securiti.ai/privaci/v1/admin/cmp/domain?sort=id&p=1&limit=0", "6-cookie_overview.json"),
        ("https://app.securiti.ai/privaci/v1/admin/dsr/forms/counts", "7-dsr_count.json")
    ]

    driver = initialize_driver()
    try:
        # Wait for user to perform actions in the browser
        wait_for_user()

        # Cookies are read after navigating to the tenant site.
        driver.get(site_tenant)
        cookies = get_cookies(driver)

        # Fetch data and save JSON files
        for api_url, filename in endpoints:
            fetch_and_save_data(api_url, filename, cookies)
    finally:
        driver.quit()

    # Process JSON files into an Excel file
    process_json_to_excel()

# Entry point: run the whole workflow when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()


Data successfully saved to merged_data.xlsx


In [2]:
import pandas as pd
from datetime import datetime
import pytz

# Function to convert epoch time (in milliseconds) to human-readable format with timezone awareness
def convert_epoch_to_human(epoch_time, timezone='UTC'):
    """Turn an epoch timestamp in milliseconds into an ``MM-DD-YYYY`` string.

    Args:
        epoch_time: Milliseconds since the Unix epoch.
        timezone: Timezone name accepted by ``pytz.timezone``; the calendar
            date is taken in this zone. Defaults to ``'UTC'``.

    Returns:
        The date formatted with ``'%m-%d-%Y'``. Hours, minutes and seconds
        are not included in the output.
    """
    seconds_since_epoch = epoch_time / 1000
    zone = pytz.timezone(timezone)
    return (
        datetime.fromtimestamp(seconds_since_epoch, tz=pytz.utc)
        .astimezone(zone)
        .strftime('%m-%d-%Y')
    )

# Read every sheet of the workbook; sheet_name=None returns {sheet name: DataFrame}
df_sheets = pd.read_excel('merged_data.xlsx', sheet_name=None)

# Timezone in which the calendar date is evaluated
timezone = 'UTC'  # Modify as needed, e.g., 'America/New_York'

# Substrings that mark a column as holding timestamps (add more as needed)
keywords = ['created_at', 'modified_at', 'timestamp', 'last_scan_timestamp', 'next_scan_timestamp', 'published_at']

# Write all processed sheets into a single output workbook; the context manager
# saves and closes the file when the block exits
with pd.ExcelWriter('data_with_human_dates.xlsx', engine='openpyxl') as writer:
    for sheet_name, sheet_data in df_sheets.items():
        for column in sheet_data.columns:
            # Header cells that hold numbers come back as non-string labels,
            # so normalise to str before matching against the keywords
            column_label = str(column).lower()
            if any(keyword in column_label for keyword in keywords):
                # Only values large enough to be epoch milliseconds are converted;
                # everything else (text, NaN, small numbers) is left untouched
                sheet_data[column] = sheet_data[column].apply(
                    lambda x: convert_epoch_to_human(x, timezone) if isinstance(x, (int, float)) and (x > 1000000000000) else x
                )

        # Keep the original sheet name in the output workbook
        sheet_data.to_excel(writer, sheet_name=sheet_name, index=False)

print("Epoch times converted and saved to 'data_with_human_dates.xlsx'")


Epoch times converted and saved to 'data_with_human_dates.xlsx'
