In [1]:
import os
from datetime import datetime, timezone

import polars as pl

In [2]:
# Input (raw JSON) and output (transformed CSV) locations used by this notebook.
RAW_DATA_PATH = '../data/raw/hub_notifications_logs.json'
TRANSFORMED_DATA_PATH = '../data/transformed/hub_notifications_logs_transformed.csv'

# Read the raw JSON file into a polars DataFrame.
df = pl.read_json(source=RAW_DATA_PATH)

**Transformation 1: Rename and drop columns, then drop rows containing nulls**

In [3]:
# One pipeline: suffix the identifying columns with "_notificationlogs",
# discard the underscore-prefixed metadata columns, then remove every row
# that still contains a null in any remaining column.
df = (
    df
    .rename({
        'id': 'id_notificationlogs',
        'capstone_name': 'capstone_name_notificationlogs',
        'capstone_email': 'capstone_email_notificationlogs',
        'capstone_employee_id': 'capstone_employee_id_notificationlogs',
    })
    .drop(['_rid', '_self', '_etag', '_attachments', '_ts'])
    .drop_nulls()
)

#df.head()

**Transformation 2: Convert timestamps stored in two formats (epoch milliseconds and ISO 8601) into `YYYY-MM-DD HH:MM:SS` datetime strings.**

In [4]:
# Convert timestamps to "YYYY-MM-DD HH:MM:SS"
def convert_mixed_timestamp(ts: str):
    """Normalize an epoch-milliseconds or ISO 8601 timestamp to a UTC string.

    Args:
        ts: Either a numeric value (or numeric string) holding milliseconds
            since the Unix epoch, or an ISO 8601 string. A trailing "Z" is
            accepted as the UTC designator.

    Returns:
        The timestamp formatted as "YYYY-MM-DD HH:MM:SS", or None when the
        value cannot be interpreted in either format. ISO values carrying a
        UTC offset are converted to UTC first; ISO values without an offset
        are formatted as written.
    """
    # First attempt: numeric input, interpreted as milliseconds since the epoch.
    try:
        epoch_ms = int(float(ts))
        dt = datetime.fromtimestamp(epoch_ms / 1000, tz=timezone.utc)
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError, OverflowError, OSError):
        # Not numeric (or out of the platform's range): try ISO 8601 below.
        pass

    # Second attempt: ISO 8601. "Z" is rewritten to an explicit offset because
    # older Python versions of fromisoformat() do not accept it.
    try:
        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
        if dt.tzinfo is not None:
            # Normalize offset-aware values so the output is always UTC.
            dt = dt.astimezone(timezone.utc)
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except (AttributeError, TypeError, ValueError, OverflowError):
        return None

# Apply to DataFrame
# return_dtype is given explicitly so the new column is always a string
# column instead of leaving polars to infer the type from the returned values.
df = df.with_columns([
    pl.col("view_time_utc")
    .map_elements(convert_mixed_timestamp, return_dtype=pl.Utf8)
    .alias("converted_time")
])

# Optional: filter out failed conversions (the converter returns None for them)
df = df.filter(pl.col("converted_time").is_not_null())

# Preview
#print(df.head())


  df = df.with_columns([


In [5]:
# Expose the converted timestamp as 'View_time' and discard the original column.
df = (
    df
    .rename({'converted_time': 'View_time'})
    .drop('view_time_utc')
)

In [6]:
# Persist the result: make sure the destination folder exists, write the CSV,
# then report where it was saved.
output_dir = os.path.dirname(TRANSFORMED_DATA_PATH)
os.makedirs(output_dir, exist_ok=True)
df.write_csv(TRANSFORMED_DATA_PATH)
print(f'Transformed data saved to {TRANSFORMED_DATA_PATH}')

Transformed data saved to ../data/transformed/hub_notifications_logs_transformed.csv
