In [None]:
# --- Standard Library ---
import os
import sys
import json
from datetime import timedelta

# --- Google Cloud Auth + APIs ---
from google.cloud import bigquery
from google.oauth2 import service_account
# from google.auth.transport.requests import AuthorizedSession

import gspread
from gspread_dataframe import set_with_dataframe

# --- Data & Visualization ---
import pandas as pd
import pandas_gbq
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# --- Local Modules ---
from modules.pull_data import pull_and_append
from modules.flattening_json import flatten_extract_params, flatten_row, flatten_nested_column
from modules.column_order import column_order

In [5]:
# --- Configuration ---
SERVICE_ACCOUNT_KEY = './keys/key.json'
DATA_PATH = './data/data.json'
PROJECT_ID = "emojioracle-342f1"
DATASET_ID = "analytics_481352676"
SCOPES = [
    "https://www.googleapis.com/auth/bigquery",
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
]
# --- BigQuery Client ---
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_KEY,
    scopes = SCOPES
)
bq_client = bigquery.Client(credentials = credentials, project = PROJECT_ID)

In [23]:
# will pull what is missing from BigQuery and merge into data_path
pull_and_append(credentials, PROJECT_ID, DATASET_ID, DATA_PATH)

Loaded existing data.
Latest event date in existing data: 20250418
No new data found or no tables to import.


In [24]:
pd.set_option('display.max_columns', None) # uncomment to see all of the cols in pandas dataframes

# load the data merged by pull_and_append into df
df = pd.read_json(DATA_PATH)

In [25]:
# flattenning

df = pd.DataFrame([flatten_row(row) for _, row in df.iterrows()]) # for wtfs refer to ./modules/flattening_json.py

In [None]:
# cleaning & preprocessing

## dates and times
df = df.drop(columns=['event_date']) # built in case event_date may not be the same as the one in the event_timestamp

# convert unix time to ape-friendly datetime format
df['event_datetime'] = pd.to_datetime(df['event_timestamp'], unit='us', utc=True) 
df['event_previous_datetime'] = pd.to_datetime(df['event_previous_timestamp'], unit='us', utc=True)
df['event_first_touch_datetime'] = pd.to_datetime(df['user_first_touch_timestamp'], unit='us', utc=True)
df['user.first_open_datetime'] = pd.to_datetime(df['user.first_open_time'], unit='ms', utc=True)


df['event_date'] = df['event_datetime'].dt.date
df['event_time'] = df['event_datetime'].dt.time

df['event_previous_date'] = df['event_previous_datetime'].dt.date
df['event_previous_time'] = df['event_previous_datetime'].dt.time

df['event_first_touch_date'] = df['event_first_touch_datetime'].dt.date
df['event_first_touch_time'] = df['event_first_touch_datetime'].dt.time

df['user.first_open_date'] = df['user.first_open_datetime'].dt.date
df['user.first_open_time'] = df['user.first_open_datetime'].dt.time

df['device.time_zone_offset_hours'] = df['device.time_zone_offset_seconds'] / 3600 # seconds to hours
df['event_params.engagement_time_seconds'] = df['event_params.engagement_time_msec'] / 1000 # ms to seconds
df['event_server_delay_seconds'] = df['event_server_timestamp_offset'] / 1000 # ms to seconds 
df['event_params.time_spent_seconds'] = df['event_params.time_spent'] # just renaming for clarity

df = df.drop(columns=['event_timestamp',
                      'event_previous_timestamp', 
                      'user_first_touch_timestamp', 
                      'event_server_timestamp_offset', 
                      'device.time_zone_offset_seconds', 
                      'event_params.engagement_time_msec',
                      'event_datetime',
                      'event_previous_datetime',
                      'event_params.time_spent',
                      'event_first_touch_datetime',
                      'user.first_open_datetime'
                    ])



In [27]:
# I reordered the columns to make it easier to follow, in column_order.py

df = df[column_order]


df.columns = df.columns.str.replace('.', '__') # replace dots with dashes in column names bc of bigquerys nagging

In [33]:
df.tail(5)

Unnamed: 0,event_name,event_date,event_time,event_previous_date,event_previous_time,event_first_touch_date,event_first_touch_time,event_bundle_sequence_id,user_id,user_pseudo_id,user__first_open_date,user__first_open_time,user__ga_session_id,user__ga_session_number,app_info__id,app_info__firebase_app_id,app_info__version,app_info__install_store,app_info__install_source,device__advertising_id,device__vendor_id,device__category,device__mobile_brand_name,device__mobile_model_name,device__mobile_marketing_name,device__mobile_os_hardware_model,device__operating_system,device__operating_system_version,device__language,device__is_limited_ad_tracking,device__browser,device__browser_version,device__web_info,device__time_zone_offset_hours,geo__city,geo__country,geo__continent,geo__region,geo__sub_continent,geo__metro,traffic_source__name,traffic_source__medium,traffic_source__source,collected_traffic_source,event_params__ga_session_id,event_params__firebase_screen_id,event_params__ad_unit_id,event_params__ad_format,event_params__ad_network,event_params__ad_platform,event_params__ad_shown_where,event_params__answered_wrong,event_params__character_name,event_params__current_qi,event_params__current_tier,event_params__earned_amount,event_params__engaged_session_event,event_params__engagement_time_seconds,event_params__entrances,event_params__firebase_conversion,event_params__firebase_error,event_params__firebase_event_origin,event_params__firebase_screen_class,event_params__ga_session_number,event_params__how_its_earned,event_params__menu_name,event_params__mini_game_name,event_params__mini_game_ri,event_params__previous_first_open_count,event_params__session_engaged,event_params__spent_amount,event_params__spent_to,event_params__system_app,event_params__system_app_update,event_params__time_spent_seconds,event_params__update_with_analytics,event_params__where_its_earned,event_params__where_its_spent,event_params__currency_name,batch_event_index,batch_ordering_id,batch_page_id,privacy_info__ads_storage,privacy_info__analytics_storage,privacy_info__uses_transient_token,event_dimensions,event_server_delay_seconds,event_value_in_usd,ecommerce,is_active_user,platform,stream_id,user_ltv
15386,earn_virtual_currency,2025-04-18,19:22:10.191190,2025-04-18,19:21:54.046190,2025-04-09,09:32:18.415000,750,,6bef18e3d26a07b8a20c26ff7eb26d82,2025-04-09,10:00:00,1745001000.0,3.0,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Xiaomi,MI 9,,MI 9,Android,Android 11,en-us,No,,,,3.0,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,1745001270,7.667933e+17,,,,,,,,,,20.0,1.0,,,,,app,UnityPlayerActivity,3.0,normal,,,,,,,,,,,,question,,gold,188,,,Yes,Yes,No,,1289.674,,,True,ANDROID,10359646141,
15387,menu_opened,2025-04-18,19:22:12.194191,2025-04-18,19:21:09.269191,2025-04-09,09:32:18.415000,750,,6bef18e3d26a07b8a20c26ff7eb26d82,2025-04-09,10:00:00,1745001000.0,3.0,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Xiaomi,MI 9,,MI 9,Android,Android 11,en-us,No,,,,3.0,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,1745001270,7.667933e+17,,,,,,,,,,,1.0,,,,,app,UnityPlayerActivity,3.0,,crystal_menu,,,,,,,,,,,,,,189,,,Yes,Yes,No,,1289.674,,,True,ANDROID,10359646141,
15388,menu_opened,2025-04-18,19:22:12.679192,2025-04-18,19:22:12.194192,2025-04-09,09:32:18.415000,750,,6bef18e3d26a07b8a20c26ff7eb26d82,2025-04-09,10:00:00,1745001000.0,3.0,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Xiaomi,MI 9,,MI 9,Android,Android 11,en-us,No,,,,3.0,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,1745001270,7.667933e+17,,,,,,,,,,,1.0,,,,,app,UnityPlayerActivity,3.0,,crystal_character_menu,,,,,,,,,,,,,,190,,,Yes,Yes,No,,1289.674,,,True,ANDROID,10359646141,
15389,user_engagement,2025-04-18,19:22:37.285193,2025-04-09,16:31:48.368193,2025-04-09,09:32:18.415000,750,,6bef18e3d26a07b8a20c26ff7eb26d82,2025-04-09,10:00:00,1745001000.0,3.0,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Xiaomi,MI 9,,MI 9,Android,Android 11,en-us,No,,,,3.0,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,1745001270,7.667933e+17,,,,,,,,,,,1.0,2886.603,,,,auto,UnityPlayerActivity,3.0,,,,,,,,,,,,,,,,191,,,Yes,Yes,No,,1289.674,,,True,ANDROID,10359646141,
15390,user_engagement,2025-04-18,19:22:38.960194,2025-04-18,19:22:37.285194,2025-04-09,09:32:18.415000,750,,6bef18e3d26a07b8a20c26ff7eb26d82,2025-04-09,10:00:00,1745001000.0,3.0,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Xiaomi,MI 9,,MI 9,Android,Android 11,en-us,No,,,,3.0,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,1745001270,7.667933e+17,,,,,,,,,,,1.0,1.532,,,,auto,UnityPlayerActivity,3.0,,,,,,,,,,,,,,,,192,,,Yes,Yes,No,,1289.674,,,True,ANDROID,10359646141,


In [29]:
df.to_csv('./data/cleaned_data.csv', index=False) # save the cleaned data as a csv file

In [30]:
# Write to BigQuery (once, overwritin
table_id = f"{PROJECT_ID}.{DATASET_ID}.clean_data"

job = bq_client.load_table_from_dataframe(df, table_id, job_config=bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE"
))
job.result()  # Wait for job to finish

print(f"Cleaned data written to {table_id}. Ready to use in Looker.")



Cleaned data written to emojioracle-342f1.analytics_481352676.clean_data. Ready to use in Looker.


In [34]:
df.describe(include='all') # show the data types of the columns

Unnamed: 0,event_name,event_date,event_time,event_previous_date,event_previous_time,event_first_touch_date,event_first_touch_time,event_bundle_sequence_id,user_id,user_pseudo_id,user__first_open_date,user__first_open_time,user__ga_session_id,user__ga_session_number,app_info__id,app_info__firebase_app_id,app_info__version,app_info__install_store,app_info__install_source,device__advertising_id,device__vendor_id,device__category,device__mobile_brand_name,device__mobile_model_name,device__mobile_marketing_name,device__mobile_os_hardware_model,device__operating_system,device__operating_system_version,device__language,device__is_limited_ad_tracking,device__browser,device__browser_version,device__web_info,device__time_zone_offset_hours,geo__city,geo__country,geo__continent,geo__region,geo__sub_continent,geo__metro,traffic_source__name,traffic_source__medium,traffic_source__source,collected_traffic_source,event_params__ga_session_id,event_params__firebase_screen_id,event_params__ad_unit_id,event_params__ad_format,event_params__ad_network,event_params__ad_platform,event_params__ad_shown_where,event_params__answered_wrong,event_params__character_name,event_params__current_qi,event_params__current_tier,event_params__earned_amount,event_params__engaged_session_event,event_params__engagement_time_seconds,event_params__entrances,event_params__firebase_conversion,event_params__firebase_error,event_params__firebase_event_origin,event_params__firebase_screen_class,event_params__ga_session_number,event_params__how_its_earned,event_params__menu_name,event_params__mini_game_name,event_params__mini_game_ri,event_params__previous_first_open_count,event_params__session_engaged,event_params__spent_amount,event_params__spent_to,event_params__system_app,event_params__system_app_update,event_params__time_spent_seconds,event_params__update_with_analytics,event_params__where_its_earned,event_params__where_its_spent,event_params__currency_name,batch_event_index,batch_ordering_id,batch_page_id,privacy_info__ads_storage,privacy_info__analytics_storage,privacy_info__uses_transient_token,event_dimensions,event_server_delay_seconds,event_value_in_usd,ecommerce,is_active_user,platform,stream_id,user_ltv
count,15391,15391,15391,15110,15110,15391,15391,15391.0,0.0,15391,15391,15391,15368.0,15368.0,15391,15391,15391,0.0,15391,0.0,0.0,15391,15362,15362,13845,15391,15391,15391,15391,15391,0.0,0.0,0.0,15391.0,15391,15391,15391,15391,15391,15391,15391,15391,15391,0.0,15391.0,15255.0,396,159,159,396,396,739.0,6356,6356.0,6356.0,3117.0,15187.0,332.0,92.0,2569.0,198.0,15391,15255,15368.0,3117,3936,534,534,10.0,88.0,446.0,446,0.0,0.0,5352.0,0.0,3117,446,3563,15391.0,0.0,0.0,15391,15391,15391,0.0,15391.0,0.0,0.0,15391,15391,15391.0,0.0
unique,17,15,15391,15,15110,12,27,,,27,11,15,,,1,1,1,0.0,2,0.0,0.0,1,2,11,10,12,1,4,4,1,0.0,0.0,0.0,,7,2,2,7,2,1,1,1,1,0.0,,,1,1,1,1,6,,27,16.0,4.0,,,,,,,2,1,,8,13,7,57,,,,93,,,,,4,3,2,,,,1,1,1,0.0,,,0.0,2,1,,0.0
top,question_started,2025-04-06,19:22:38.960194,2025-04-06,19:22:37.285194,2025-04-04,21:10:44.888000,,,ddf2acb862daf1f5e1e9dff579ec87d7,2025-04-04,22:00:00,,,com.TestCompany.TestApp,1:332513280181:android:e9c9cb25732e9477377efc,0.1.0,,com.google.android.packageinstaller,,,mobile,Samsung,SM-A525F,Galaxy A52 4G,SM-A525F,Android,Android 14,tr-tr,No,,,,,Ankara,Türkiye,Asia,Ankara,Western Asia,(not set),(direct),(none),(direct),,,,test_unit_id,test_ad_format,test_ad_network,test_platform,wanna_play_ad,,t,10.0,1.0,,,,,,,app,UnityPlayerActivity,,combo,scroll_menu,stone_mini_game,stone_game,,,,potion,,,,,question,board,gold,,,,Yes,Yes,No,,,,,True,ANDROID,,
freq,3226,3589,1,3573,1,7866,7831,,,7831,7866,9035,,,15391,15391,15391,,15362,,,15391,13546,11702,11702,11702,15391,12516,12896,15391,,,,,14003,15022,15022,14003,15022,15391,15391,15391,15391,,,,396,159,159,396,159,,1456,481.0,3519.0,,,,,,,14785,15255,,1702,1241,160,80,,,,44,,,,,2937,208,3476,,,,15391,15391,15391,,,,,15386,15391,,
mean,,,,,,,,2391.090702,,,,,1744103000.0,6.608863,,,,,,,,,,,,,,,,,,,,2.83159,,,,,,,,,,,1744103000.0,-1.494439e+18,,,,,,1.213802,,,,51.989092,1.0,367.962497,1.0,1.0,10.0,,,6.608863,,,,,2.3,1.0,196.688341,,,,19.105707,,,,,73.87941,,,,,,,493.717088,,,,,10359650000.0,
std,,,,,,,,2425.729602,,,,,283787.7,4.401455,,,,,,,,,,,,,,,,,,,,1.075375,,,,,,,,,,,283817.4,5.719413e+18,,,,,,0.454158,,,,37.584597,0.0,632.779066,0.0,0.0,0.0,,,4.401455,,,,,1.418136,0.0,370.502555,,,,30.54594,,,,,72.311266,,,,,,,725.49035,,,,,0.0,
min,,,,,,,,1.0,,,,,1743771000.0,1.0,,,,,,,,,,,,,,,,,,,,-7.0,,,,,,,,,,,1743771000.0,-9.173667e+18,,,,,,1.0,,,,-120.0,1.0,0.001,1.0,1.0,10.0,,,1.0,,,,,1.0,1.0,1.0,,,,0.0,,,,,1.0,,,,,,,0.014,,,,,10359650000.0,
25%,,,,,,,,323.0,,,,,1743928000.0,3.0,,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,1743928000.0,-6.898577e+18,,,,,,1.0,,,,20.0,1.0,19.1755,1.0,1.0,10.0,,,3.0,,,,,1.0,1.0,50.0,,,,8.758408,,,,,15.0,,,,,,,266.095,,,,,10359650000.0,
50%,,,,,,,,1434.0,,,,,1744022000.0,5.0,,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,1744022000.0,-3.319083e+18,,,,,,1.0,,,,40.0,1.0,119.1065,1.0,1.0,10.0,,,5.0,,,,,2.0,1.0,100.0,,,,11.690674,,,,,51.0,,,,,,,374.578,,,,,10359650000.0,
75%,,,,,,,,4214.0,,,,,1744147000.0,11.0,,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,1744183000.0,3.685923e+18,,,,,,1.0,,,,60.0,1.0,418.95,1.0,1.0,10.0,,,11.0,,,,,3.0,1.0,200.0,,,,18.174141,,,,,113.0,,,,,,,660.783,,,,,10359650000.0,
