# ens_kitchen notebook

Jupyter notebook for data extraction and processing for ENS Endowment data update & analysis. **Execution is on Colab** (not locally).

Sections:
1. **setup:** done in the first section so that the whole notebook has the proper config.
2. **data collection:** section used for collecting data for the jt kitchen.
    - **prices:** fetch prices for ENS portfolio relevant tokens.

In [None]:
"""
Setup all the required variables & logic for the notebook.
"""

# ==============================================
#  Install Required Packages
# ==============================================

# user-built packages to run in the colab
GITHUB_TOKEN = "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"
BRANCH = "main"
! pip install git+https://{GITHUB_TOKEN}@github.com/tom4s-lt/kpk-kitchens.git@{BRANCH}

# ==============================================
#  Import Required Libraries
# ==============================================

# user-built config class and functions
from kpk_kitchens.config import ENSConfig
from kpk_kitchens.utils import etl_gen_df_from_gsheet, gecko_get_price_historical

# Google authentication libraries
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default

# Other libraries
import os
import requests
import pandas as pd

import time
from datetime import datetime

from typing import Optional, Dict, Any, List

# ==============================================
#  Initialize script variables & params
# ==============================================

# google credentials & client
# `default()` returns a pair; only the first element (the credentials) is kept
# and handed to gspread to build the client `gc` used by the cells below.
creds, _ = default()
gc = gspread.authorize(creds)

# Create the data directory
# exist_ok=True lets this cell be re-run once the directory already exists.
os.makedirs(ENSConfig.DATA_DIR, exist_ok=True)

# data collection

## prices

In [None]:
"""
Fetches prices for ens portfolio relevant tokens from CoinGecko.

Most failures come from duplicate assets in the asset tab.
"""

# Load the asset lookup table from the Google Sheet
json_lk_assets = etl_gen_df_from_gsheet(gc, ENSConfig.WORKBOOK_URL, ENSConfig.LK_ASSETS)

# Single pass over the sheet rows: keep ENS assets only and split them into
# stablecoins / non-stablecoins based on their market type
json_ens_assets = []
stablecoins = []
non_stablecoins = []
for entry in json_lk_assets:
    if entry.get("company") != "ENS":
        continue
    json_ens_assets.append(entry)
    if entry.get("type_market") == "stablecoin":
        stablecoins.append(entry)
    else:
        non_stablecoins.append(entry)

print(f"Found {len(stablecoins)} stablecoins and {len(non_stablecoins)} non-stablecoins")

# De-duplicate non-stablecoins on symbol_level_0: entries without a value are
# dropped, and when several entries share a value the last one wins while the
# position of the first occurrence is kept
unique_by_underlying = {}
for entry in non_stablecoins:
    underlying = entry.get("symbol_level_0", "")
    if underlying:
        unique_by_underlying[underlying] = entry
non_stablecoins = list(unique_by_underlying.values())

print("\nOnly level_0/underlying is fetched because that's waht's prices in the reporting")

# Fetch and process price data for non-stablecoin assets
#
# One request is made per asset. Each successful response becomes a DataFrame
# with columns ts / price / id_gecko / symbol appended to `price_data`.
# Assets for which no usable price series comes back are reported and
# collected in `failed_assets` instead of being dropped silently.
price_data = []
failed_assets = []
for asset in non_stablecoins:
    print(f"Fetching data for {asset['symbol']}...")

    gecko_hist_data = gecko_get_price_historical(
        base_url=ENSConfig.COINGECKO_API_BASE_URL,
        asset_id=asset['id_gecko'],
        api_key=ENSConfig.COINGECKO_API_KEY,
        max_retries=ENSConfig.MAX_RETRIES,
        retry_delay=ENSConfig.RETRY_DELAY,
        timeout=ENSConfig.DEFAULT_TIMEOUT,
        # params is function default - 365 days max with free key
        headers={
            'accept': 'application/json',
            'x-cg-demo-api-key': ENSConfig.COINGECKO_API_KEY
        }
    )

    # A payload without a 'prices' entry is treated like a failed fetch
    # rather than aborting the whole run with a KeyError.
    prices = None
    if gecko_hist_data and 'prices' in gecko_hist_data:
        prices = gecko_hist_data['prices']

    if prices is not None and len(prices) > 0:
        # Create DataFrame for current asset
        df = pd.DataFrame(prices, columns=['ts', 'price'])
        df['id_gecko'] = asset['id_gecko']
        df['symbol'] = asset['symbol']
        price_data.append(df)
        print(f"Successfully fetched data for {asset['symbol']}")
    else:
        failed_assets.append(asset['symbol'])
        print(f"No price data returned for {asset['symbol']} (id_gecko={asset['id_gecko']!r})")

    time.sleep(3)  # Rate limiting

print("\nPrice data collection complete")
if failed_assets:
    print(f"Assets without price data ({len(failed_assets)}): {', '.join(map(str, failed_assets))}")

# Process price data
print("\nProcessing price data...")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with the same exception type but a message that says what to check.
if not price_data:
    raise ValueError(
        "No price data was fetched for any asset; check the asset tab "
        "(ids, duplicates) and the CoinGecko API key before re-running."
    )

df_prices = pd.concat(price_data)
# 'ts' is converted as epoch milliseconds
df_prices['date'] = pd.to_datetime(df_prices['ts'], unit='ms')

# Resample to daily frequency and calculate mean prices
df_prices = (df_prices
    .groupby(['symbol', 'id_gecko'])
    .resample('D', on='date')
    .mean()
    .reset_index()
    [['date', 'symbol', 'id_gecko', 'price']]  # Drop ts
    .sort_values('date', ascending=False)  # newest first
)

print("\nPrice data processing complete")

# Add stablecoin data with price=1
if stablecoins:
    print("\nAdding stablecoin data...")
    # Stablecoins get one row for every distinct date present in the price data
    dates = df_prices['date'].unique()

    # One record per (stablecoin, date) pair, all priced at 1.0
    stablecoin_data = [
        {
            'date': date,
            'symbol': asset['symbol'],
            'id_gecko': asset['id_gecko'],
            'price': 1.0,
        }
        for asset in stablecoins
        for date in dates
    ]

    # Append the stablecoin rows and restore the newest-first ordering
    df_stablecoins = pd.DataFrame(stablecoin_data)
    df_prices = pd.concat(
        [df_prices, df_stablecoins],
        ignore_index=True,
    ).sort_values('date', ascending=False)

print("\nStablecoin prices complete")

# Export results
# Writes the combined price table to the path configured in
# ENSConfig.PRICES_CSV; index=False keeps the DataFrame index out of the file.
print(f"\nExporting results to {ENSConfig.PRICES_CSV}...")
df_prices.to_csv(ENSConfig.PRICES_CSV, index=False)
print("\nExport complete!")