In [1]:
import pandas as pd
import numpy as np
import duckdb as ddb
# import plotly.express as px
import matplotlib.pyplot as plt
import dotenv

In [2]:
# Load settings from the local .env file; the R2 credentials used further down
# are looked up in this mapping.
env = dotenv.dotenv_values(dotenv_path=".env")

In [3]:
# Single DuckDB connection shared by every query in this notebook (in-memory database).
con = ddb.connect(database=':memory:')

In [4]:
# Install, then load, the httpfs extension on this connection; it is set up
# before the r2:// secret and parquet read that follow.
for _extension_step in (con.install_extension, con.load_extension):
    _extension_step('httpfs')

In [5]:
# Register the R2 credentials from .env as a DuckDB secret on this connection.
_R2_KEYS = ('R2_ACCESS_KEY_ID', 'R2_SECRET_ACCESS_KEY', 'R2_ACCOUNT_ID')

# Fail early with a readable message. An absent key would raise KeyError below
# anyway, but a key declared without a value comes back as None (or '') and
# would otherwise be interpolated into the statement as-is.
_r2_missing = [key for key in _R2_KEYS if not env.get(key)]
if _r2_missing:
    raise KeyError(f"Missing or empty R2 settings in .env: {', '.join(_r2_missing)}")

# The values are embedded as SQL string literals, so double any single quote
# to keep each literal well-formed.
_r2 = {key: env[key].replace("'", "''") for key in _R2_KEYS}

con.sql(f"""
CREATE OR REPLACE SECRET (
    TYPE r2,
    KEY_ID '{_r2['R2_ACCESS_KEY_ID']}',
    SECRET '{_r2['R2_SECRET_ACCESS_KEY']}',
    ACCOUNT_ID '{_r2['R2_ACCOUNT_ID']}'
);
""")

┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ true    │
└─────────┘

In [6]:
# Location of the cleaned mutual-fund data in object storage.
BUCKET = "financial-data-store"
ASSET = "mutual_funds"
CATEGORY = "clean"

# Base URI with a trailing slash, so a file name can be appended directly.
PATH = "r2://" + "/".join((BUCKET, ASSET, CATEGORY)) + "/"

In [7]:
# Pull the whole daily-NAV parquet file from R2 into a pandas DataFrame.
nav_df = con.sql(f"""
    SELECT * 
    FROM read_parquet('{PATH}nav_daily_growth_plan.parquet')
""").df()

: 

In [None]:
# Echo the resolved base URI as a sanity check.
PATH

In [None]:
# Preview the first five rows of the raw NAV data.
nav_df.head(n=5)

In [None]:
# Restrict to direct-plan equity schemes and derive per-scheme daily returns.
# Rows are ordered by (scheme_code, date) so every return compares a NAV with
# the previous observation of the same scheme.
analysis_df = (nav_df
               .query("(scheme_category_level1 == 'Equity Scheme') & (is_direct == True)")
               .sort_values(['scheme_code', 'date'], ascending=[True, True])
               .assign(
                   # Forward-fill NAV gaps explicitly within each scheme and turn off
                   # pct_change's own filling. The implicit forward-fill default is
                   # deprecated in pandas 2.x and removed in 3.x; spelling it out keeps
                   # the same numbers on both.
                   perc_return=lambda x: (x.groupby('scheme_code')['nav'].ffill()
                                          .groupby(x['scheme_code'])
                                          .pct_change(fill_method=None)),
                   # log(1 + r): additive over time, so period returns can be summed.
                   log_return=lambda x: np.log1p(x['perc_return'])
               )
               )

In [None]:
# Scheme-level columns used as grouping keys for the annual aggregation.
group_cols = [
    'scheme_code',
    'isin_growth',
    'isin_dividend',
    'amc_name',
    'scheme_name',
    'scheme_type',
    'scheme_category',
    'scheme_nav_name',
    'scheme_category_level1',
    'scheme_category_level2',
    'is_direct',
    'is_growth_plan',
]

In [None]:
# Annual log return per scheme: daily log returns are summed within each
# year-end ('YE') bucket of `date`. dropna=False keeps schemes whose
# grouping columns contain missing values.
annual_df = (
    analysis_df
    .groupby(by=group_cols + [pd.Grouper(key='date', freq='YE')], dropna=False)
    ['log_return']
    .sum()
    .reset_index()
)

In [None]:
# Copy the annual table to the system clipboard for pasting into a spreadsheet.
# NOTE(review): presumably needs a desktop clipboard and would fail on a headless
# kernel - confirm, and consider writing a file instead if this must be reproducible.
annual_df.to_clipboard(excel=True)

In [None]:
# List the columns available after the return columns were added.
analysis_df.columns

In [None]:
# Display the filtered frame with its return columns.
# NOTE(review): this renders the whole frame's repr; .head() or .sample() may be a lighter preview.
analysis_df

In [None]:
# Latest and earliest date per scheme, computed on the unfiltered NAV data.
# The list-valued agg spec yields two-level column labels ('date', 'max') and ('date', 'min').
check = (
    nav_df
    .groupby(by=['scheme_code', 'scheme_name'], as_index=False)
    .agg({'date': ['max', 'min']})
)