In [1]:
import pandas as pd
import talib as ta
import numpy as np

# pd.options.display.float_format = '{:.1f}'.format
pd.options.display.float_format = '{:.2f}'.format

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
from pathlib import Path
from sys import path

notebook_path = Path.cwd()
SITE = notebook_path.parent
path.append(str(SITE.absolute()))
from libs.a_helpers import *
from libs.c_helpers import *

In [4]:
ticker = "VNI"
filename = SITE / f'investing/data/exports/indices/{ticker}.csv'
df = pd.read_csv(filename, infer_datetime_format=True)[:-1]
df['Volume'] = df['Volume'].astype(int)
df.set_index('Date', inplace=True)
df.index = pd.to_datetime(df.index)

In [5]:
df.rename(columns={'Volume': 'Vol', 'Change': 'Chg'}, inplace=True)

In [6]:
# daily_df = df.copy()
# # daily_df['Return'] = 1 + (daily_df['Change'] / 100)
# # daily_df['CumReturn'] = daily_df['Return'].cumprod()

In [7]:
df.tail()

Unnamed: 0_level_0,Close,Open,High,Low,Vol,Chg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-25,1267.86,1281.8,1286.84,1264.51,1240000,-1.09
2024-03-26,1282.21,1267.86,1282.5,1264.42,941700,1.13
2024-03-27,1283.09,1282.21,1286.72,1276.95,919280,0.07
2024-03-28,1290.18,1283.09,1293.9,1283.09,1010000,0.55
2024-03-29,1284.09,1290.18,1293.04,1282.44,922550,-0.47


In [8]:
weekly_df = df.resample('W').agg({'Close': 'last', 'Open': 'first', 'High': 'max', 'Low': 'min', 'Vol': 'sum', 'Chg': 'sum'})

weekly_df['WLowMax'] = df.groupby(pd.Grouper(freq='W'))['Low'].max()
weekly_df['WHighMin'] = df.groupby(pd.Grouper(freq='w'))['High'].min()
weekly_df['WM_Diff'] = weekly_df['WLowMax'] - weekly_df['WHighMin']

# weekly_df['WM_Diff'] = abs(weekly_df['WLowMax'] - weekly_df['WHighMin'])
# weekly_df['WM_Stat'] = weekly_df.apply(lambda row: 1 if row['WLowMax'] < row['WHighMin'] else 0, axis=1)

weekly_df['WStat'] = weekly_df.apply(lambda row: 1 if row['Chg'] > 0 else 0, axis=1)

# weekly_df.to_csv(f'data/{filename}_OK_Weekly.csv')

In [9]:
weekly_df.tail(10)

Unnamed: 0_level_0,Close,Open,High,Low,Vol,Chg,WLowMax,WHighMin,WM_Diff,WStat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-28,1175.67,1181.5,1186.96,1168.5,3422420,-0.48,1175.54,1173.83,1.71,0
2024-02-04,1172.55,1175.67,1182.53,1162.45,3991600,-0.25,1174.88,1174.5,0.38,0
2024-02-11,1198.53,1172.55,1199.52,1172.55,2180230,2.2,1188.48,1188.45,0.03,1
2024-02-18,1209.7,1203.08,1210.25,1198.81,1691360,0.93,1202.5,1207.56,-5.06,1
2024-02-25,1212.0,1209.7,1240.81,1209.7,5209210,0.21,1226.15,1225.48,0.67,1
2024-03-03,1258.28,1212.0,1264.39,1210.52,4899290,3.76,1247.87,1225.21,22.66,1
2024-03-10,1247.35,1258.28,1277.51,1247.35,5620000,-0.85,1260.24,1268.25,-8.01,0
2024-03-17,1263.78,1247.35,1276.41,1233.14,4924440,1.34,1258.67,1248.33,10.34,1
2024-03-24,1281.8,1263.78,1291.27,1221.67,6170220,1.45,1276.38,1249.75,26.63,1
2024-03-31,1284.09,1281.8,1293.9,1264.42,5033530,0.19,1283.09,1282.5,0.59,1


In [10]:
monthly_df = weekly_df.resample('M').agg({'Close': 'last','Open': 'first','High': 'max','Low': 'min','Vol': 'sum','Chg': 'sum'})

# monthly_df['Month'] = monthly_df.index.strftime('%b')

monthly_df['MLowMax'] = weekly_df.groupby(pd.Grouper(freq='M'))['Low'].max()
monthly_df['MHighMin'] = weekly_df.groupby(pd.Grouper(freq='M'))['High'].min()

monthly_df['MM_Diff'] = monthly_df['MLowMax'] - monthly_df['MHighMin']
monthly_df['MStat'] = monthly_df.apply(lambda row: 1 if row['Chg'] > 0 else 0, axis=1)

# monthly_df.to_csv(f'data/{filename}_OK_Monthly.csv')

In [11]:
monthly_df.tail(10)

Unnamed: 0_level_0,Close,Open,High,Low,Vol,Chg,MLowMax,MHighMin,MM_Diff,MStat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-30,1129.38,1063.76,1130.61,1063.76,18368060,6.02,1102.58,1092.24,10.34,1
2023-07-31,1207.67,1129.38,1207.67,1119.95,22071440,6.74,1183.39,1138.67,44.72,1
2023-08-31,1183.37,1207.67,1246.22,1149.13,21575570,-1.86,1213.37,1193.79,19.58,0
2023-09-30,1193.05,1183.37,1255.11,1174.57,19023200,0.9,1224.05,1224.84,-0.79,1
2023-10-31,1060.62,1193.05,1195.62,1037.46,17893350,-11.41,1127.03,1111.39,15.64,0
2023-11-30,1095.61,1054.03,1132.75,1020.01,16842970,3.46,1093.19,1082.23,10.96,1
2023-12-31,1129.93,1095.61,1135.05,1076.45,18335930,3.12,1103.06,1104.65,-1.59,1
2024-01-31,1175.67,1129.93,1186.96,1128.32,14944510,3.99,1168.5,1160.08,8.42,1
2024-02-29,1212.0,1175.67,1240.81,1162.45,13072400,3.09,1209.7,1182.53,27.17,1
2024-03-31,1284.09,1212.0,1293.9,1210.52,26647480,5.89,1264.42,1264.39,0.03,1


In [12]:
yearly_df = monthly_df.resample('Y').agg({'Close': 'last','Open': 'first','High': 'max','Low': 'min','Vol': 'sum','Chg': 'sum'})

yearly_df['YLowMax'] = weekly_df.groupby(pd.Grouper(freq='Y'))['Low'].max()
yearly_df['YHighMin'] = weekly_df.groupby(pd.Grouper(freq='Y'))['High'].min()

yearly_df['YM_Diff'] = yearly_df['YLowMax'] - yearly_df['YHighMin']
yearly_df['YStat'] = yearly_df.apply(lambda row: 1 if row['Chg'] > 0 else 0, axis=1)

In [13]:
yearly_df

Unnamed: 0_level_0,Close,Open,High,Low,Vol,Chg,YLowMax,YHighMin,YM_Diff,YStat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-12-31,206.83,101.55,206.83,101.55,3720,73.5,202.32,105.2,97.12,1
2001-12-31,239.27,210.56,571.04,203.12,19730,23.31,518.62,214.32,304.3,1
2002-12-31,183.05,235.4,235.4,174.62,37040,-25.89,227.79,175.77,52.02,0
2003-12-31,166.03,182.65,183.41,130.9,52640,-8.75,182.39,134.2,48.19,0
2004-12-31,240.24,165.98,280.53,165.57,242840,39.11,272.89,166.94,105.95,1
2005-12-31,309.14,240.49,325.25,232.41,351940,26.02,317.73,235.14,82.59,1
2006-12-31,751.77,309.56,815.98,304.23,1128360,94.07,744.15,307.5,436.65,1
2007-12-31,927.02,751.04,1179.32,741.27,2389680,24.6,1132.23,818.04,314.19,1
2008-12-31,304.46,925.66,926.01,284.06,3365260,-104.46,903.09,299.54,603.55,0
2009-12-31,495.08,306.45,633.21,234.66,10882770,54.59,605.8,247.64,358.16,1


In [14]:
# three_yearly_df = df.resample('3Y').agg({'Close': 'last', 'Open': 'first', 'High': 'max', 'Low': 'min', 'Volume': 'sum', 'Change': 'sum'})
# three_yearly_df