In [None]:
!pip install pandas
!pip install h5py
!pip install jupyter-dash

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Collecting dash (from jupyter-dash)
  Downloading dash-2.17.0-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
Collecting retrying (from jupyter-dash)
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting ansi2html (from jupyter-dash)
  Downloading ansi2html-1.9.1-py3-none-any.whl (17 kB)
Collecting dash-html-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting dash-core-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-table==5.0.0 (from dash->jupyter-dash)
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting jedi>=0.16 (from ipython->jupyter-dash)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

# Fund price data: download, clean, and plot

In [None]:
import pandas as pd
import h5py
import numpy as np
import os
from urllib import request
import ssl
import datetime
import urllib
import logging
import sys
from jupyter_dash import JupyterDash as Dash
from dash import html, dcc, callback, Output, Input
import plotly.express as px

# Turn on Colab's custom widget manager so third-party widgets can be displayed.
from google.colab import output
output.enable_custom_widget_manager()

# Root logger: INFO and above, written to stdout as "time | LEVEL : message".
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s : %(message)s',
)

# NOTE(review): this SSL context performs no certificate verification and is
# created through a private ssl helper; every request made with it is open to
# interception. Confirm this is really required for the target host.
context = ssl._create_unverified_context()



In [None]:
class DataStore:
    """Header-less CSV store of daily fund prices scraped from the report page at ``URL``.

    Every row of the file is ``date,fund,sell,buy``; dates are written using
    ``DATE_FORMAT``.
    """

    URL = 'https://utasl.lk/reports/index3.php?date='
    # Date format used inside the CSV file and for the start/end arguments of
    # download_and_save_data().
    DATE_FORMAT = '%m-%d-%Y'
    # Column names of the CSV file, in file order.
    COLUMNS = ['date', 'fund', 'sell', 'buy']
    # urllib identifies itself as "Python-urllib/x.y" by default, which some
    # servers reject with HTTP 403.
    # NOTE(review): presumed cause of the 403 returned by this endpoint - confirm.
    REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}

    def __init__(self, file_name):
        """Remember the CSV path and set the root logger to INFO."""
        self.file_name = file_name
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)

    def add(self, df: pd.DataFrame, overwrite=False):
        """Write ``df`` (index included, no header) to the CSV file.

        Args:
            df: rows to store; the index is written as the first column.
            overwrite: replace the file when True, append to it otherwise.

        A DatetimeIndex is written using ``DATE_FORMAT`` so the file keeps a
        single date format regardless of whether the rows come from a download
        (string dates) or from a frame whose dates were already parsed.
        """
        df.to_csv(
            self.file_name,
            mode='w' if overwrite else 'a',
            index=True,
            header=False,
            date_format=self.DATE_FORMAT,
        )

    def read(self):
        """Return the whole CSV file as a DataFrame with columns ``COLUMNS``."""
        return pd.read_csv(self.file_name, names=self.COLUMNS, header=None)

    def __str__(self):
        return str(self.read())

    @staticmethod
    def date_iterator(start_date, end_date):
        """Yield every date from ``start_date`` to ``end_date`` inclusive, one day at a time."""
        current_date = start_date
        while current_date <= end_date:
            yield current_date
            current_date += datetime.timedelta(days=1)

    @staticmethod
    def get_pd_tables_from_url(date):
        """Fetch the report page for ``date`` and return the HTML tables found on it.

        Args:
            date: date string appended verbatim to ``URL``.

        Returns:
            The list of DataFrames parsed by ``pd.read_html``, or an empty list
            when the page contains no table.

        Raises:
            urllib.error.URLError: when the request itself fails (HTTPError is
                a subclass).
        """
        req = request.Request(DataStore.URL + date, headers=DataStore.REQUEST_HEADERS)
        # The context manager closes the HTTP response even if reading fails.
        with request.urlopen(req, context=context) as response:
            html = response.read()
        try:
            return pd.read_html(html)
        except ValueError:
            # read_html raises ValueError when the page has no table; report that
            # as "no tables" so callers can skip the day instead of aborting.
            return []

    @staticmethod
    def remove_header_and_footer_from_table(df: pd.DataFrame):
        """Return a copy of ``df`` without its first four rows and its last row.

        The row counts match what this method has always removed in total; the
        input frame itself is left untouched.
        """
        return df.iloc[4:-1].copy()

    def download_and_save_data(self, start_date, end_date):
        """Download the report of every weekday in the range and append it to the CSV.

        Args:
            start_date: first day to fetch, formatted as ``DATE_FORMAT``.
            end_date: last day to fetch (inclusive), formatted as ``DATE_FORMAT``.
        """
        first_day = datetime.datetime.strptime(start_date, self.DATE_FORMAT).date()
        last_day = datetime.datetime.strptime(end_date, self.DATE_FORMAT).date()

        for date in DataStore.date_iterator(first_day, last_day):
            # weekday() is 5 for Saturday and 6 for Sunday; those days are skipped.
            if date.weekday() >= 5:
                continue

            tables = DataStore.get_pd_tables_from_url(date.strftime('%Y-%m-%d'))
            if len(tables) == 0:
                self.logger.info('No table found for %s, skipping.', date)
                continue

            df = DataStore.remove_header_and_footer_from_table(tables[0])
            df.insert(0, 'date', date.strftime(self.DATE_FORMAT))
            df.columns = self.COLUMNS
            df.set_index('date', inplace=True)
            self.add(df)
            now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(f'{now} - Downloaded and saved {date}. #{len(df)} funds.')


datastore = DataStore('/content/drive/MyDrive/ut/data.csv')

In [None]:
# Fetch and append the weekday reports between the two dates (inclusive, MM-DD-YYYY).
datastore.download_and_save_data(
    start_date='02-01-2024',
    end_date='05-21-2024',
)

HTTPError: HTTP Error 403: Forbidden

In [None]:
# Load every stored row into a frame with the columns date, fund, sell, buy.
df = datastore.read()


In [None]:
# Start from a frame that has 'date' as a column, so this cell can be re-run
# after 'date' has already been turned into the index.
_rows = df.reset_index() if df.index.name == 'date' else df

df = (
    _rows
    .assign(date=pd.to_datetime(_rows['date']))
    # De-duplicate while 'date' is still a column: drop_duplicates() ignores the
    # index, so running it after set_index() would also discard a fund whose
    # sell/buy prices are simply unchanged on a later day.
    .drop_duplicates()
    .set_index('date')
    # Stable sort keeps the file order of the rows that share a date.
    .sort_index(ascending=True, kind='mergesort')
)


In [None]:
# Replace the contents of the CSV file with the current frame (overwrite instead of append).
datastore.add(df,overwrite=True)

In [None]:


# Keep the first decimal number found in each 'sell' value and parse it as a float.
df = df.assign(
    sell=df['sell'].astype(str).str.extract(r'(\d+\.\d+)', expand=False).astype(float)
)

# Fractional change of every fund's sell price relative to that fund's previous row.
df['percentage_change'] = df.groupby('fund')['sell'].pct_change()

# Outlier threshold: a row is dropped when the price moved by more than 5% in one step.
threshold = 0.05

# The first row of a fund has no previous value, so its change is NaN and the
# comparison below is False for it; keep that row explicitly instead of letting
# every fund silently lose its first observation.
is_first_row = df.groupby('fund').cumcount() == 0
within_threshold = df['percentage_change'].abs() <= threshold
filtered_df = df[is_first_row | within_threshold]

# The helper column is only needed for the filter.
df = filtered_df.drop(columns=['percentage_change'])

In [None]:
from pandas.io.formats.style import Subset  # NOTE(review): not used in this cell
import ipywidgets as widgets
import plotly.graph_objects as go

# Fund plotted first when it is present in the data.
DEFAULT_FUND = 'CAL Balanced Fund'

# Alphabetical list of the funds that can be selected.
fund_names = sorted(df['fund'].dropna().unique())

# Start the dropdown on the same fund as the figure so the two never disagree.
if DEFAULT_FUND in fund_names:
    initial_fund = DEFAULT_FUND
else:
    initial_fund = fund_names[0] if fund_names else None

dff = df[df.fund == initial_fund]

fig = px.line(dff, x=dff.index, y='sell', height=800, title=initial_fund)
f = go.FigureWidget(fig)
display(f)


def on_change(change):
    """Redraw the figure with the sell prices of the fund picked in the dropdown.

    Args:
        change: the ipywidgets change notification; only changes of the
            widget's ``value`` trait are acted on.
    """
    global dff
    if change['type'] == 'change' and change['name'] == 'value':
        dff = df[df.fund == change['new']]
        trace = f.data[0]
        # Send x, y and the title to the front end as one update instead of
        # three separate redraws.
        with f.batch_update():
            trace.x = dff.index
            trace.y = pd.to_numeric(dff['sell'])
            f.layout.title.text = change['new']


dropdown = widgets.Dropdown(options=fund_names, value=initial_fund)
# Only listen to changes of the selected value, not to every trait of the widget.
dropdown.observe(on_change, names='value')
display(dropdown)

FigureWidget({
    'data': [{'hovertemplate': 'date=%{x}<br>sell=%{y}<extra></extra>',
              'legendgroup': '',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'circle'},
              'mode': 'lines',
              'name': '',
              'showlegend': False,
              'type': 'scattergl',
              'uid': '5eb570b7-609e-4379-b3a6-0ebde50113d8',
              'x': array([datetime.datetime(2015, 11, 13, 0, 0),
                          datetime.datetime(2015, 11, 16, 0, 0),
                          datetime.datetime(2015, 11, 17, 0, 0), ...,
                          datetime.datetime(2024, 2, 28, 0, 0),
                          datetime.datetime(2024, 2, 29, 0, 0),
                          datetime.datetime(2024, 3, 4, 0, 0)], dtype=object),
              'xaxis': 'x',
              'y': array([10.4475, 10.4547, 10.4571, ..., 17.0585, 17.0196, 16.983 ]),
              'yaxis': 'y'}],
    'layout': {'height': 800,
   

Dropdown(options=('ArpicoAtaraxia S&P SL 20 Index Fund', 'Assetline Gilt Edged Fund', 'Assetline Income Fund',…