In [1]:
import gradio as gr
import pandas as pd
import numpy as np
import json
import io 
import sys
from PIL import Image
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS

  from .autonotebook import tqdm as notebook_tqdm


# Recommendation generation

In [2]:
embeddings = HuggingFaceEmbeddings()

modules.json: 100%|██████████| 349/349 [00:00<00:00, 187kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
config_sentence_transformers.json: 100%|██████████| 116/116 [00:00<00:00, 23.5kB/s]
README.md: 100%|██████████| 10.6k/10.6k [00:00<?, ?B/s]
sentence_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<?, ?B/s]
config.json: 100%|██████████| 571/571 [00:00<?, ?B/s] 
pytorch_model.bin: 100%|██████████| 438M/438M [01:03<00:00, 6.90MB/s] 
tokenizer_config.json: 100%|██████████| 363/363 [00:00<?, ?B/s] 
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 2.16MB/s]
tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 912kB/s]
special_tokens_map.json: 100%|██████████| 239/239 [00:00<?, ?B/s] 
1_Pooling/config.json: 100%|██████████| 190/190 [00:00<00:00, 13.9kB/s]


In [3]:
new_db = FAISS.load_local("../../vector_db/phase2_symbols", embeddings)

In [4]:
def get_recommendations(query, top_k):
    """Print the vector-store matches for a free-text query.

    Runs a relevance-scored similarity search against ``new_db`` and prints
    one numbered line per match (symbol, name and score). Matches whose score
    is not ``>= 0`` are skipped, but they still consume a rank number.

    Args:
        query: Free-text description of the kind of business wanted.
        top_k: Number of matches to request from the vector store.
    """
    matches = new_db.similarity_search_with_relevance_scores(query=query, k=top_k)
    for rank, (doc, score) in enumerate(matches, start=1):
        if score >= 0:
            symbol = doc.metadata.get('symbol')
            name = doc.metadata.get('name')
            print('{}. {} : {} |score : {}'.format(rank, symbol, name, score))


In [5]:
stock_pref = 'businesses in the domain of software and Inforamation technology'

get_recommendations(stock_pref, top_k = 10)

1. ECL : E - CHANNELLING PLC |score : 0.15225744611490732
2. SLTL : SRI LANKA TELECOM PLC |score : 0.1514007692720748
3. GEST : GESTETNER OF CEYLON PLC |score : 0.09990819427867814
4. HAYL : HAYLEYS PLC |score : 0.0751765921598796
5. DIAL : DIALOG AXIATA PLC |score : 0.07034462456549961
6. LPL : LAUGFS POWER PLC |score : 0.06791991637066785
7. DIMO : DIESEL & MOTOR ENGINEERING PLC |score : 0.06789513402374348
8. HBS : hSenid Business Solutions PLC |score : 0.06200249883973841
9. MELS : MELSTACORP PLC |score : 0.060525841855310736
10. MDL : MYLAND DEVELOPMENTS PLC |score : 0.05051326793566868


# Insight generation

In [6]:
from datetime import datetime

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

In [7]:
data_path = '../../data/phase_2/price_data_v2.xlsx'

data = pd.read_excel(data_path, index_col= False)

In [8]:
data.head()


Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,EXT,7.3,7.5,7.3,7.5,2024-02-07,1707264000
1,COOP,2.3,2.3,2.2,2.2,2024-02-07,1707264000
2,MDL,8.3,8.6,8.3,8.6,2024-02-07,1707264000
3,FCT,24.9,26.4,24.8,26.2,2024-02-07,1707264000
4,HBS,11.5,11.6,11.5,11.5,2024-02-07,1707264000


In [9]:
# def split_(stock_name):
#     return stock_name.split('.')[0]
# data['SECURITYCODE'] = data['SECURITYCODE'].apply(lambda x : split_(x))
# data = data[['SECURITYCODE','OPENINGPRICE','HIGHPX', 'LOWPX', 'CLOSINGPRICE', 'TRADEDATE', 'UNIX_TS']].dropna(axis= 0, how = 'any')
# data

# Plot OHLC

In [10]:
def plot_ohlc(data, title):
    """Show a candlestick chart of one symbol's price history.

    Args:
        data: Frame exposing TRADEDATE, OPENINGPRICE, HIGHPX, LOWPX and
            CLOSINGPRICE columns.
        title: Used both as the figure title and as the trace name.
    """
    candle_spec = {'rowspan': 1, 'type': 'Candlestick'}
    figure = make_subplots(
        rows=1,
        cols=1,
        vertical_spacing=0.01,
        specs=[[candle_spec]],
        shared_xaxes=True,
    )
    figure.add_trace(
        go.Candlestick(
            x=data.TRADEDATE,
            open=data.OPENINGPRICE,
            high=data.HIGHPX,
            low=data.LOWPX,
            close=data.CLOSINGPRICE,
            name=title,
        ),
        row=1,
        col=1,
    )
    figure.update_layout(title=title)

    # Drop the zoom and pan buttons from the mode bar of the rendered chart.
    display_config = {'modeBarButtonsToRemove': ['zoom', 'pan']}
    figure.show(config=display_config)

In [11]:
# from turtle import bgcolor
# from matplotlib.axis import XAxis


def plot_ohlc_w_des(data, titile, des):
    """Build a two-column figure: a text panel on the left, a candlestick chart on the right.

    Args:
        data: Frame exposing TRADEDATE, OPENINGPRICE, HIGHPX, LOWPX and
            CLOSINGPRICE columns.
        titile: Figure title (the parameter name is misspelled; it is kept
            because callers may pass it by keyword).
        des: Text placed in the annotation of the left-hand panel.

    Returns:
        The plotly figure. Unlike ``plot_ohlc`` this function does not call
        ``show()``; the caller decides how to render it.
    """
    x_vals = data.TRADEDATE

    # Narrow first column (15%) for the description, wide second column (85%) for the chart.
    fig = make_subplots(rows=1, cols=2, vertical_spacing=0.01, column_widths=[0.15, 0.85], subplot_titles=["", "Chart"]) #,shared_xaxes=True
    ohlc_obj = go.Candlestick(x=x_vals, open = data.OPENINGPRICE, high=data.HIGHPX, low=data.LOWPX, close=data.CLOSINGPRICE, name= 'OHLC')
    fig.add_trace(ohlc_obj, row = 1, col = 2)
    # Empty scatter trace so the left-hand subplot exists and can carry the annotation.
    fig.add_trace(go.Scatter(x=[], y=[]), row=1, col=1)

    # The description text is attached to the left-hand subplot.
    # NOTE(review): both "paper" refs and row/col are passed; how plotly resolves
    # x=10, y=10 in that combination should be confirmed against the plotly docs.
    fig.add_annotation(x=10, y=10, xref="paper", yref="paper",
                   text=des, showarrow=False,
                   align = 'left',
                   font=dict(size=10, color="black"),
                   row=1, col=1)

    # Hide the axes of the first subplot so only the text is visible there.
    fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False))
    fig.update_yaxes(showgrid=False, zeroline=False, showticklabels=False, row=1, col=1)
    # NOTE(review): paper_bgcolor / plot_bgcolor are layout properties, not annotation
    # properties; presumably this selector matches nothing and the call is a no-op - confirm.
    fig.update_annotations(selector=dict(row=1, col=1), paper_bgcolor='rgba(0,0,0,0)',plot_bgcolor='rgba(0,0,0,0)')
    fig.update_layout(height=500, width=1500, title_text=titile)

    return fig

## OHLC candles

## Volatility calculations

In [12]:
def get_log_returns_v2(df ,close_col , horizon):
    """Add ``returns`` and ``log_returns`` columns over a fixed row horizon.

    ``returns`` at row i is the price at row i divided by the price
    ``horizon`` rows earlier; ``log_returns`` is its natural logarithm.
    Rows containing a missing value in any column (including the first
    ``horizon`` rows, which have no return) are dropped.

    Args:
        df: Price frame, ordered so that consecutive rows are consecutive
            observations.
        close_col: Name of the price column.
        horizon: Number of rows between the two prices of each return.

    Returns:
        A new frame with a fresh 0..n-1 index; ``df`` itself is not modified.
    """
    prices = df[close_col]
    later_prices = prices[horizon:].to_numpy()
    earlier_prices = prices[:-horizon].to_numpy()

    result = df.reset_index(drop=True)
    result.loc[horizon:, 'returns'] = later_prices / earlier_prices
    result['log_returns'] = np.log(result['returns'])

    return result.dropna(axis=0, how='any').reset_index(drop=True)

In [13]:
def get_std_volatility(df, close_col = 'CLOSINGPRICE', horizon = 5):
    """Mean and standard deviation of the log returns of a price series.

    Args:
        df: Price frame, ordered oldest-first.
        close_col: Name of the price column to use.
        horizon: Number of rows between the two prices of each return.

    Returns:
        ``(mean, std)`` of the ``log_returns`` column produced by
        ``get_log_returns_v2``.
    """
    # Forward the caller's arguments: the previous version ignored both
    # parameters and always used 'CLOSINGPRICE' with a horizon of 5.
    df_out = get_log_returns_v2(df, close_col, horizon)
    log_returns = df_out.log_returns

    return log_returns.mean(), log_returns.std()

### Garman Klass volatility
![image.png](attachment:image.png)

In [14]:
def GKHV(o,h,l,c):
    """Garman-Klass variance estimate for one bar (or element-wise for arrays).

    Computes ``0.5 * ln(h/l)**2 - (2*ln(2) - 1) * ln(c/o)**2``.

    Args:
        o: Opening price.
        h: High price.
        l: Low price.
        c: Closing price.

    Returns:
        The per-bar variance estimate. Callers average it over a period and
        take the square root to obtain a volatility.
    """
    range_term = 0.5 * np.log(h / l) ** 2
    open_close_term = (2 * np.log(2) - 1) * np.log(c / o) ** 2

    # The open-to-close term is subtracted in the Garman-Klass estimator;
    # the previous version added it, which overstated the variance.
    volatility = range_term - open_close_term

    return volatility

### Rogers and Satchell

![image.png](attachment:image.png)

In [15]:
def RS(o,h,l,c,prev_c):
    """Rogers-Satchell variance estimate for one bar (or element-wise for arrays).

    With all prices taken as logs relative to the open, the estimate is
    ``high * (high - close) + low * (low - close)``.

    Args:
        o: Opening price.
        h: High price.
        l: Low price.
        c: Closing price.
        prev_c: Previous close. Accepted but not used by the calculation.

    Returns:
        The per-bar variance estimate.
    """
    log_high = np.log(h / o)
    log_close = np.log(c / o)
    log_low = np.log(l / o)

    return log_high * (log_high - log_close) + log_low * (log_low - log_close)


### Yang and Zhang
![image.png](attachment:image.png)

In [16]:
# Build one row of volatility estimates per security code. Values are
# fractions (not multiplied by 100).
#
# Fix: the loop used to iterate over `data` itself, which yields the column
# names of the frame rather than the security codes.
symbols = data.SECURITYCODE.unique()

volatility_records = []
for symbol in symbols:
    symbol_df = data.loc[data.SECURITYCODE == symbol].sort_values(by = 'UNIX_TS').reset_index(drop = True)

    # GKHV and RS only use element-wise arithmetic and np.log, so they can be
    # applied to whole columns instead of row by row.
    symbol_df['gkhv'] = GKHV(symbol_df['OPENINGPRICE'], symbol_df['HIGHPX'], symbol_df['LOWPX'], symbol_df['CLOSINGPRICE'])
    gkhv = np.sqrt(symbol_df.gkhv.mean())

    symbol_df['prev_c'] = symbol_df['CLOSINGPRICE'].shift(1)
    symbol_df['rs'] = RS(symbol_df['OPENINGPRICE'], symbol_df['HIGHPX'], symbol_df['LOWPX'], symbol_df['CLOSINGPRICE'], symbol_df['prev_c'])
    rs = np.sqrt(symbol_df.rs.mean())

    # Yang-Zhang is defined on log returns: overnight (previous close -> open)
    # and open -> close. The raw price ratios were used before.
    symbol_df['norm_o'] = np.log(symbol_df['OPENINGPRICE'] / symbol_df['prev_c'])
    symbol_df['norm_c'] = np.log(symbol_df['CLOSINGPRICE'] / symbol_df['OPENINGPRICE'])

    n_obs = len(symbol_df)
    if n_obs > 1:
        k = 0.34 / (1.34 + (n_obs + 1) / (n_obs - 1))
        yangzhang = np.sqrt(symbol_df['norm_o'].var() + k * symbol_df['norm_c'].var() + (1 - k) * symbol_df['rs'].mean())
    else:
        # A single observation has no sample variance (and n - 1 would be zero).
        yangzhang = np.nan

    # A fresh dict per symbol, so rows never share state between iterations.
    v_dict = {'symbol': symbol, 'gkhv': gkhv, 'rs': rs, 'yangzhang': yangzhang}
    volatility_records.append(v_dict)

# Build the frame once instead of concatenating inside the loop.
symbol_volatilities = pd.DataFrame(volatility_records, columns = ['symbol', 'gkhv', 'rs', 'yangzhang'])

### get_volatility_insights

In [17]:
def get_volatility_insights(symbol_df):
    """Garman-Klass, Rogers-Satchell and Yang-Zhang volatility, in percent.

    Args:
        symbol_df: Price rows of one symbol, ordered oldest-first, with
            OPENINGPRICE, HIGHPX, LOWPX and CLOSINGPRICE columns. The frame
            is only read; no columns are added to it.

    Returns:
        ``(gkhv, rs, yangzhang)``, each the square root of the period's mean
        variance estimate multiplied by 100. ``yangzhang`` is NaN when fewer
        than two rows are supplied. A value is NaN when the underlying mean
        variance is negative (e.g. bars whose close lies outside the
        high/low range).
    """
    opens = symbol_df['OPENINGPRICE']
    highs = symbol_df['HIGHPX']
    lows = symbol_df['LOWPX']
    closes = symbol_df['CLOSINGPRICE']
    prev_closes = closes.shift(1)

    # GKHV and RS only use element-wise arithmetic and np.log, so whole
    # columns can be passed instead of applying them row by row.
    gk_var = GKHV(opens, highs, lows, closes)
    rs_var = RS(opens, highs, lows, closes, prev_closes)

    gkhv = np.sqrt(gk_var.mean()) * 100
    rs = np.sqrt(rs_var.mean()) * 100

    n_obs = len(symbol_df)
    if n_obs < 2:
        # The weight k divides by (n - 1) and the variances need two points;
        # this used to raise ZeroDivisionError for a one-row frame.
        return gkhv, rs, np.nan

    # Yang-Zhang is defined on log returns: overnight (previous close -> open)
    # and open -> close. The raw price ratios were used before.
    overnight = np.log(opens / prev_closes)
    open_to_close = np.log(closes / opens)
    k = 0.34 / (1.34 + (n_obs + 1) / (n_obs - 1))

    yangzhang = np.sqrt(overnight.var() + k * open_to_close.var() + (1 - k) * rs_var.mean()) * 100

    return gkhv, rs, yangzhang

## Drawdown

In [18]:
# returns percentage
def max_drowdown(symb_df,close):
    """Largest peak-to-trough decline of a price series, as a percentage.

    Args:
        symb_df: Frame whose length sets the look-back window for the
            running peak.
        close: Price series to evaluate.

    Returns:
        The most negative value of ``close / running_peak - 1`` times 100
        (0.0 when the series never falls below an earlier peak).
    """
    lookback = len(symb_df)
    running_peak = close.rolling(window=lookback, min_periods=1).max()
    drawdown = close / running_peak - 1
    running_trough = drawdown.rolling(window=len(drawdown), min_periods=1).min()
    return running_trough.min() * 100


## Returns

In [19]:
def find_act_fday(symbol_df ,possible_dates):
    """Return the first candidate date on which the symbol actually traded.

    Args:
        symbol_df: Frame with a TRADEDATE column.
        possible_dates: Candidate dates, checked in the order given.

    Returns:
        The first candidate present in ``symbol_df.TRADEDATE``, or ``None``
        when none of them is.
    """
    return next(
        (candidate for candidate in possible_dates
         if candidate in symbol_df.TRADEDATE.values),
        None,
    )

### get_yr_2_date

In [20]:
#returns the percentage value
def get_yr_2_date(symbol_df):
    """Year-to-date return, max drawdown and volatility figures for one symbol.

    The "current" year is the year of the latest TRADEDATE in ``symbol_df``
    (not the wall-clock year), so results are reproducible for a fixed
    dataset. The period starts on the first day the symbol traded within
    the first business days of that year.

    Args:
        symbol_df: Price rows of one symbol ordered oldest-first; the last
            row is taken as "today".

    Returns:
        ``(return_pct, max_drawdown_pct, gkhv_pct, rs_pct, yangzhang_pct)``,
        each rounded to two decimals.

    Raises:
        IndexError: If the symbol did not trade on any of the candidate
            start dates.
    """
    fday = pd.Timestamp(datetime((symbol_df.TRADEDATE.max().year),1,1).date())
    possible_dates = pd.bdate_range(fday, fday+pd.offsets.BusinessDay(n=5))

    act_fday = find_act_fday(symbol_df,possible_dates)
    if act_fday is None:
        # Previously this surfaced as an unexplained out-of-bounds IndexError
        # from .iloc[0]; the exception type is kept, the message is new.
        raise IndexError(
            'get_yr_2_date: no trade date found in the {} business days starting {}'.format(
                len(possible_dates), fday.date()))

    fday_value = symbol_df.loc[symbol_df.TRADEDATE == act_fday].CLOSINGPRICE.iloc[0]
    today_value = symbol_df.iloc[-1].CLOSINGPRICE

    yr_2_dt_return = (today_value - fday_value)/fday_value

    yr_2_date_df = symbol_df[symbol_df.TRADEDATE >= act_fday].sort_values(by = 'UNIX_TS')
    gkhv, rs, yangzhang = get_volatility_insights(yr_2_date_df)

    max_dd = max_drowdown(yr_2_date_df, yr_2_date_df['CLOSINGPRICE'])

    return round(yr_2_dt_return*100, 2), round(max_dd, 2) , round(gkhv, 2), round(rs, 2), round(yangzhang, 2)

### get_yr

In [21]:
# returns the percentage value
def get_yr(symbol_df):
    """Trailing one-year return, max drawdown and volatility figures for one symbol.

    The period starts on the first day the symbol traded within 20 business
    days of the date one year before the last row's TRADEDATE.

    Args:
        symbol_df: Price rows of one symbol ordered oldest-first; the last
            row is taken as "today".

    Returns:
        ``(return_pct, max_drawdown_pct, gkhv_pct, rs_pct, yangzhang_pct)``,
        each rounded to two decimals.

    Raises:
        IndexError: If ``symbol_df`` is empty, or the symbol did not trade on
            any of the candidate start dates (e.g. less than a year of
            history).
    """
    fday = symbol_df.TRADEDATE.iloc[-1] - pd.DateOffset(years=1)
    possible_dates = pd.bdate_range(fday, fday+pd.offsets.BusinessDay(n=20))

    act_fday = find_act_fday(symbol_df,possible_dates)
    if act_fday is None:
        # Previously this surfaced as an unexplained out-of-bounds IndexError
        # from .iloc[0]; the exception type is kept, the message is new.
        raise IndexError(
            'get_yr: no trade date found in the {} business days starting {}'.format(
                len(possible_dates), fday.date()))

    fday_value = symbol_df.loc[symbol_df.TRADEDATE == act_fday].CLOSINGPRICE.iloc[0]
    today_value = symbol_df.iloc[-1].CLOSINGPRICE

    yr_return = (today_value - fday_value)/fday_value

    yr_df = symbol_df[symbol_df.TRADEDATE >= act_fday].sort_values(by = 'UNIX_TS')
    gkhv, rs, yangzhang = get_volatility_insights(yr_df)

    max_dd = max_drowdown(yr_df, yr_df['CLOSINGPRICE'])

    return round(yr_return*100, 2), round(max_dd, 2) , round(gkhv, 2), round(rs, 2), round(yangzhang, 2)

### get_mn

In [22]:
# returns the percentage value
def get_mn(symbol_df):
    """Trailing one-month return, max drawdown and volatility figures for one symbol.

    The period starts on the first day the symbol traded within 5 business
    days of the date one month before the last row's TRADEDATE.

    Args:
        symbol_df: Price rows of one symbol ordered oldest-first; the last
            row is taken as "today".

    Returns:
        ``(return_pct, max_drawdown_pct, gkhv_pct, rs_pct, yangzhang_pct)``,
        each rounded to two decimals.

    Raises:
        IndexError: If ``symbol_df`` is empty, or the symbol did not trade on
            any of the candidate start dates.
    """
    fday = symbol_df.TRADEDATE.iloc[-1] - pd.DateOffset(months=1)
    possible_dates = pd.bdate_range(fday, fday+pd.offsets.BusinessDay(n=5))

    act_fday = find_act_fday(symbol_df,possible_dates)
    if act_fday is None:
        # Previously this surfaced as an unexplained out-of-bounds IndexError
        # from .iloc[0]; the exception type is kept, the message is new.
        raise IndexError(
            'get_mn: no trade date found in the {} business days starting {}'.format(
                len(possible_dates), fday.date()))

    fday_value = symbol_df.loc[symbol_df.TRADEDATE == act_fday].CLOSINGPRICE.iloc[0]
    today_value = symbol_df.iloc[-1].CLOSINGPRICE

    mn_return = (today_value - fday_value)/fday_value

    mn_df = symbol_df[symbol_df.TRADEDATE >= act_fday].sort_values(by = 'UNIX_TS')
    gkhv, rs, yangzhang = get_volatility_insights(mn_df)

    max_dd = max_drowdown(mn_df, mn_df['CLOSINGPRICE'])

    return round(mn_return*100, 2), round(max_dd, 2) , round(gkhv, 2), round(rs, 2), round(yangzhang, 2)

In [23]:
def get_insights(symbol):
    """Collect return, drawdown and volatility figures for one symbol as a dict.

    Args:
        symbol: Security code to look up in the module-level ``data`` frame.

    Returns:
        A dict keyed by period ('year_to_date', 'last_year', 'last_month').
        Every period maps the same five metric keys to the values returned
        by ``get_yr_2_date``, ``get_yr`` and ``get_mn`` respectively.
    """
    symbol_df = data.loc[data.SECURITYCODE == symbol].sort_values(by='UNIX_TS')

    # The same metric keys are used for every period.
    metric_keys = ('yr_2_dt_return', 'yr_2_dt_mx_dd', 'gkhv', 'rs', 'yangzhang')
    period_functions = (
        ('year_to_date', get_yr_2_date),
        ('last_year', get_yr),
        ('last_month', get_mn),
    )

    insight_dict = {}
    for period, compute in period_functions:
        insight_dict[period] = dict(zip(metric_keys, compute(symbol_df)))

    return insight_dict

In [24]:
def get_insights_html(symbol):
    """Format one symbol's period insights as ``<br>``-separated text.

    Args:
        symbol: Security code to look up in the module-level ``data`` frame.

    Returns:
        A string with one section per period (year to date, last year, last
        month), each listing return, max drawdown and the three volatility
        figures as percentages.
    """
    symbol_df = data.loc[data.SECURITYCODE == symbol].sort_values(by='UNIX_TS')

    y2d = get_yr_2_date(symbol_df)
    yr = get_yr(symbol_df)
    mn = get_mn(symbol_df)

    # Five metric lines shared by every section.
    metric_lines = (
        '   return : {}%<br>'
        '   max drow down : {}%<br>'
        '   gkhv : {}%<br>'
        '   rs : {}%<br>'
        '   yangzhang : {}%<br>'
    )
    sections = (
        ('year to date : <br>', y2d),
        ('last year : <br>', yr),
        ('last month : <br>', mn),
    )

    # Sections are separated by a blank ' <br>' line.
    desc = ' <br>'.join(
        heading + metric_lines.format(values[0], values[1], values[2], values[3], values[4])
        for heading, values in sections
    )
    return desc

# Combined Recommender

In [25]:
price_data = data.copy()
price_data.head(2)


Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,EXT,7.3,7.5,7.3,7.5,2024-02-07,1707264000
1,COOP,2.3,2.3,2.2,2.2,2024-02-07,1707264000


In [26]:
price_data.shape

(32745, 7)

In [27]:
def quey_generator(domains):
    """Build the search query: the selected domains joined by commas."""
    separator = ','
    return separator.join(domains)

def get_recommendations_gradio(domains, top_k = 5):
    """Return one annotated candlestick figure per recommended stock.

    Args:
        domains: Domain names selected in the UI.
        top_k: Number of matches to request from the vector store.

    Returns:
        A list of plotly figures, one per match in the order returned by the
        vector store. The relevance score is shown in each figure title and
        no score filtering is applied.
    """
    query = quey_generator(domains)
    matches = new_db.similarity_search_with_relevance_scores(query=query, k=top_k)

    reco_images = []
    for doc, score in matches:
        symbol = doc.metadata.get('symbol')
        name = doc.metadata.get('name')

        symbol_prices = price_data[price_data.SECURITYCODE == symbol]
        symbol_df = symbol_prices.sort_values(by='UNIX_TS').reset_index(drop=True)

        chart_title = '{} : {} | {}'.format(symbol, name, score)
        reco_images.append(plot_ohlc_w_des(symbol_df, chart_title, get_insights_html(symbol)))
    return reco_images



# Recommender UI: pick domains, press the button, get five charts.
with gr.Blocks() as demo:
    inputs = gr.Dropdown(
        ["finance", "transport", "technology", "agriculture"],
        multiselect=True,
        label="Domains",
        allow_custom_value=True,
        scale=5,
    )
    button = gr.Button("Generate Recommendations")

    # One plot slot per figure returned by get_recommendations_gradio
    # (its default top_k is 5).
    outputs = [gr.Plot() for _ in range(5)]

    button.click(get_recommendations_gradio, inputs=inputs, outputs=outputs)

    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


In [28]:
def quey_generator(domains):
    """Build the search query: the selected domains joined by commas."""
    separator = ','
    return separator.join(domains)

def get_recommendations_gradio(domains, top_k = 5):
    """Return annotated candlestick figures for positively scored matches.

    Args:
        domains: Domain names selected in the UI.
        top_k: Number of matches to request from the vector store.

    Returns:
        A list of plotly figures for the matches whose relevance score is
        greater than 0, so it may hold fewer than ``top_k`` items.
    """
    query = quey_generator(domains)
    matches = new_db.similarity_search_with_relevance_scores(query=query, k=top_k)

    reco_images = []
    for doc, score in matches:
        if score > 0:
            symbol = doc.metadata.get('symbol')
            name = doc.metadata.get('name')

            symbol_prices = price_data[price_data.SECURITYCODE == symbol]
            symbol_df = symbol_prices.sort_values(by='UNIX_TS').reset_index(drop=True)

            chart_title = '{} : {}'.format(symbol, name)
            reco_images.append(plot_ohlc_w_des(symbol_df, chart_title, get_insights_html(symbol)))
    return reco_images


# Recommender UI with a variable number of results.
#
# Event outputs must be components that exist when the layout is built, so
# a fixed pool of plot slots is created up front and each click shows or
# hides them as needed. (The previous handler tried to append new plots to
# a gr.Blocks() container and returned nothing, so no chart could appear.)
MAX_RECOMMENDATIONS = 5

with gr.Blocks() as demo:
    inputs = gr.Dropdown(
        ["finance", "transport", "technology", "agriculture"],
        multiselect=True,
        label="Domains",
        allow_custom_value=True,
        scale=5
    )

    button = gr.Button("Generate Recommendations")
    outputs = [
        gr.Plot(label=f"Recommendation {i+1}", visible=False)
        for i in range(MAX_RECOMMENDATIONS)
    ]

    def handle_button_click(domains):
        """Return one update per plot slot: figures first, then hidden slots."""
        # Nothing selected: hide every slot instead of searching for an empty query.
        reco_images = get_recommendations_gradio(domains, top_k=MAX_RECOMMENDATIONS) if domains else []

        updates = [gr.update(value=image, visible=True) for image in reco_images[:MAX_RECOMMENDATIONS]]
        # The score filter can leave fewer figures than slots; hide the rest.
        unused_slots = MAX_RECOMMENDATIONS - len(updates)
        updates.extend(gr.update(value=None, visible=False) for _ in range(unused_slots))
        return updates

    button.click(handle_button_click, inputs=inputs, outputs=outputs)

# Launch once the layout is fully defined.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


In [29]:
sys.exit()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)



Relevance scores must be between 0 and 1, got [(Document(page_content="E - CHANNELLING PLC digital lifestyle solutions for healthcare and other industries of Sri Lanka. The Company's main product is a software system, which provides channeling of medical practitioners.\xa0", metadata={'symbol': 'ECL', 'name': 'E - CHANNELLING PLC'}), 0.1030208233349047), (Document(page_content='SRI LANKA TELECOM PLC \xa0information and communications technology (ICT) solutions provider. The Company is primarily involved in providing a portfolio of telecommunication services across Sri Lanka. In addition, the range of services provided by the Company include, inter-alia, Internet services, data services, domestic and international leased circuits, broadband, satellite uplink, maritime transmission, IPTV service and directory publishing service.', metadata={'symbol': 'SLTL', 'name': 'SRI LANKA TELECOM PLC'}), 0.005522012912471452), (Document(page_content='GESTETNER OF CEYLON PLC importing and selling of

# Sandbox

In [None]:
data.head()

Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,NDB,64.9,64.9,63.9,63.9,2024-02-07,1707264000
1,LLUB,95.0,96.0,92.1,94.4,2024-02-07,1707264000
2,COMB,88.0,89.0,88.0,89.0,2024-02-07,1707264000
3,DFCC,78.0,78.0,76.0,77.1,2024-02-07,1707264000
4,HAYL,75.0,75.6,74.9,75.0,2024-02-07,1707264000


In [None]:
symbol_df = data.loc[data.SECURITYCODE == 'NDB'].reset_index(drop = True)
symbol_df

Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,NDB,64.9,64.9,63.9,63.9,2024-02-07,1707264000
1,NDB,62.5,64.8,62.5,64.1,2024-02-06,1707177600
2,NDB,61.1,64.7,61.0,63.3,2024-02-02,1706832000
3,NDB,61.0,61.1,60.9,61.0,2024-02-01,1706745600
4,NDB,61.0,61.5,60.8,61.0,2024-01-31,1706659200
...,...,...,...,...,...,...,...
464,NDB,69.4,71.5,69.0,70.5,2022-02-14,1644796800
465,NDB,68.9,69.3,68.1,68.6,2022-02-11,1644537600
466,NDB,68.7,69.0,67.5,67.9,2022-02-10,1644451200
467,NDB,69.3,69.8,68.5,68.6,2022-02-09,1644364800


In [None]:
symbol_df.TRADEDATE.max()

Timestamp('2024-02-07 00:00:00')

In [None]:
def quey_generator(domains):
    """Build the search query: a fixed lead-in followed by the comma-joined domains."""
    joined_domains = ','.join(domains)
    return 'business that belong in the domains of ' + joined_domains

def get_recommendations_gradio(domains, top_k = 5):
    """Return one annotated candlestick figure per recommended stock.

    Args:
        domains: Domain names selected in the UI.
        top_k: Number of matches to request from the vector store.

    Returns:
        A list of plotly figures, one per match in the order returned by the
        vector store; no score filtering is applied.
    """
    query = quey_generator(domains)
    matches = new_db.similarity_search_with_relevance_scores(query=query, k=top_k)

    reco_images = []
    for doc, _score in matches:
        symbol = doc.metadata.get('symbol')
        name = doc.metadata.get('name')

        symbol_prices = price_data[price_data.SECURITYCODE == symbol]
        symbol_df = symbol_prices.sort_values(by='UNIX_TS').reset_index(drop=True)

        chart_title = '{} : {}'.format(symbol, name)
        reco_images.append(plot_ohlc_w_des(symbol_df, chart_title, get_insights_html(symbol)))
    return reco_images

# Sandbox UI: charts refresh whenever the selection changes and once on page load.
with gr.Blocks() as demo:
    inputs = gr.Dropdown(
        ["finance", "transport", "technology", "agriculture"],
        multiselect=True,
        label="Domains",
        allow_custom_value=True,
        scale=5,
    )
    # Five plot slots, matching the default top_k of get_recommendations_gradio.
    rec1, rec2, rec3, rec4, rec5 = (gr.Plot() for _ in range(5))

    inputs.change(
        get_recommendations_gradio,
        inputs=inputs,
        outputs=[rec1, rec2, rec3, rec4, rec5],
    )
    demo.load(
        get_recommendations_gradio,
        inputs=[inputs],
        outputs=[rec1, rec2, rec3, rec4, rec5],
    )
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


  rs = (np.sqrt(symbol_df.rs.mean()))*100
  rs = (np.sqrt(symbol_df.rs.mean()))*100

invalid value encountered in sqrt


invalid value encountered in sqrt


invalid value encountered in sqrt


invalid value encountered in sqrt

Traceback (most recent call last):
  File "c:\Users\bpadmin\anaconda3\envs\tf_tr_recommender\lib\site-packages\gradio\queueing.py", line 495, in call_prediction
    output = await route_utils.call_process_api(
  File "c:\Users\bpadmin\anaconda3\envs\tf_tr_recommender\lib\site-packages\gradio\route_utils.py", line 231, in call_process_api
    output = await app.get_blocks().process_api(
  File "c:\Users\bpadmin\anaconda3\envs\tf_tr_recommender\lib\site-packages\gradio\blocks.py", line 1594, in process_api
    result = await self.call_function(
  File "c:\Users\bpadmin\anaconda3\envs\tf_tr_recommender\lib\site-packages\gradio\blocks.py", line 1176, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "c:\Users\bpadmin\anaconda3\envs\tf_tr_recom

In [None]:
data_path = '../../data/SL20 Two years to Narada.xls'

data = pd.read_excel(data_path, index_col= False)



In [None]:
def split_(stock_name):
    """Return the part of ``stock_name`` before its first '.' (all of it if there is none)."""
    prefix, _dot, _rest = stock_name.partition('.')
    return prefix

In [None]:
data['SECURITYCODE'] = data['SECURITYCODE'].apply(lambda x : split_(x))

In [None]:
data = data[['SECURITYCODE','OPENINGPRICE','HIGHPX', 'LOWPX', 'CLOSINGPRICE', 'TRADEDATE', 'UNIX_TS']].dropna(axis= 0, how = 'any')
# data

In [None]:
# data.SECURITYCODE.value_counts()

In [None]:
end_date = data.TRADEDATE.max()
end_date

Timestamp('2024-02-07 00:00:00')

In [None]:
sl20_symbols = data.SECURITYCODE.unique().tolist()
new_symbols = price_data.SECURITYCODE.unique().tolist()

In [None]:
# Drop every SL20 symbol that is already present in price_data, printing
# each one, so the two sources do not overlap when they are combined.
sl20_v2 = data.copy()

for sl20_symbol in sl20_symbols:
    if sl20_symbol not in new_symbols:
        continue
    print(sl20_symbol)
    overlapping_rows = sl20_v2.index[sl20_v2.SECURITYCODE == sl20_symbol]
    sl20_v2 = sl20_v2.drop(overlapping_rows)
        


VONE
CIC
DIAL
LIOC
LOLC
AAIC


In [None]:
sl20_v2.shape[0], data.shape[0]

sl20v2_symbols = sl20_v2.SECURITYCODE.unique().tolist()

for sl20_symbol in sl20v2_symbols:
    if sl20_symbol in(new_symbols):
        print(sl20_symbol)

In [None]:
sl20_v2.shape[0] + price_data.shape[0]

35178

In [None]:
comb_data = pd.concat([sl20_v2, price_data], ignore_index=True)
comb_data

Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,NDB,64.90,64.90,63.90,63.90,2024-02-07,1707264000
1,LLUB,95.00,96.00,92.10,94.40,2024-02-07,1707264000
2,COMB,88.00,89.00,88.00,89.00,2024-02-07,1707264000
3,DFCC,78.00,78.00,76.00,77.10,2024-02-07,1707264000
4,HAYL,75.00,75.60,74.90,75.00,2024-02-07,1707264000
...,...,...,...,...,...,...,...
35173,APLA,742.75,750.00,701.00,739.50,2022-02-08,1644278400
35174,AEL,33.50,33.50,31.20,31.70,2022-02-08,1644278400
35175,ACL,115.00,118.00,110.00,112.75,2022-02-08,1644278400
35176,ABAN,202.25,202.25,202.25,202.25,2022-02-08,1644278400


In [None]:
comb_data = comb_data[comb_data.TRADEDATE <= end_date]
comb_data.shape

(34481, 7)

In [None]:
comb_data.TRADEDATE.max(), comb_data.TRADEDATE.min(), data.TRADEDATE.min()

(Timestamp('2024-02-07 00:00:00'),
 Timestamp('2022-02-08 00:00:00'),
 Timestamp('2022-02-08 00:00:00'))

In [None]:
from operator import index


comb_data.to_excel('../../data/phase_2/price_data.xlsx', index = False)

In [None]:
price_data.SECURITYCODE.nunique()

68

In [None]:
stock_data_excel_file = pd.ExcelFile('../../data/selected_symbols_w_gics.xlsx')
stock_data = pd.read_excel(stock_data_excel_file, 'phase 1 symbols reduced')
stock_data.head(2)

Unnamed: 0,GICS Category,Symbol,Name,Business_Summary
0,Information Technology,HBS,hSenid Business Solutions PLC,development of human capital management (HCM)...
1,Consumer Discretionary,TYRE,KELANI TYRES PLC,"importation and sale of tires. In addition, th..."


In [None]:
pr_symbols = list(price_data.SECURITYCODE.unique())
details_symbols = list(stock_data.Symbol.unique())

In [None]:
set(pr_symbols) == set(details_symbols)

True

In [None]:
price_data

Unnamed: 0,SECURITYCODE,OPENINGPRICE,HIGHPX,LOWPX,CLOSINGPRICE,TRADEDATE,UNIX_TS
0,EXT,7.60,7.60,7.60,7.60,2024-02-26,1708905600
1,MDL,8.10,8.10,8.10,8.60,2024-02-26,1708905600
2,SDF,12.40,12.50,12.40,12.50,2024-02-26,1708905600
3,COOP,2.20,2.30,2.10,2.20,2024-02-26,1708905600
4,LCBF,1.80,1.80,1.80,1.80,2024-02-26,1708905600
...,...,...,...,...,...,...,...
65482,APLA,742.75,750.00,701.00,739.50,2022-02-08,1644278400
65483,AEL,33.50,33.50,31.20,31.70,2022-02-08,1644278400
65484,ACL,115.00,118.00,110.00,112.75,2022-02-08,1644278400
65486,ABAN,202.25,202.25,202.25,202.25,2022-02-08,1644278400


In [None]:
# Symbols with more than 250 price rows, most frequent first.
symbol_row_counts = price_data.SECURITYCODE.value_counts()
good_symbols = symbol_row_counts[symbol_row_counts > 250].index.tolist()
# good_symbols

In [None]:
price_data[price_data.SECURITYCODE.isin(good_symbols)].index

Int64Index([    0,     1,     2,     3,     4,     5,    13,    14,    18,
               19,
            ...
            65471, 65473, 65477, 65479, 65481, 65482, 65483, 65484, 65486,
            65488],
           dtype='int64', length=28505)

In [None]:
price_data_1 = price_data.drop(price_data[~price_data.SECURITYCODE.isin(good_symbols)].index)
price_data_1.shape

(28505, 7)

In [None]:
symbol_groups = price_data_1.groupby(by = 'SECURITYCODE')
symbol_groups

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000288DCDAC070>

In [None]:
price_data_1.TRADEDATE.max()

Timestamp('2024-02-26 00:00:00')

In [None]:
# Symbols whose most recent trade falls on the latest date in the dataset.
latest_trade_date = price_data_1.TRADEDATE.max()
rows_of_current_symbols = price_data_1.groupby('SECURITYCODE').filter(
    lambda group: group['TRADEDATE'].max() == latest_trade_date
)
filtered_symbols = rows_of_current_symbols['SECURITYCODE'].unique()

In [None]:
len(filtered_symbols), price_data_1.SECURITYCODE.nunique()

(60, 64)