In [1]:
import requests,json,datetime,time
import pandas as pd
from io import StringIO as sio
pd.options.display.float_format = '{:,.2f}'.format
import matplotlib.pyplot as plt
# import plotly.express as px

In [2]:

# HTTP headers sent with every chart request made by chart_data (passed as
# `headers=header` to requests.get). The User-Agent is a desktop Edge/Chrome
# browser string -- presumably so the chart endpoints accept the request as
# coming from a browser; TODO confirm whether the endpoints require it.
header={
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.66',
    'Accept': '*/*',
    'content-type': 'application/json',
    'Accept-Encoding': 'gzip, deflate, br',
    }

def getaio(df):
    buyer_stack=df.pivot_table(index="symbol", columns='buyer',values=['qty','amt'],aggfunc={'qty':['sum','count'],'amt': 'sum'}).stack(future_stack=True).dropna()
    buyer_stack.columns.droplevel()
    buyer_stack.columns='amount count qty'.split()
    buyer_stack.columns='buy_'+buyer_stack.columns
    seller_stack=df.pivot_table(index="symbol", columns='seller',values=['qty','amt'],aggfunc={'qty':['sum','count'],'amt': 'sum'}).stack(future_stack=True).dropna()
    seller_stack.columns.droplevel()
    seller_stack.columns='amount count qty'.split()
    seller_stack.columns='sale_'+seller_stack.columns
    buy_sale_stack=pd.concat([buyer_stack,seller_stack],axis=1)
    buy_sale_stack.index.names=('symbol','broker')
    buy_sale_stack.fillna(0,inplace=True)
    buy_pct=buy_sale_stack.buy_qty.groupby(level=0,group_keys=False).apply(lambda x: 100* x/x.sum())
    sale_pct=buy_sale_stack.sale_qty.groupby(level=0,group_keys=False).apply(lambda x: 100* x/x.sum())
    aio=pd.concat([buy_sale_stack,buy_pct,sale_pct],axis=1)
    col_name=list(aio.columns)
    col_name[-2:]=["buy_pct",'sale_pct']
    aio.columns=col_name
    aio.fillna(0,inplace=True)
    return aio

def latest_n_days(df,n):
    """Return the rows of ``df`` belonging to its ``n`` most recent dates.

    ``df`` must have an index level named ``date``. The selected dates are
    passed to ``df.loc`` in ascending order.
    """
    newest_first = df.index.unique(level='date').sort_values(ascending=False)
    chosen_dates = newest_first[:n].sort_values()
    return df.loc[chosen_dates]

# URL templates for the supported chart-data providers, keyed by the
# `provider` name accepted by chart_data. Each template is filled in with
# str.format using the placeholders {symbol}, {resolution}, {fromtime} and
# {totime}; chart_data passes the two times as integer epoch seconds.
chart_provider={
'nepsechart':'https://ohlcv.nepsechart.com/history?symbol={symbol}&resolution={resolution}&from={fromtime}&to={totime}',
'merocapital':'https://chartdata.merocapital.com/datafeed1/history?symbol={symbol}&resolution={resolution}&from={fromtime}&to={totime}',
'merolaganida':'https://da.merolagani.com/handlers/TechnicalChartHandler.ashx?type=get_advanced_chart&symbol={symbol}&resolution={resolution}&rangeStartDate={fromtime}&rangeEndDate={totime}&from=&isAdjust=1&currencyCode=NPR',
'merolagani':'https://www.merolagani.com/handlers/TechnicalChartHandler.ashx?type=get_advanced_chart&symbol={symbol}&resolution={resolution}&rangeStartDate={fromtime}&rangeEndDate={totime}&from=&isAdjust=1&currencyCode=NPR',
'nepsealpha':'https://nepsealpha.com/trading/1/history?symbol={symbol}&resolution={resolution}&from={fromtime}&to={totime}&pass=ok&force=261651&currencyCode=NRS',
'nepsedata':'https://nepsedata.com/history?symbol={symbol}&resolution={resolution}&from={fromtime}&to={totime}&currencyCode=NRS'
}
def chart_data(provider='merolagani',symbol="NEPSE",fromtime=datetime.date(2022,6,1),totime=None,resolution="1D"):
    """Fetch chart history for ``symbol`` from one of the configured providers.

    Parameters
    ----------
    provider : str
        Key into ``chart_provider`` selecting the URL template.
    symbol : str
        Symbol to request.
    fromtime, totime : datetime.date or datetime.datetime
        Start / end of the requested range. They are converted to epoch
        seconds with ``time.mktime``, i.e. interpreted in local time.
        ``totime=None`` (the default) means "now", evaluated at call time.
    resolution : str
        Resolution string forwarded to the provider unchanged.

    Returns
    -------
    pandas.DataFrame
        The provider's JSON payload with ``t`` converted from epoch seconds
        to naive UTC datetimes and the ``s`` column removed.

    Raises
    ------
    ValueError
        If ``provider`` is not a key of ``chart_provider``.
    requests.HTTPError
        If the provider answers with an HTTP error status.

    Notes
    -----
    From the original author: nepsechart and merocapital take ``"D"`` for
    daily resolution; merocapital and nepsedata give unadjusted charts.
    """
    # Resolve "now" per call; a datetime.now() default in the signature would
    # be frozen at the moment the function was defined.
    if totime is None:
        totime = datetime.datetime.now()

    url_template = chart_provider.get(provider)
    if url_template is None:
        raise ValueError(
            f"unknown provider {provider!r}; expected one of {sorted(chart_provider)}"
        )

    from_ts = int(time.mktime(fromtime.timetuple()))
    to_ts = int(time.mktime(totime.timetuple()))
    url = url_template.format(symbol=symbol, resolution=resolution, fromtime=from_ts, totime=to_ts)

    # A timeout keeps an unresponsive provider from hanging the notebook.
    response = requests.get(url, headers=header, timeout=30)
    response.raise_for_status()

    df = pd.read_json(sio(response.text))
    # Epoch seconds -> naive UTC timestamps (same values the deprecated
    # datetime.utcfromtimestamp produced).
    df['t'] = pd.to_datetime(df['t'], unit='s')
    return df.drop(columns='s')

In [3]:
# Download "1D"-resolution history for the symbol NEPSE from the merolagani
# provider, covering 2014-05-05 up to the current time.
df=chart_data(provider='merolagani',symbol="NEPSE",fromtime=datetime.date(2014,5,5),totime=datetime.datetime.now(),resolution="1D")

In [5]:
import os
import pandas as pd
from multiprocessing import Pool, Manager
from functools import partial

In [6]:
# Column names (lower-case) that check_file_return expects in every per-date
# file; the presence check there is done case-insensitively.
REQUIRED_COLUMNS = ['contract', 'symbol', 'buyer', 'seller', 'qty', 'rate', 'amt']

In [7]:
def get_unique_dates(df):
    """Return the distinct calendar days present in ``df['t']``.

    Each timestamp is truncated to midnight and duplicates are dropped,
    keeping the first occurrence (and its original index label).
    """
    midnight_stamps = df['t'].dt.normalize()
    return midnight_stamps.drop_duplicates()

def build_file_path(date_obj):
    """Return ``(path, date_str)`` for the file belonging to ``date_obj``.

    ``date_str`` is the date formatted as ``YYYY-MM-DD`` and ``path`` is
    ``fs/<YYYY>/<YYYY-MM-DD>`` joined with the platform's path separator.
    """
    date_str = date_obj.strftime('%Y-%m-%d')
    year_dir = date_obj.strftime('%Y')
    return os.path.join('fs', year_dir, date_str), date_str

# Per-date worker: classifies the file that belongs to a single date.
def check_file_return(date_obj):
    """Inspect the file for ``date_obj`` and report which problem it has.

    Returns a dict with three lists, each either empty or holding the
    ``YYYY-MM-DD`` string of this date:

    * ``file_not_found``    - the file path does not exist;
    * ``file_column_error`` - the file could not be read, or a required
      column is missing (names compared case-insensitively);
    * ``column_in_caps``    - all required columns are present and every one
      of them also appears in upper case.

    The date string is printed as a progress marker before any check runs.
    """
    path, date_str = build_file_path(date_obj)
    print(date_str)

    missing, bad_columns, upper_cased = [], [], []

    if os.path.exists(path):
        try:
            frame = pd.read_csv(path)
            lowered_names = {name.lower() for name in frame.columns}

            if set(REQUIRED_COLUMNS) <= lowered_names:
                shouted = {name.upper() for name in REQUIRED_COLUMNS}
                if shouted <= set(frame.columns):
                    upper_cased.append(date_str)
            else:
                bad_columns.append(date_str)
        except Exception:
            # Any read/inspection failure is reported as a column error.
            bad_columns.append(date_str)
    else:
        missing.append(date_str)

    return {
        'file_not_found': missing,
        'file_column_error': bad_columns,
        'column_in_caps': upper_cased,
    }


In [9]:
def process_all_dates(df, workers=20):
    """Run ``check_file_return`` for every distinct date in ``df`` and merge
    the per-date reports.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime column ``t`` (see ``get_unique_dates``).
    workers : int
        Maximum number of concurrent checks.

    Returns
    -------
    dict
        ``{'file_not_found': [...], 'file_column_error': [...],
        'column_in_caps': [...]}`` with the date strings collected from all
        workers, in date order.
    """
    # Threads instead of processes: the work is file I/O, and a process pool
    # cannot reliably run functions defined in a notebook cell on platforms
    # that spawn workers (they cannot re-import the interactive __main__).
    from concurrent.futures import ThreadPoolExecutor

    final_result = {
        'file_not_found': [],
        'file_column_error': [],
        'column_in_caps': []
    }

    dates = list(get_unique_dates(df))
    if not dates:
        return final_result

    # No point starting more threads than there are dates to check.
    with ThreadPoolExecutor(max_workers=min(workers, len(dates))) as executor:
        results = list(executor.map(check_file_return, dates))

    # Merge all individual results into one
    for per_date in results:
        for key, collected in final_result.items():
            collected.extend(per_date[key])

    return final_result


In [None]:
# Check the files for the dates found in the first 40 rows of df.
process_all_dates(df.head(40))