In [1]:
import re
import requests
from persiantools.jdatetime import JalaliDate
import json
import csv
import pandas as pd

In [2]:
def get_stock_groups(timeout=30):
    """Scrape candidate stock-group codes from tsetmc.com.

    Every run of two consecutive digits found in the response body is returned
    as a group code, so the list can contain duplicates and unrelated numbers;
    callers are expected to de-duplicate.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        list[str]: Two-digit strings in the order they appear in the page, or
        an empty list when the server does not answer with HTTP 200.
    """
    r = requests.get(
        "http://www.tsetmc.com/Loader.aspx?ParTree=111C1213", timeout=timeout
    )
    # Same policy as get_stock_ids: never try to parse an error page.
    if r.status_code != 200:
        return []
    return re.findall(r"\d{2}", r.text)


In [3]:
def get_stock_ids(group, timeout=30):
    """Collect the distinct stock ids listed for one group.

    Any run of 15 to 20 digits in the response body is treated as a stock id.

    Args:
        group: Group code substituted into the ``g`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        list[str]: Unique ids in ascending string order, or an empty list when
        the server does not answer with HTTP 200.
    """
    url = "http://www.tsetmc.com/tsev2/data/InstValue.aspx?g={}&t=g&s=0"
    r = requests.get(url.format(group), timeout=timeout)
    if r.status_code != 200:
        return []
    # sorted() instead of list(): iteration order of a set of strings changes
    # between interpreter runs, which made the scrape order irreproducible.
    return sorted(set(re.findall(r"\d{15,20}", r.text)))


In [4]:
def _first_group(pattern, text):
    """Return capture group 1 of the first match of ``pattern`` in ``text``, or None."""
    match = re.search(pattern, text)
    return None if match is None else match.group(1)


def _float_or_none(raw):
    """Convert ``raw`` to float; return None when it is missing or not numeric."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None


def get_stock_detail(stock_id: str, group_id: int, timeout=30) -> "dict | bool | None":
    """Download one stock's page and extract its descriptive fields.

    The fields are pulled out of the page text with regular expressions.

    Args:
        stock_id: Id substituted into the ``i`` query parameter.
        group_id: Stored unchanged under the ``"group_id"`` key.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        dict: Keys ``stock_id``, ``group_name``, ``instId``, ``insCode``,
        ``baseVol``, ``name``, ``title``, ``sectorPe``, ``shareCount``,
        ``estimatedEps`` and ``group_id``. The last three numeric fields are
        ``None`` when absent or not parseable.
        None: When any of group name, instrument id, instrument code, base
        volume, name or title cannot be extracted (previously the first four
        raised and aborted the caller's whole loop).
        False: When the extracted name is the run of empty attributes that
        the name pattern captures when the name itself is blank.
    """
    url = "http://www.tsetmc.com/Loader.aspx?ParTree=151311&i={}".format(stock_id)
    r = requests.get(url, timeout=timeout)
    print(url)
    page = r.text

    group_name = _first_group(r"LSecVal='([\D]*)',", page)
    inst_id = _first_group(r"InstrumentID='([\w\d]*)',", page)
    ins_code = _first_group(r"InsCode='(\d*)',", page)
    base_vol = _float_or_none(_first_group(r"BaseVol=([\.\d]*),", page))
    name = _first_group(r"LVal18AFC='([\D]*)',", page)
    title = _first_group(r"Title='([\D]*)',", page)

    # An empty string is a valid capture, so test against None explicitly.
    mandatory = (group_name, inst_id, ins_code, base_vol, name, title)
    if any(value is None for value in mandatory):
        return None

    stock = {
        "stock_id": stock_id,
        "group_name": group_name,
        "instId": inst_id,
        # Keep the id only when the page confirms it; 0 marks a mismatch.
        "insCode": stock_id if ins_code == stock_id else 0,
        "baseVol": base_vol,
        "name": name,
        "title": title,
        "sectorPe": _float_or_none(_first_group(r"SectorPE='([\.\d]*)',", page)),
        "shareCount": _float_or_none(_first_group(r"ZTitad=([\.\d]*),", page)),
        "estimatedEps": _float_or_none(
            _first_group(r"EstimatedEPS='([\.\d]*)',", page)
        ),
        "group_id": group_id,
    }
    # The greedy [\D]* in the name pattern runs across the following empty
    # attributes when the name is blank; such a page carries no usable stock.
    if stock["name"] == "',DEven='',LSecVal='',CgrValCot='',Flow='',InstrumentID='":
        return False
    return stock

In [5]:
def get_stock_insind_history(stock_id, timeout=30):
    """Download and parse the per-day client-type history of one stock.

    The response body is split into rows on ``;`` and each row into fields on
    ``,``. The first 13 fields of a row are mapped, in order, to the keys
    listed in ``fields`` below; all values are kept as strings.

    Parsing stops at the first row with fewer than 13 fields (this includes
    the empty piece after a trailing ``;``). Any error while downloading or
    parsing is reported on stdout and the rows collected so far are returned.

    Args:
        stock_id: Id substituted into the ``i`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        list[dict]: One dict per parsed row, possibly empty.
    """
    fields = (
        "date",
        "ind_buy_count",
        "ins_buy_count",
        "ind_sell_count",
        "ins_sell_count",
        "ind_buy_volume",
        "ins_buy_volume",
        "ind_sell_volume",
        "ins_sell_volume",
        "ind_buy_value",
        "ins_buy_value",
        "ind_sell_value",
        "ins_sell_value",
    )
    history = list()
    url2 = "http://tsetmc.com/tsev2/data/clienttype.aspx?i={}".format(stock_id)
    try:
        body = requests.get(url2, timeout=timeout).text
        if body == "":
            print("Blank url")
            return history
        for row in body.split(";"):
            values = row.split(",")
            # The guard must cover every index that is read; the old "< 3"
            # check let 3..12-field rows through to an IndexError.
            if len(values) < len(fields):
                break
            # zip() stops at the 13th field, so extra trailing fields are ignored.
            history.append(dict(zip(fields, values)))
    except Exception:
        # Best effort: keep what was parsed. Exception (not a bare except) so
        # that Ctrl-C / SystemExit still stop a long scrape.
        print("************ ERROR:", stock_id)
    return history

def get_lateset_InsInd_data():
    """Scrape every stock of every group and return its history as one table.

    Steps:
      1. Fetch the group codes (de-duplicated and sorted).
      2. For each group, fetch its stock ids; for each id fetch the stock
         detail and, when that is a dict, its history rows.
      3. Flatten all history rows into a DataFrame, one row per stock per date.

    Progress is reported with ``print``.

    Returns:
        pandas.DataFrame: Columns ``ID``, ``Name``, ``Date`` followed by the
        twelve buy/sell count, volume and value columns. ``Date`` is parsed
        from the first eight characters of the raw date as ``YYYYMMDD``; the
        other values are stored exactly as the history rows provide them. The
        frame is empty (but has all columns) when nothing was scraped.
    """
    history_columns = [
        "ind_buy_count",
        "ins_buy_count",
        "ind_sell_count",
        "ins_sell_count",
        "ind_buy_volume",
        "ins_buy_volume",
        "ind_sell_volume",
        "ins_sell_volume",
        "ind_buy_value",
        "ins_buy_value",
        "ind_sell_value",
        "ins_sell_value",
    ]

    print("Start getting groups")
    groups = sorted(set(get_stock_groups()))
    print("Getting groups done", len(groups))

    all_stock_data = []
    counter = 0
    for group in groups:
        stocks = get_stock_ids(group)
        if len(stocks) == 0:
            continue
        print("Group parsed", group)
        for stock_id in stocks:
            counter += 1
            print("Parsed stock count", counter)
            stock = get_stock_detail(stock_id, int(group))
            # get_stock_detail signals "skip this stock" with None or False.
            if not isinstance(stock, dict):
                continue
            stock["history"] = get_stock_insind_history(stock_id)
            all_stock_data.append(stock)

    # One record per (stock, day); building the frame once from records
    # replaces fifteen parallel lists that had to be kept in lock-step.
    records = []
    for stock in all_stock_data:
        for day in stock["history"]:
            record = {
                "ID": stock["stock_id"],
                "Name": stock["name"],
                # Only the leading YYYYMMDD part of the raw value is used.
                "Date": str(day["date"])[:8],
            }
            for column in history_columns:
                record[column] = day[column]
            records.append(record)

    data = pd.DataFrame(records, columns=["ID", "Name", "Date"] + history_columns)
    # Explicit format instead of the deprecated infer_datetime_format flag.
    data["Date"] = pd.to_datetime(data["Date"], format="%Y%m%d")
    return data

In [6]:
# Run the full scrape (network-bound: it issues HTTP requests for every group
# and stock) and keep the resulting DataFrame in `indsind`.
indsind = get_lateset_InsInd_data()

Start getting groups
