In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [58]:
import os
import time
import json
import pandas as pd
from tqdm import tqdm

In [59]:
root = "drive/MyDrive/data/bank/"
url_bamosz = "https://www.bamosz.hu/alapoldal?isin="

In [71]:
with open(root + "data.json","r",encoding="utf8") as jf:
    stocks = json.load(jf)

In [72]:
for key in tqdm(stocks.keys()):
  file_with_paht = root + "/cleaned/" + key + ".csv"
  if os.path.exists(file_with_paht):
    stocks[key] = {
        "data" : pd.read_csv(file_with_paht, sep=";", decimal=",",encoding='utf-8-sig'),
        "name" : stocks[key]
    }
  else:
    stocks[key] = { "name" : stocks[key] }

100%|██████████| 93/93 [00:00<00:00, 100.51it/s]


In [73]:
extended = {}
while len(list(stocks.keys())) > 0: 
  errors = {}
  pbar = tqdm(stocks.keys())
  for key in pbar:    
    if 'data' in stocks[key].keys():
      try:      
        stocks[key]["new_data"] = pd.read_html(url_bamosz+key, 
                                              attrs={"class" : "dataTable2"}, 
                                              decimal=",", 
                                              thousands='.')[1]
        extended[key] = stocks[key].copy()
        pbar.set_description("Completed: " + key)
      except:
        errors[key] = stocks[key].copy()
        pbar.set_description("Errors: " + key)
      time.sleep(1)

  stocks = errors.copy()
  time.sleep(2)

Completed: HU0000709092: 100%|██████████| 93/93 [04:13<00:00,  2.72s/it]


In [74]:
def cleaner(item):
  return item.replace("%","").replace(",",".").replace(" ","")

def rename_columns(data):
  return data.rename(columns={"Nettó eszközérték": "NEÉ",
                              "Napi bef. jegy forgalom": "Napi befjegy. forgalom",
                              "Napi befjegy forgalom vált." : "Napi befjegy. forgalom változás(%)",
                              "3 havi" : "3 hónapos hozam",
                              "6 havi" : "6 hónapos hozam",
                              "1 éves" : "1 éves hozam",
                              "3 éves" : "3 éves hozam",
                              "5 éves" : "5 éves hozam",
                              "10 éves" : "10 éves hozam"})
  
def reformat_csv(data):
  tmp = data.copy()
  tmp.columns = tmp.values[0]
  tmp = tmp[1:].copy().iloc[::-1]
  tmp = tmp.fillna(0.0)
  tmp["Dátum"] = tmp["Dátum"].apply(lambda x: x[:4] + "-" + x[4:6] + "-" + x[6:])
  tmp["Árfolyam"] = tmp["Árfolyam"].apply(lambda x: float(cleaner(cleaner(str(x)))))
  tmp["Nettó eszközérték"] = tmp["Nettó eszközérték"].apply(lambda x: int(cleaner(str(x))))
  tmp["Napi bef. jegy forgalom"] = tmp["Napi bef. jegy forgalom"].apply(lambda x: float(x))
  tmp["Napi befjegy forgalom vált."] = tmp["Napi befjegy forgalom vált."].apply(lambda x: float(cleaner(str(x))))
  tmp["3 havi"] = tmp["3 havi"].apply(lambda x: float(cleaner(str(x))))
  tmp["6 havi"] = tmp["6 havi"].apply(lambda x: float(cleaner(str(x))))
  tmp["1 éves"] = tmp["1 éves"].apply(lambda x: float(cleaner(str(x))))
  tmp["3 éves"] = tmp["3 éves"].apply(lambda x: float(cleaner(str(x))))
  tmp["5 éves"] = tmp["5 éves"].apply(lambda x: float(cleaner(str(x))))
  tmp["10 éves"] = tmp["10 éves"].apply(lambda x: float(cleaner(str(x))))
  tmp = rename_columns(tmp)
  return tmp

In [75]:
# import copy
# ext2 = copy.deepcopy(extended)

for key in tqdm(extended.keys()):
  extended[key]["new_data"] = reformat_csv(extended[key]["new_data"])

  dates = list(extended[key]["data"]["Dátum"].values)
  tmp = extended[key]["new_data"][~extended[key]["new_data"]["Dátum"].isin(dates)]
  extended[key]["ext_data"] = pd.concat([tmp.iloc[::-1], extended[key]["data"]])

100%|██████████| 91/91 [00:00<00:00, 113.62it/s]


In [83]:
for key in tqdm(extended.keys()):
  extended[key]["ext_data"].to_csv(root + "/extended/" + key + ".csv",
                                   sep=";",
                                   index=False,
                                   encoding='utf-8-sig',
                                   decimal=",")

100%|██████████| 91/91 [00:06<00:00, 15.16it/s]
