In [2]:
import os
from os import listdir
from os.path import isdir, isfile, join
import pandas as pd

rootdir = '../data/stocks'

stock_list = [f for f in listdir(rootdir) if isdir(join(rootdir, f))]


In [3]:
from pathlib import Path
import shutil

def handle_stock_min(path, stock_name):
  stock_dir = os.path.join(path, stock_name)
  raw_data_dir = os.path.join(stock_dir, 'raw-minute')
  file_list = [join(raw_data_dir, f) for f in listdir(raw_data_dir) if isfile(join(raw_data_dir, f))]
  data = None
  for file in file_list:
    if data is None:
      data = pd.read_json(file)
      print('first')
      print(len(data))
    else:
      data = pd.concat([data, pd.read_json(file)]).drop_duplicates().reset_index(drop=True)
      print('second')
      print(len(data))
  if data is None:
    return
  data['date']=data['timestamp'].dt.strftime('%Y%m%d')
  grouped = data.groupby('date')
  Path(os.path.join(stock_dir, 'minute')).mkdir(exist_ok=True)
  for name, group in grouped:
    group.to_json(f'{stock_dir}/minute/data-{name}')
  for file in file_list:
    Path(os.path.join(raw_data_dir, 'bak')).mkdir(exist_ok=True)
    filename = file.split('/')[-1]
    new_file = os.path.join(raw_data_dir, 'bak', filename)
    shutil.move(file, new_file)

In [4]:
from pathlib import Path
import shutil

def handle_stock_daily(path, stock_name):
  stock_dir = os.path.join(path, stock_name)
  raw_data_dir = os.path.join(stock_dir, 'raw-daily')
  file_list = [join(raw_data_dir, f) for f in listdir(raw_data_dir) if isfile(join(raw_data_dir, f)) and f.startswith('data')]
  data = None
  target_file = join(stock_dir, 'daily', 'data')
  if isfile(target_file):
    data = pd.read_json(target_file)
  
  updated = False
  for file in file_list:
    if data is None:
      data = pd.read_json(file)
      print('first')
      print(len(data))
    else:
      data = pd.concat([data, pd.read_json(file)]).drop_duplicates().reset_index(drop=True)
      print('second')
      print(len(data))
    updated = True
  if data is None:
    return

  if updated:
    data['date']=data['timestamp'].dt.strftime('%Y%m%d')
    Path(os.path.join(stock_dir, 'daily')).mkdir(exist_ok=True)
    data.to_json(f'{stock_dir}/daily/data')
    
  for file in file_list:
    Path(os.path.join(raw_data_dir, 'bak')).mkdir(exist_ok=True)
    filename = file.split('/')[-1]
    new_file = os.path.join(raw_data_dir, 'bak', filename)
    shutil.move(file, new_file)

In [5]:
for stock in stock_list:
  print(f"Processing stock: {stock}")
  handle_stock_min('../data/stocks', stock)
  handle_stock_daily('../data/stocks', stock)

Processing stock: PFE
first
1951
first
5035
Processing stock: HEXA-B.ST
first
2229
first
5067
Processing stock: NSRGY
first
1950
first
5035
Processing stock: NVDA
first
1951
first
5035
Processing stock: INTC
first
1951
first
5035
Processing stock: TSM
first
1951
first
5035
Processing stock: AMZN
first
1951
first
5035
Processing stock: AMBK
first
780
first
5035
Processing stock: DIS
first
1951
first
5035
Processing stock: ERIC-B.ST
first
2229
first
5067
Processing stock: AZN.ST
first
2229
first
5067
Processing stock: RHHBY
first
1951
first
4737
Processing stock: MSFT
first
1951
first
5035
Processing stock: AAPL
first
1951
first
5035
Processing stock: TSLA
first
1951
first
3011
Processing stock: DDAIF
first
1950
first
5035
Processing stock: GOOGL
first
1951
first
4486
Processing stock: MRK
first
1951
first
5035
Processing stock: AIR.F
first
3585
first
5099
Processing stock: ESSITY-B.ST
first
2228
first
1256
Processing stock: T
first
1951
first
5035


In [6]:
indicator_dir = '../data/indicators'
indicator_list = [f for f in listdir(indicator_dir) if isdir(join(indicator_dir, f))]
for indicator in indicator_list:
  print(f"Processing indicator: {indicator}")
  handle_stock_min(indicator_dir, indicator)
  handle_stock_daily(indicator_dir, indicator)

Processing indicator: EURSEK=X
first
6425
first
5218
Processing indicator: ZS=F
first
6115
first
5081
Processing indicator: ^OMX
first
2229
first
3405
Processing indicator: ^DJI
first
1951
first
5035
Processing indicator: EUR=X
first
6423
first
4837
Processing indicator: GC=F
first
6115
first
5081
Processing indicator: SB=F
first
5894
first
5081
Processing indicator: ZB=F
first
6115
first
5081
Processing indicator: SI=F
first
6115
first
5081
Processing indicator: HG=F
first
6115
first
5081
Processing indicator: SEK=X
first
6425
first
5218
Processing indicator: LE=F
first
4593
first
5080
Processing indicator: CL=F
first
6115
first
5081
