In [13]:
import pandas as pd
import os
from tqdm import tqdm

In [16]:
def read_stock_files(directory: str) -> pd.DataFrame:
    df_list = []
    for filename in tqdm(os.listdir(directory)):
        if not filename.endswith(".us.txt"):
            continue
        try:
            file_path = os.path.join(directory, filename)
            df = pd.read_csv(
                file_path,
                parse_dates=["Date"],
                dtype={"Open": float, "High": float, "Low": float, "Close": float},
            )
            stock_name = filename.split(".")[0]
            df['Name'] = stock_name 
            df_list.append(df)
        except pd.errors.EmptyDataError:
            continue
    all_data = pd.concat(df_list, ignore_index=True)
    return all_data



In [17]:
stocks_df = read_stock_files('../Stocks')

  0%|          | 6/7195 [00:00<02:00, 59.55it/s]

  2%|▏         | 126/7195 [00:01<01:07, 104.18it/s]

File ../Stocks/sail.us.txt is empty.


  2%|▏         | 169/7195 [00:01<01:17, 90.73it/s] 

File ../Stocks/molc.us.txt is empty.
File ../Stocks/pbio.us.txt is empty.


  6%|▋         | 455/7195 [00:04<00:56, 118.48it/s]

File ../Stocks/boxl.us.txt is empty.


 15%|█▌        | 1084/7195 [00:08<00:44, 137.04it/s]

File ../Stocks/accp.us.txt is empty.


 20%|█▉        | 1433/7195 [00:11<00:44, 130.84it/s]

File ../Stocks/jt.us.txt is empty.
File ../Stocks/amrh.us.txt is empty.


 21%|██        | 1517/7195 [00:12<00:42, 134.24it/s]

File ../Stocks/ehr.us.txt is empty.
File ../Stocks/pxus.us.txt is empty.


 24%|██▍       | 1720/7195 [00:13<00:36, 149.27it/s]

File ../Stocks/hayu.us.txt is empty.


 29%|██▉       | 2113/7195 [00:17<00:38, 131.50it/s]

File ../Stocks/sfix.us.txt is empty.


 32%|███▏      | 2291/7195 [00:19<00:40, 120.67it/s]

File ../Stocks/fmax.us.txt is empty.


 33%|███▎      | 2391/7195 [00:20<00:49, 97.63it/s] 

File ../Stocks/bolt.us.txt is empty.


 43%|████▎     | 3108/7195 [00:25<00:28, 144.95it/s]

File ../Stocks/stnl.us.txt is empty.
File ../Stocks/vmet.us.txt is empty.


 46%|████▌     | 3290/7195 [00:27<00:30, 129.17it/s]

File ../Stocks/bbrx.us.txt is empty.


 48%|████▊     | 3448/7195 [00:28<00:28, 132.63it/s]

File ../Stocks/mapi.us.txt is empty.


 53%|█████▎    | 3841/7195 [00:31<00:24, 139.13it/s]

File ../Stocks/gnst.us.txt is empty.


 58%|█████▊    | 4184/7195 [00:33<00:19, 157.31it/s]

File ../Stocks/wnfm.us.txt is empty.


 66%|██████▌   | 4729/7195 [00:37<00:17, 137.69it/s]

File ../Stocks/rbio.us.txt is empty.


 67%|██████▋   | 4801/7195 [00:38<00:17, 134.13it/s]

File ../Stocks/asns.us.txt is empty.


 76%|███████▌  | 5446/7195 [00:43<00:13, 130.45it/s]

File ../Stocks/amrhw.us.txt is empty.
File ../Stocks/wspt.us.txt is empty.


 76%|███████▋  | 5487/7195 [00:43<00:15, 112.50it/s]

File ../Stocks/scci.us.txt is empty.


 77%|███████▋  | 5549/7195 [00:44<00:15, 105.88it/s]

File ../Stocks/sbt.us.txt is empty.


 78%|███████▊  | 5585/7195 [00:44<00:14, 111.44it/s]

File ../Stocks/znwaa.us.txt is empty.


 79%|███████▉  | 5667/7195 [00:44<00:11, 135.94it/s]

File ../Stocks/srva.us.txt is empty.
File ../Stocks/bxg.us.txt is empty.


 80%|███████▉  | 5731/7195 [00:45<00:10, 144.27it/s]

File ../Stocks/send.us.txt is empty.


 83%|████████▎ | 5975/7195 [00:47<00:09, 122.75it/s]

File ../Stocks/vist.us.txt is empty.
File ../Stocks/otg.us.txt is empty.


 98%|█████████▊| 7065/7195 [00:55<00:00, 152.97it/s]

File ../Stocks/scph.us.txt is empty.


100%|██████████| 7195/7195 [00:56<00:00, 127.07it/s]


In [24]:
print(stocks_df['OpenInt'].unique())
stocks_df.drop(columns=['OpenInt'], inplace=True)

[0]


In [28]:
stocks_df

Unnamed: 0,Date,Open,High,Low,Close,Volume,Name
0,2016-09-16,24.094,24.363,23.596,23.757,5134322,ntb
1,2016-09-19,23.903,24.162,23.517,23.517,526200,ntb
2,2016-09-20,23.375,23.566,23.038,23.566,499319,ntb
3,2016-09-21,23.239,23.517,23.038,23.067,248349,ntb
4,2016-09-22,23.182,23.182,23.028,23.048,390700,ntb
...,...,...,...,...,...,...,...
14887660,2017-11-06,38.350,38.865,38.270,38.510,315142,aat
14887661,2017-11-07,38.640,39.090,38.555,38.980,454858,aat
14887662,2017-11-08,38.740,39.160,38.680,38.900,262062,aat
14887663,2017-11-09,38.860,39.830,38.860,39.420,431340,aat


0           24.094
1           23.903
2           23.375
3           23.239
4           23.182
             ...  
14887660    38.350
14887661    38.640
14887662    38.740
14887663    38.860
14887664    39.210
Name: Open, Length: 14887665, dtype: float64