In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Collect the names of the raw CSV files in the 'data' directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep
# the per-file processing order (and the row order of the combined CSV written
# later) reproducible across runs and machines.
filelist = os.listdir('../data/')
csvs = sorted(name for name in filelist if name.endswith('.csv'))

In [3]:
# get a list of the stock abbreviations
# For each file name: drop the '.csv' extension, split the remainder on underscores,
# and keep the last piece (indexing backwards with -1), which is the ticker.
# os.path.splitext is used instead of str.rstrip('.csv') because rstrip removes any
# trailing run of the characters '.', 'c', 's', 'v' rather than the literal suffix,
# which would eat into (or entirely erase) tickers ending in those lowercase letters.
stocknames = [os.path.splitext(stock)[0].split('_')[-1] for stock in csvs]

In [4]:
def format_stockcsv(stockfile, data_dir='../data/'):
    """Load one Macrotrends CSV export into a typed DataFrame.

    The export starts with a free-text preamble, so the file is scanned for the
    first line that starts with 'date' (the column header row) and everything
    before it is skipped when parsing.

    Parameters
    ----------
    stockfile : str
        File name of the CSV, relative to ``data_dir``.
    data_dir : str, optional
        Directory containing the CSV. Defaults to '../data/'.

    Returns
    -------
    pandas.DataFrame
        The parsed data with 'date' converted to datetime and the
        'open', 'high', 'low' and 'close' columns cast to float32.
        Any other columns keep the dtype inferred by ``pd.read_csv``.

    Raises
    ------
    ValueError
        If no line in the file starts with 'date'.
    """
    filepath = os.path.join(data_dir, stockfile)

    # Find the header row; stop at the first match so the rest of the file is
    # not read twice and a later line can never override the real header.
    header_row = None
    with open(filepath, 'r') as file:
        for linenumber, line in enumerate(file):
            if line.startswith('date'):
                header_row = linenumber
                break

    if header_row is None:
        raise ValueError(
            "No header line starting with 'date' found in " + repr(filepath)
        )

    stock_df = pd.read_csv(filepath, skiprows=header_row)

    # convert the date column into datetime format and the price columns into float32
    stock_df['date'] = pd.to_datetime(stock_df['date'])
    price_columns = ['open', 'high', 'low', 'close']
    stock_df[price_columns] = stock_df[price_columns].astype('float32')

    return stock_df

In [5]:
# Add the ticker to each stock's dataframe, save every stock as its own formatted CSV,
# and build one combined dataframe from all of them.
output_dir = '../data/formatted/'
os.makedirs(output_dir, exist_ok=True)  # a fresh checkout may not have the output folder yet

formatted_frames = []
for csvfile, ticker in zip(csvs, stocknames):
    stock_df = format_stockcsv(csvfile)
    stock_df['ticker'] = ticker
    formatted_frames.append(stock_df)
    # os.path.splitext removes exactly the '.csv' extension; str.rstrip('.csv') would also
    # strip any trailing '.', 'c', 's' or 'v' characters from the file's base name.
    stock_df.to_csv(output_dir + os.path.splitext(csvfile)[0] + '_formatted.csv', index=False)

if not formatted_frames:
    raise ValueError("No CSV files were found to format; 'csvs' is empty")

# Concatenate once at the end instead of growing the frame inside the loop,
# which re-copies all accumulated rows on every iteration.
all_stocks = pd.concat(formatted_frames, axis=0)

In [6]:
# Write the combined dataframe of all stocks to a single CSV file, leaving out the index column.
all_stocks.to_csv(
    path_or_buf='../data/formatted/combined.csv',
    index=False,
)