In [39]:
import pandas as pd
from datetime import datetime

def get_stock_data(file_name):
    """Read a comma-separated stock file and return its rows as a dataframe.

    The first line of the file is taken as the header. Every later line is
    stripped and split on commas; lines whose field count differs from the
    header's (blank or truncated lines, for example) are skipped. Values in
    the 'Date' column are parsed as '%Y-%m-%d' datetimes; every other value
    is kept as the string read from the file.

    Args:
        file_name (string) : name of file

    Returns:
        df_stock (dataframe) : dataframe containing stock info scraped from
            file, one row per valid line, columns in header order. An empty
            dataframe is returned when the file has no header line.

    Raises:
        ValueError : if a 'Date' value in a kept row is not in '%Y-%m-%d' format
    """
    # the context manager closes the file even if reading raises
    with open(file_name) as file:
        lines = file.read().split('\n')

    # get column titles (stripped the same way the data rows are)
    header = lines[0].strip()
    if not header:
        return pd.DataFrame()
    columns = header.split(',')

    # Collect the rows first and build the dataframe once at the end:
    # appending to a dataframe row by row copies it on every iteration, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    records = []
    for line in lines[1:]:
        values = line.strip().split(',')

        # if row does not have sufficient column information, pass over
        if len(values) != len(columns):
            continue

        record = dict(zip(columns, values))

        # change all date column info to datetime object
        if 'Date' in record:
            record['Date'] = datetime.strptime(record['Date'], '%Y-%m-%d')

        records.append(record)

    df_stock = pd.DataFrame(records, columns=columns)
    return df_stock

In [40]:
# Parse 'a.us.txt' with get_stock_data and keep the result in df_a_stock,
# which later cells reuse; the bare last expression displays the dataframe.
file_name = 'a.us.txt'
df_a_stock = get_stock_data(file_name)
df_a_stock

Unnamed: 0,Close,Date,High,Low,Open,OpenInt,Volume
0,29.702,1999-11-18,33.754,27.002,30.713,0,66277506
1,27.257,1999-11-19,29.027,26.872,28.986,0,16142920
2,29.702,1999-11-22,29.702,27.044,27.886,0,6970266
3,27.002,1999-11-23,29.446,27.002,28.688,0,6332082
4,27.717,1999-11-24,28.309,27.002,27.083,0,5132147
...,...,...,...,...,...,...,...
4516,68.22,2017-11-06,68.45,68.22,68.22,0,995731
4517,68.25,2017-11-07,68.64,68.04,68.32,0,966466
4518,68.11,2017-11-08,68.33,67.771,68.1,0,972616
4519,67.47,2017-11-09,67.98,66.91,67.92,0,1673083


In [41]:
# List of stock data file names, one entry per line
some_stock_files = [
    'a.us.txt',
    'abc.us.txt',
    'aktx.us.txt',
    'blue.us.txt',
    'bro.us.txt',
    'by.us.txt',
    'casi.us.txt',
    'cbu.us.txt',
    'cxdc.us.txt',
    'dhr.us.txt',
    'dxyn.us.txt',
    'ebay.us.txt',
    'eei.us.txt',
    'eod.us.txt',
    'fox.us.txt',
    'ftrpr.us.txt',
    'fwonk.us.txt',
]

In [96]:
def get_one_day_dif(stock_dataframe):
    """ Calculates the one day difference between stock closing values (this row - previous row)

    The previous close is taken from the preceding row via shift(), so the
    result is a day-over-day difference only if the rows are in ascending
    date order. The sign of 'Close Value Difference' shows the direction:
    positive when the close rose, negative when it fell. The first row has
    no previous row, so its 'Yesterday Close' and difference are NaN.

    Args:
        stock_dataframe (dataframe) : dataframe containing stock info(close value, date, high, low, open, etc.);
            must have 'Date' and 'Close' columns. It is not modified.

    Returns:
        one_day_dif_dataframe (dataframe) : new dataframe with columns 'Date', 'Close' (numeric),
            'Yesterday Close' and 'Close Value Difference'
    """

    # Subset to the relevant columns first, then copy: the later column
    # assignments then act on an independent frame rather than on a slice of
    # the caller's data (which triggers SettingWithCopyWarning on older
    # pandas), and the unused columns are never copied.
    one_day_dif_dataframe = stock_dataframe[['Date', 'Close']].copy()

    # Convert Close to numeric for calculations
    one_day_dif_dataframe['Close'] = pd.to_numeric(one_day_dif_dataframe['Close'])
    one_day_dif_dataframe['Yesterday Close'] = one_day_dif_dataframe['Close'].shift()

    # Calculate the stock's closing price difference from the previous day
    one_day_dif_dataframe['Close Value Difference'] = (
        one_day_dif_dataframe['Close'] - one_day_dif_dataframe['Yesterday Close']
    )

    return one_day_dif_dataframe
    

In [97]:
# Display the day-over-day close differences for df_a_stock (defined in an earlier cell)
get_one_day_dif(df_a_stock)

Unnamed: 0,Date,Close,Yesterday Close,Close Value Difference
0,1999-11-18,29.702,,
1,1999-11-19,27.257,29.702,-2.445
2,1999-11-22,29.702,27.257,2.445
3,1999-11-23,27.002,29.702,-2.700
4,1999-11-24,27.717,27.002,0.715
...,...,...,...,...
4516,2017-11-06,68.220,68.370,-0.150
4517,2017-11-07,68.250,68.220,0.030
4518,2017-11-08,68.110,68.250,-0.140
4519,2017-11-09,67.470,68.110,-0.640
