In [1]:
import pandas as pd
from datetime import timedelta
import datetime as dt
import json
import os
import glob
from pprint import pprint

In [2]:
# Read the scraped price data from the JSON files in ../stock_scrape_load/data
# (resolved relative to the current working directory).
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
target_dir = os.path.join(parent_dir,'stock_scrape_load','data')
file_path = f"{target_dir}/*.json"

# Parsed payload of each JSON file, one list entry per file
data = []

# glob returns matches in arbitrary order; sorting keeps the load order
# (and therefore the order of tied rows after the sort below) reproducible.
for file in sorted(glob.glob(file_path)):
    # JSON text is UTF-8 by specification; do not depend on the locale default encoding.
    with open(file, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)
        data.append(json_data)

# Fail with the searched pattern instead of pandas' "No objects to concatenate".
if not data:
    raise FileNotFoundError(f"No JSON files matched {file_path!r}")

# Build one DataFrame per file and stack them into a single frame with a fresh index
prices_df = pd.concat([pd.DataFrame(d) for d in data], ignore_index = True)
prices_df['date'] = pd.to_datetime(prices_df['date'])

# Order rows by ticker, then chronologically within each ticker
prices_df = prices_df.sort_values(['ticker','date'])
# prices_df.head()


In [3]:

def get_closing_date(row):
    """Return the previous-close date for one row of price data.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'date'`` (the reported day) and ``'datePrevClose'``
        (a candidate previous date, which may be NaT/None).

    Returns
    -------
    The candidate date, replaced by the day before ``row['date']`` when it is
    missing, and moved back a further two days when ``row['date']`` is a Monday.
    """
    reported_day = row['date']
    actual_close_day = row['datePrevClose']

    # Fill a missing candidate FIRST so the Monday rule below is applied to it too;
    # otherwise a Monday row with no previous date would end up on Sunday.
    if pd.isnull(actual_close_day):
        actual_close_day = reported_day - timedelta(days=1)

    # NOTE(review): the two-day shift presumes the candidate is the Sunday before
    # the reported Monday; if the candidate is already a Friday this lands on
    # Wednesday. Confirm against the cadence of the scraped data.
    if reported_day.weekday() == 0:  # the reported day is a Monday
        actual_close_day -= timedelta(days=2)  # Sunday -> Friday

    return actual_close_day

# prices_df is ordered by ticker then date, so shifts must be done per ticker:
# a frame-wide shift would hand the first row of a ticker the last date of the
# previous ticker (and the last row the next ticker's previousClose).
prices_df['datePrevClose'] = prices_df.groupby('ticker')['date'].shift(1) # previous row's date within the same ticker (NaT on a ticker's first row)
prices_df['datePrevClose'] = prices_df.apply(get_closing_date, axis=1) # fill gaps / apply the Monday rule row by row
prices_df['dailyClose'] = prices_df.groupby('ticker')['previousClose'].shift(-1) # next row's previousClose within the same ticker (NaN on a ticker's last row)

# prices_df.head(18)


In [4]:
# Columns to carry into the final JSON output
final_columns = [
    'ticker', 'date', 'datePrevClose',
    'dailyOpen', 'dayHigh', 'dayLow',
    'dailyClose', 'previousClose', 'dailyVolume',
    'ask', 'bid',
]
ticker = pd.DataFrame(prices_df[final_columns])

# Report how many distinct dates appear in the combined data
day_count = prices_df['date'].nunique()
print(f"There are {day_count} days of data represented")
# ticker.head(16)


There are 4 days of data represented


In [10]:
historical_json = []  # not used by the code in this cell

# Group by the plain column name rather than a one-element list: with a list,
# newer pandas versions yield one-element tuples as group keys, and the
# "ticker" field would then be written to JSON as an array instead of a string.
grouped_tickers = prices_df.groupby('ticker')
grouped_dates = prices_df.groupby('date')

# Top-level structure: the distinct tickers, the distinct dates, and one
# "history" entry per (ticker, row) appended by the loop below.
data = {
    "tickers": list(grouped_tickers.groups.keys()),
    "dates": list(grouped_dates.groups.keys()),
    "history": []
}

# Columns copied into each entry's "values" record, in output order.
# 'dailyClose' and the ticker symbol are currently not included there.
value_columns = [
    'date', 'datePrevClose', 'dailyOpen', 'dayHigh', 'dayLow',
    'previousClose', 'ask', 'bid', 'dailyVolume',
]

for ticker, group in grouped_tickers:
    for index, row in group.iterrows():
        ticker_data = {
            "ticker": ticker,
            "date": row['date'],
            # Single-element list holding this row's price fields
            "values": [{column: row[column] for column in value_columns}]
        }
        data["history"].append(ticker_data)

# json cannot work with datetime dtypes: https://www.geeksforgeeks.org/how-to-fix-datetime-datetime-not-json-serializable-in-python/
def serialize_dt(obj):
    """Fallback serializer for ``json.dumps(..., default=serialize_dt)``.

    Parameters
    ----------
    obj : object
        A value the json encoder could not serialize on its own.

    Returns
    -------
    str
        ``obj.isoformat()`` when ``obj`` is a ``datetime.date``,
        ``datetime.datetime`` (a ``date`` subclass) or ``datetime.time``.

    Raises
    ------
    TypeError
        For every other type.
    """
    # dt.date also matches dt.datetime and its subclasses
    if isinstance(obj, (dt.date, dt.time)):
        return obj.isoformat()
    raise TypeError("Type not serializable")

# Render the assembled structure as indented JSON text; values the encoder
# cannot handle natively are passed through serialize_dt.
json_data = json.dumps(data, default=serialize_dt, indent=4)

# Write the JSON text to historical.json in the current working directory
with open("historical.json", mode="w") as json_file:
    json_file.write(json_data)

In [8]:

# # Step 1: Get unique tickers
# unique_tickers = prices_df['ticker'].unique()

# # Step 2: Create a dictionary to store the nested JSON structure
# data = []

# # Step 3: Iterate over the unique tickers
# for ticker in unique_tickers:
#     # Filter the DataFrame for the current ticker
#     ticker_data = prices_df[prices_df['ticker'] == ticker]

#     # Create a dictionary for each day
#     day_data = []
#     for _, row in ticker_data.iterrows():
#         day = {
#             "symbol": ticker,
#             "date": row['date'],
#             "dailyOpen": row['dailyOpen'],
#             "ask": row['ask'],
#             "bid": row['bid'],
#             "previousClose": row['previousClose'],
#             "dailyVolume": row['dailyVolume']
#         }
#         day_data.append(day)

#     # Create a dictionary for the ticker with nested day data
#     ticker_dict = {
#         "ticker": ticker,
#         "day": day_data
#     }

#     # Append the ticker dictionary to the data list
#     data.append(ticker_dict)

# # json cannot work with datetime dtypes: https://www.geeksforgeeks.org/how-to-fix-datetime-datetime-not-json-serializable-in-python/
# def serialize_dt(obj):
#     if isinstance(obj, dt.datetime):
#         return obj.isoformat()
#     raise TypeError("Type not serializable")

# json_data = json.dumps(data, indent=4, default = serialize_dt)
# with open("historical2.json", "w") as json_file:
#     json_file.write(json_data)
    

In [8]:
# historical_json = []
# grouped_tickers = prices_df.groupby(['ticker'])
# grouped_dates = prices_df.groupby(['date'])
# data = {
#     "tickers": list(grouped_tickers.groups.keys()),
#     "dates": list(grouped_dates.groups.keys()),
#     "history": []
# }

# for ticker, group in grouped_tickers:
#     ticker_data = {
#         "ticker": ticker,
#         "date": row['date']
#         "market": []
#     }

#     for date, day_group in group.groupby('date'):
#         date_data ={
#             "date": date,
#             # "symbol": row['ticker'],
#             "stats":[]
#         }
    
#         for index, row in day_group.iterrows():
#             entry_data = {
#                 # "symbol": row['ticker'],
#                 "date": row['date'],
#                 "datePrevClose": row['datePrevClose'],
#                 "dailyOpen": row['dailyOpen'],
#                 "dayHigh": row['dayHigh'],
#                 "dayLow": row['dayLow'],
#                 # "dailyClose": row['dailyClose'],
#                 "previousClose": row['previousClose'],
#                 "ask": row['ask'],
#                 "bid": row['bid'],
#                 "dailyVolume": row['dailyVolume']
#             }
#             date_data["market"].append(entry_data)
#         ticker_data["market"].append(date_data)
#     data["history"].append(ticker_data)
#     #     ticker_data["day"].append(day_data)
#     # data["days"].append(ticker_data)

# # json cannot work with datetime dtypes: https://www.geeksforgeeks.org/how-to-fix-datetime-datetime-not-json-serializable-in-python/
# def serialize_dt(obj):
#     if isinstance(obj, dt.datetime):
#         return obj.isoformat()
#     raise TypeError("Type not serializable")

# json_data = json.dumps(data, indent=4, default = serialize_dt)
# with open("historical.json", "w") as json_file:
#     json_file.write(json_data)
    

In [None]:
#-----------
# Archive code below


In [9]:
## Old code-- delete later

# prices_df.to_json(
#     path_or_buf = 'all_prices_xdf.json',
#     orient = 'records',
#     date_format = 'iso')

# Old code - second attempt - delete later    json_file.write(json_data)



# Old code - delete later

# The following is an attempt to reorganize the dataframe into a structured JSON. Then I realized it did not really matter.
# json_data = {}

# for index, row in prices_df.iterrows():
#     date = row['date']
#     open = row['dailyOpen'], 
#     close = row['previousClose'], 
#     volume = row['dailyVolume'],
#     ticker = row['ticker'], 
#     cap = row['marketCap'], 
#     quick = row['quickRatio'],
#     sector = row['sector'],
#     low = row['52WeekLow'], 
#     high = row['52WeekHigh'], 
#     change = row['52WeekChange'],
#     ave_vol = row['10DayAverageVolume']

#     if date not in json_data:
#         json_data[date] = {'price':[],'info':[],'stats':[]}

#     json_data[date]['price'].append({'ticker':ticker,
#                                      'dailyOpen':open,
#                                      'previousClose':close,
#                                      'dailyVolume':volume
#                                                       })
#     json_data[date]['info'].append({'sector':sector,
#                                     'marketCap':cap,
#                                     })
#     json_data[date]['stats'].append({'52WeekLow':low,
#                                      '52WeekHigh':high, 
#                                      '52WeekChange': change,
#                                      '10DayAverageVolume':ave_vol
#                                      })
# # price_json = json.dumps(json_data)

# # pprint(price_json)

# prices = pd.DataFrame(json_data)
# prices.to_json('all_prices.json')


# Old code -- delete later

# Same comment as previous cell

# output_path = f"{target_dir}/all_prices.json"

# with open('all_prices.json','w') as file:
#     file.write(price_json)