In [22]:
import pandas as pd

In [23]:
# Load the NASDAQ export (Date, Price, Open, High, Low, Vol., Change %)
# and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='data/nasdaq.csv')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,04/17/2025,18258.09,18369.90,18401.05,18144.46,364.51M,0.00%
1,04/16/2025,18257.64,18475.76,18597.32,17995.96,452.01M,-3.04%
2,04/15/2025,18830.23,18831.98,19004.17,18754.87,273.21M,0.18%
3,04/14/2025,18796.02,19095.49,19115.15,18614.76,361.31M,0.57%
4,04/11/2025,18690.05,18309.29,18728.95,18153.32,459.77M,1.89%
...,...,...,...,...,...,...,...
2657,09/24/2014,4094.31,4055.42,4096.27,4041.72,159.49M,1.05%
2658,09/23/2014,4051.57,4045.67,4069.21,4043.26,166.69M,-0.24%
2659,09/22/2014,4061.23,4091.87,4092.14,4044.99,179.21M,-0.95%
2660,09/19/2014,4100.09,4113.02,4118.91,4084.45,539.73M,-0.07%


In [24]:
# Parse the Date column. The raw strings are month-first (mm/dd/yyyy),
# e.g. '04/17/2025', which is exactly what '%m/%d/%Y' expects.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Remove thousands separators (if the column holds strings such as '18,258.09')
# and cast every price column to float. A single loop replaces four
# copy-pasted lines so the columns cannot drift out of sync.
for price_column in ['Price', 'Open', 'High', 'Low']:
    df[price_column] = df[price_column].replace(',', '', regex=True).astype(float)

df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2025-04-17,18258.09,18369.90,18401.05,18144.46,364.51M,0.00%
1,2025-04-16,18257.64,18475.76,18597.32,17995.96,452.01M,-3.04%
2,2025-04-15,18830.23,18831.98,19004.17,18754.87,273.21M,0.18%
3,2025-04-14,18796.02,19095.49,19115.15,18614.76,361.31M,0.57%
4,2025-04-11,18690.05,18309.29,18728.95,18153.32,459.77M,1.89%
...,...,...,...,...,...,...,...
2657,2014-09-24,4094.31,4055.42,4096.27,4041.72,159.49M,1.05%
2658,2014-09-23,4051.57,4045.67,4069.21,4043.26,166.69M,-0.24%
2659,2014-09-22,4061.23,4091.87,4092.14,4044.99,179.21M,-0.95%
2660,2014-09-19,4100.09,4113.02,4118.91,4084.45,539.73M,-0.07%


In [25]:
# Put the rows in chronological order (oldest date first). The original row
# labels are kept, so the displayed index now counts down instead of up.
df = df.sort_values('Date')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
2661,2014-09-18,4103.08,4084.51,4103.41,4080.82,145.06M,0.72%
2660,2014-09-19,4100.09,4113.02,4118.91,4084.45,539.73M,-0.07%
2659,2014-09-22,4061.23,4091.87,4092.14,4044.99,179.21M,-0.95%
2658,2014-09-23,4051.57,4045.67,4069.21,4043.26,166.69M,-0.24%
2657,2014-09-24,4094.31,4055.42,4096.27,4041.72,159.49M,1.05%
...,...,...,...,...,...,...,...
4,2025-04-11,18690.05,18309.29,18728.95,18153.32,459.77M,1.89%
3,2025-04-14,18796.02,19095.49,19115.15,18614.76,361.31M,0.57%
2,2025-04-15,18830.23,18831.98,19004.17,18754.87,273.21M,0.18%
1,2025-04-16,18257.64,18475.76,18597.32,17995.96,452.01M,-3.04%


In [26]:
# Calendar window to cover; date_range holds one timestamp per calendar day.
start_date = pd.to_datetime('2014-09-18')
end_date = pd.to_datetime('2025-04-20')
date_range = pd.date_range(start=start_date, end=end_date)

# Re-index onto the daily calendar so every day in the window has a row.
# Days without a row of their own take the values of the closest existing date.
# NOTE(review): 'nearest' can copy values from a *later* date (the output shows
# 2014-09-21 carrying the 2014-09-22 row) - confirm this look-ahead is intended.
df = (
    df.set_index('Date')
    .reindex(date_range, method='nearest')
    .reset_index()  # the unnamed calendar index comes back as a column called 'index'
    .rename(columns={'index': 'Date'})
)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2014-09-18,4103.08,4084.51,4103.41,4080.82,145.06M,0.72%
1,2014-09-19,4100.09,4113.02,4118.91,4084.45,539.73M,-0.07%
2,2014-09-20,4100.09,4113.02,4118.91,4084.45,539.73M,-0.07%
3,2014-09-21,4061.23,4091.87,4092.14,4044.99,179.21M,-0.95%
4,2014-09-22,4061.23,4091.87,4092.14,4044.99,179.21M,-0.95%
...,...,...,...,...,...,...,...
3863,2025-04-16,18257.64,18475.76,18597.32,17995.96,452.01M,-3.04%
3864,2025-04-17,18258.09,18369.90,18401.05,18144.46,364.51M,0.00%
3865,2025-04-18,18258.09,18369.90,18401.05,18144.46,364.51M,0.00%
3866,2025-04-19,18258.09,18369.90,18401.05,18144.46,364.51M,0.00%


In [27]:
# Merge the NASDAQ opening prices into data/data.csv (read, update, write back).
df_data = pd.read_csv('data/data.csv')

# If data/data.csv has no Date column yet there is nothing to join on, so the
# dates can only be attached by matching row labels.
# NOTE(review): this assumes row i of data/data.csv corresponds to row i of df - confirm.
if 'Date' not in df_data.columns:
    df_data['Date'] = df['Date']

# Normalise Date to datetime *before* it is used as a lookup key below.
df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y-%m-%d')

# Attach NASDAQ Open by matching on Date instead of on row position, so a
# data/data.csv that starts on a different day, is ordered differently, or has
# gaps still receives the right value for each date. Dates that do not occur
# in df end up as NaN.
# NOTE: Series.map needs the dates in df to be unique (one row per date).
if 'NASDAQ Open' not in df_data.columns:
    open_by_date = df.set_index('Date')['Open']
    df_data['NASDAQ Open'] = df_data['Date'].map(open_by_date)

# When df reaches past the newest date already stored, append every date that
# is in df but not in data/data.csv. Only Date and NASDAQ Open are filled for
# the appended rows; any other column of data/data.csv is left empty for them.
if df_data['Date'].max() < df['Date'].max():
    is_missing = ~df['Date'].isin(df_data['Date'])
    missing_dates = df.loc[is_missing, ['Date', 'Open']].rename(
        columns={'Open': 'NASDAQ Open'}
    )
    df_data = pd.concat([df_data, missing_dates], ignore_index=True)

# Overwrite data/data.csv with the updated table (no index column is written).
df_data.to_csv('data/data.csv', index=False)