In [7]:
import pandas as pd

In [8]:
# Load the raw gold price history exactly as exported; cleaning happens in the
# following cells.
gold_csv_path = 'data/gold.csv'
df = pd.read_csv(gold_csv_path)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,04/18/2025,3341.30,3341.30,3341.30,3341.30,,0.39%
1,04/17/2025,3328.40,3357.50,3371.90,3296.40,242.07K,-0.54%
2,04/16/2025,3346.40,3248.40,3358.40,3245.20,258.38K,3.27%
3,04/15/2025,3240.40,3226.10,3250.90,3225.50,123.47K,0.44%
4,04/14/2025,3226.30,3246.00,3261.60,3208.70,189.62K,-0.56%
...,...,...,...,...,...,...,...
2707,09/24/2014,1219.50,1223.20,1226.70,1216.20,131.42K,-0.20%
2708,09/23/2014,1222.00,1214.90,1237.00,1214.70,153.20K,0.34%
2709,09/22/2014,1217.90,1216.40,1221.00,1208.80,138.05K,0.11%
2710,09/19/2014,1216.60,1225.60,1229.20,1214.20,173.50K,-0.84%


In [9]:
# Parse the Date column. The source strings are month-first (mm/dd/yyyy,
# e.g. 04/18/2025), which is what the explicit format below expects.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Convert the price-like columns to float in one pass. Commas are stripped
# first so that values carrying a thousands separator can be cast; for columns
# that are already numeric the replace is a no-op.
price_columns = ['Price', 'Open', 'High', 'Low']
df[price_columns] = df[price_columns].replace(',', '', regex=True).astype(float)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2025-04-18,3341.3,3341.3,3341.3,3341.3,,0.39%
1,2025-04-17,3328.4,3357.5,3371.9,3296.4,242.07K,-0.54%
2,2025-04-16,3346.4,3248.4,3358.4,3245.2,258.38K,3.27%
3,2025-04-15,3240.4,3226.1,3250.9,3225.5,123.47K,0.44%
4,2025-04-14,3226.3,3246.0,3261.6,3208.7,189.62K,-0.56%
...,...,...,...,...,...,...,...
2707,2014-09-24,1219.5,1223.2,1226.7,1216.2,131.42K,-0.20%
2708,2014-09-23,1222.0,1214.9,1237.0,1214.7,153.20K,0.34%
2709,2014-09-22,1217.9,1216.4,1221.0,1208.8,138.05K,0.11%
2710,2014-09-19,1216.6,1225.6,1229.2,1214.2,173.50K,-0.84%


In [10]:
# The file is stored newest-first; put the rows in chronological order
# (oldest first). sort_values sorts ascending by default.
df = df.sort_values('Date')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
2711,2014-09-18,1226.9,1223.1,1228.7,1216.3,151.11K,-0.73%
2710,2014-09-19,1216.6,1225.6,1229.2,1214.2,173.50K,-0.84%
2709,2014-09-22,1217.9,1216.4,1221.0,1208.8,138.05K,0.11%
2708,2014-09-23,1222.0,1214.9,1237.0,1214.7,153.20K,0.34%
2707,2014-09-24,1219.5,1223.2,1226.7,1216.2,131.42K,-0.20%
...,...,...,...,...,...,...,...
4,2025-04-14,3226.3,3246.0,3261.6,3208.7,189.62K,-0.56%
3,2025-04-15,3240.4,3226.1,3250.9,3225.5,123.47K,0.44%
2,2025-04-16,3346.4,3248.4,3358.4,3245.2,258.38K,3.27%
1,2025-04-17,3328.4,3357.5,3371.9,3296.4,242.07K,-0.54%


In [11]:
# Expand the trading-day series to one row per calendar day.
start_date = pd.Timestamp('2014-09-18')
end_date = pd.Timestamp('2025-04-20')
date_range = pd.date_range(start_date, end_date)

# Days without a row of their own copy the row of the closest dated entry.
# NOTE(review): 'nearest' can take values from a *later* date (in the output
# below 2014-09-21 carries the 2014-09-22 row) -- confirm that this look-ahead
# is acceptable for downstream use; 'ffill' would only carry values forward.
daily = df.set_index('Date').reindex(date_range, method='nearest')

# reset_index exposes the new calendar index as a column called 'index';
# give it back the 'Date' name.
df = daily.reset_index().rename(columns={'index': 'Date'})
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2014-09-18,1226.9,1223.1,1228.7,1216.3,151.11K,-0.73%
1,2014-09-19,1216.6,1225.6,1229.2,1214.2,173.50K,-0.84%
2,2014-09-20,1216.6,1225.6,1229.2,1214.2,173.50K,-0.84%
3,2014-09-21,1217.9,1216.4,1221.0,1208.8,138.05K,0.11%
4,2014-09-22,1217.9,1216.4,1221.0,1208.8,138.05K,0.11%
...,...,...,...,...,...,...,...
3863,2025-04-16,3346.4,3248.4,3358.4,3245.2,258.38K,3.27%
3864,2025-04-17,3328.4,3357.5,3371.9,3296.4,242.07K,-0.54%
3865,2025-04-18,3341.3,3341.3,3341.3,3341.3,,0.39%
3866,2025-04-19,3341.3,3341.3,3341.3,3341.3,,0.39%


In [12]:
# Merge the daily gold open price from `df` into the combined dataset stored
# in data/data.csv, then write the file back in place.
data_csv_path = 'data/data.csv'
df_data = pd.read_csv(data_csv_path)

# If the file has no Date column yet, seed it from the gold frame
# (assigned by row index).
if 'Date' not in df_data.columns:
    df_data['Date'] = df['Date']

# Normalise Date to datetime *before* it is used as a lookup key below.
df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y-%m-%d')

if 'Gold Open' not in df_data.columns:
    # Look the price up by date rather than by row position, so that rows in
    # data.csv that are ordered differently from `df`, or that cover a
    # different set of dates, still receive the price of their own date.
    # Dates that have no gold row are left as NaN.
    gold_open_by_date = df.drop_duplicates('Date').set_index('Date')['Open']
    df_data['Gold Open'] = df_data['Date'].map(gold_open_by_date)

# Append gold rows when the gold frame extends past the last date on file.
# A file without any usable date (max is NaT) is treated as "everything is
# missing"; a plain `<` comparison against NaT would be False and skip it.
last_known_date = df_data['Date'].max()
if pd.isna(last_known_date) or last_known_date < df['Date'].max():
    # Every date of `df` that is absent from the file is appended, not only
    # the ones after last_known_date; other columns of those rows are NaN.
    # A non-inplace rename avoids mutating a slice of `df`.
    missing_dates = (
        df.loc[~df['Date'].isin(df_data['Date']), ['Date', 'Open']]
        .rename(columns={'Open': 'Gold Open'})
    )
    df_data = pd.concat([df_data, missing_dates], ignore_index=True)

# Write the updated dataset back to data/data.csv.
df_data.to_csv(data_csv_path, index=False)