In [1]:
import pandas as pd

In [2]:
# Load the raw oil price history; the path is relative to the notebook's working directory.
oil_csv_path = 'data/oil.csv'
df = pd.read_csv(oil_csv_path)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,04/17/2025,64.68,62.63,64.86,62.61,111.48K,3.54%
1,04/16/2025,62.47,61.54,62.98,60.44,213.19K,1.86%
2,04/15/2025,61.33,61.58,62.06,60.88,197.40K,-0.33%
3,04/14/2025,61.53,61.70,62.68,60.59,238.07K,0.05%
4,04/11/2025,61.50,60.20,61.87,59.43,306.23K,2.38%
...,...,...,...,...,...,...,...
2779,09/24/2014,92.80,91.72,93.29,91.12,282.63K,1.35%
2780,09/23/2014,91.56,90.70,92.09,90.58,247.77K,0.04%
2781,09/22/2014,91.52,92.22,92.64,91.20,23.94K,-0.96%
2782,09/19/2014,92.41,92.97,93.22,91.85,92.78K,-0.71%


In [3]:
# Parse the Date strings; they arrive as mm/dd/yyyy (e.g. 04/17/2025).
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Drop any thousands separators from each price column and cast it to float.
for price_col in ('Price', 'Open', 'High', 'Low'):
    df[price_col] = df[price_col].replace(',', '', regex=True).astype(float)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2025-04-17,64.68,62.63,64.86,62.61,111.48K,3.54%
1,2025-04-16,62.47,61.54,62.98,60.44,213.19K,1.86%
2,2025-04-15,61.33,61.58,62.06,60.88,197.40K,-0.33%
3,2025-04-14,61.53,61.70,62.68,60.59,238.07K,0.05%
4,2025-04-11,61.50,60.20,61.87,59.43,306.23K,2.38%
...,...,...,...,...,...,...,...
2779,2014-09-24,92.80,91.72,93.29,91.12,282.63K,1.35%
2780,2014-09-23,91.56,90.70,92.09,90.58,247.77K,0.04%
2781,2014-09-22,91.52,92.22,92.64,91.20,23.94K,-0.96%
2782,2014-09-19,92.41,92.97,93.22,91.85,92.78K,-0.71%


In [4]:
# Put the rows in chronological order (oldest first); the file is loaded newest-first.
# The original row labels are kept, so the index now runs in descending order.
df = df.sort_values('Date')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
2783,2014-09-18,93.07,93.97,94.82,92.85,170.77K,-1.43%
2782,2014-09-19,92.41,92.97,93.22,91.85,92.78K,-0.71%
2781,2014-09-22,91.52,92.22,92.64,91.20,23.94K,-0.96%
2780,2014-09-23,91.56,90.70,92.09,90.58,247.77K,0.04%
2779,2014-09-24,92.80,91.72,93.29,91.12,282.63K,1.35%
...,...,...,...,...,...,...,...
4,2025-04-11,61.50,60.20,61.87,59.43,306.23K,2.38%
3,2025-04-14,61.53,61.70,62.68,60.59,238.07K,0.05%
2,2025-04-15,61.33,61.58,62.06,60.88,197.40K,-0.33%
1,2025-04-16,62.47,61.54,62.98,60.44,213.19K,1.86%


In [5]:
# Expand the trading-day series to one row per calendar day between the two bounds.
start_date = pd.Timestamp('2014-09-18')
end_date = pd.Timestamp('2025-04-20')

date_range = pd.date_range(start_date, end_date)

# Days without a quote copy the row of the closest dated row.
# NOTE(review): 'nearest' can pick a *later* trading day (a Sunday takes Monday's
# values), which is look-ahead if this feeds a forecasting model - confirm that
# this is intended, otherwise method='ffill' would avoid it.
df = (
    df.set_index('Date')
    .reindex(date_range, method='nearest')
    .reset_index()
    .rename(columns={'index': 'Date'})  # reset_index names the new column 'index'
)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2014-09-18,93.07,93.97,94.82,92.85,170.77K,-1.43%
1,2014-09-19,92.41,92.97,93.22,91.85,92.78K,-0.71%
2,2014-09-20,92.41,92.97,93.22,91.85,92.78K,-0.71%
3,2014-09-21,91.52,92.22,92.64,91.20,23.94K,-0.96%
4,2014-09-22,91.52,92.22,92.64,91.20,23.94K,-0.96%
...,...,...,...,...,...,...,...
3863,2025-04-16,62.47,61.54,62.98,60.44,213.19K,1.86%
3864,2025-04-17,64.68,62.63,64.86,62.61,111.48K,3.54%
3865,2025-04-18,64.68,62.63,64.86,62.61,111.48K,3.54%
3866,2025-04-19,64.68,62.63,64.86,62.61,111.48K,3.54%


In [6]:
# Merge the daily oil open price into data/data.csv: create the 'Date' and
# 'Oil Open' columns when they are absent, append any days that are in df but
# not yet in the file, then write the file back in place.
df_data = pd.read_csv('data/data.csv')

# check if date column exists in data/data.csv
if 'Date' not in df_data.columns:
    # no dates recorded yet: take them row-for-row from the oil frame
    df_data['Date'] = df['Date']

# convert Date column to datetime so it can be compared and matched with df['Date']
df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y-%m-%d')

if 'Oil Open' not in df_data.columns:
    # Look the price up by date instead of by row position: a plain
    # `df_data['Oil Open'] = df['Open']` aligns on the integer index and assigns
    # the wrong day's price whenever the two frames do not start on the same
    # date or are not in the same order. Dates absent from df become NaN.
    open_by_date = df.set_index('Date')['Open']
    df_data['Oil Open'] = df_data['Date'].map(open_by_date)

# check if there are missing newer dates in data/data.csv compared to df
latest_saved = df_data['Date'].max()
# max() is NaT when the file holds no dated rows; NaT compares False against
# everything, so test for it explicitly and treat every date as missing.
if pd.isna(latest_saved) or latest_saved < df['Date'].max():
    # add missing dates to data/data.csv; .loc + non-inplace rename avoids
    # mutating a slice of df (SettingWithCopyWarning)
    missing_dates = (
        df.loc[~df['Date'].isin(df_data['Date']), ['Date', 'Open']]
        .rename(columns={'Open': 'Oil Open'})
    )
    # columns of df_data other than Date / Oil Open are left as NaN on the new rows
    df_data = pd.concat([df_data, missing_dates], ignore_index=True)

# write the updated data/data.csv
df_data.to_csv('data/data.csv', index=False)