In [9]:
import pandas as pd

In [10]:
# Load the BTC price history from disk; the bare expression below renders it.
df = pd.read_csv(filepath_or_buffer='data/btc.csv')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,04/20/2025,85236.6,85075.5,85321.0,85048.8,28.94K,0.20%
1,04/19/2025,85068.1,84472.4,85604.3,84366.5,32.07K,0.70%
2,04/18/2025,84474.6,84947.7,85106.1,84331.0,25.83K,-0.55%
3,04/17/2025,84940.0,84032.2,85467.3,83786.2,49.28K,1.08%
4,04/16/2025,84032.2,83648.1,85438.2,83143.5,63.97K,0.46%
...,...,...,...,...,...,...,...
3863,09/22/2014,404.1,401.6,410.8,398.6,13.52K,0.62%
3864,09/21/2014,401.6,411.5,415.1,394.5,10.42K,-2.41%
3865,09/20/2014,411.5,397.7,427.7,387.5,14.29K,3.49%
3866,09/19/2014,397.7,424.3,429.2,386.1,14.97K,-6.27%


In [11]:
# Parse the Date column into datetimes. The source strings are month-first
# (mm/dd/yyyy, e.g. 04/20/2025), which is what the '%m/%d/%Y' format expects.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Convert the price columns to float. Thousands separators (',') are stripped
# first so that values stored as text such as "85,236.6" can be cast; columns
# that are already numeric pass through the replace unchanged.
for column in ('Price', 'Open', 'High', 'Low'):
    df[column] = df[column].replace(',', '', regex=True).astype(float)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2025-04-20,85236.6,85075.5,85321.0,85048.8,28.94K,0.20%
1,2025-04-19,85068.1,84472.4,85604.3,84366.5,32.07K,0.70%
2,2025-04-18,84474.6,84947.7,85106.1,84331.0,25.83K,-0.55%
3,2025-04-17,84940.0,84032.2,85467.3,83786.2,49.28K,1.08%
4,2025-04-16,84032.2,83648.1,85438.2,83143.5,63.97K,0.46%
...,...,...,...,...,...,...,...
3863,2014-09-22,404.1,401.6,410.8,398.6,13.52K,0.62%
3864,2014-09-21,401.6,411.5,415.1,394.5,10.42K,-2.41%
3865,2014-09-20,411.5,397.7,427.7,387.5,14.29K,3.49%
3866,2014-09-19,397.7,424.3,429.2,386.1,14.97K,-6.27%


In [12]:
# Order the rows chronologically (oldest date first); the frame displayed
# above is newest-first.
df = df.sort_values('Date')
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
3867,2014-09-18,424.3,461.1,462.3,408.8,8.93K,-7.99%
3866,2014-09-19,397.7,424.3,429.2,386.1,14.97K,-6.27%
3865,2014-09-20,411.5,397.7,427.7,387.5,14.29K,3.49%
3864,2014-09-21,401.6,411.5,415.1,394.5,10.42K,-2.41%
3863,2014-09-22,404.1,401.6,410.8,398.6,13.52K,0.62%
...,...,...,...,...,...,...,...
4,2025-04-16,84032.2,83648.1,85438.2,83143.5,63.97K,0.46%
3,2025-04-17,84940.0,84032.2,85467.3,83786.2,49.28K,1.08%
2,2025-04-18,84474.6,84947.7,85106.1,84331.0,25.83K,-0.55%
1,2025-04-19,85068.1,84472.4,85604.3,84366.5,32.07K,0.70%


In [13]:
# Build a gap-free daily calendar covering the loaded data and snap every
# calendar day to the closest available row, so any missing day is filled
# from its nearest neighbour.
#
# The bounds are taken from the data instead of being hard-coded: with fixed
# dates, a refreshed data/btc.csv containing newer rows would be silently
# truncated by the reindex below.
start_date = df['Date'].min()
end_date = df['Date'].max()

date_range = pd.date_range(start=start_date, end=end_date)
df = (
    df.set_index('Date')
    .reindex(date_range, method='nearest')
    # The new index carries no name; label it so reset_index() yields 'Date'.
    .rename_axis('Date')
    .reset_index()
)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2014-09-18,424.3,461.1,462.3,408.8,8.93K,-7.99%
1,2014-09-19,397.7,424.3,429.2,386.1,14.97K,-6.27%
2,2014-09-20,411.5,397.7,427.7,387.5,14.29K,3.49%
3,2014-09-21,401.6,411.5,415.1,394.5,10.42K,-2.41%
4,2014-09-22,404.1,401.6,410.8,398.6,13.52K,0.62%
...,...,...,...,...,...,...,...
3863,2025-04-16,84032.2,83648.1,85438.2,83143.5,63.97K,0.46%
3864,2025-04-17,84940.0,84032.2,85467.3,83786.2,49.28K,1.08%
3865,2025-04-18,84474.6,84947.7,85106.1,84331.0,25.83K,-0.55%
3866,2025-04-19,85068.1,84472.4,85604.3,84366.5,32.07K,0.70%


In [14]:
# Merge the cleaned BTC series into data/data.csv: make sure the 'Date',
# 'BTC Close' and 'BTC Open' columns exist, append the dates data.csv does not
# have yet, and write the file back in place.
df_data = pd.read_csv('data/data.csv')

# Backfill any column data.csv does not have yet. Assigning a Series aligns on
# the row index, so row i of data.csv receives the value from row i of df.
# NOTE(review): presumably data.csv rows follow the same daily order as df and
# start on the same date -- confirm.
if 'Date' not in df_data.columns:
    df_data['Date'] = df['Date']
if 'BTC Close' not in df_data.columns:
    df_data['BTC Close'] = df['Price']
if 'BTC Open' not in df_data.columns:
    df_data['BTC Open'] = df['Open']

# A Date column read from the CSV arrives as text; convert it to datetime so it
# can be compared with df['Date'].
df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y-%m-%d')

# Only extend data.csv when df reaches past its latest date.
if df_data['Date'].max() < df['Date'].max():
    # Take the rows of df whose date is absent from data.csv and map the column
    # names onto the data.csv schema. Selecting with .loc and renaming without
    # inplace=True produces a fresh frame, which avoids the
    # SettingWithCopyWarning that renaming a slice in place can raise.
    missing_dates = (
        df.loc[~df['Date'].isin(df_data['Date']), ['Date', 'Price', 'Open']]
        .rename(columns={'Price': 'BTC Close', 'Open': 'BTC Open'})
    )
    df_data = pd.concat([df_data, missing_dates], ignore_index=True)

# Persist the updated table, overwriting the file that was read above.
df_data.to_csv('data/data.csv', index=False)