In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file below the Kaggle input
# directory, then print one path per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the raw 1-minute Bitstamp BTC/USD data from the Kaggle input directory.
RAW_CSV_PATH = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv'
df = pd.read_csv(RAW_CSV_PATH)

# Preview the first rows of the loaded frame.
df.head()

### Rough steps to resample data:
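1. Convert the ```Timestamp``` column from UNIX epoch seconds to UTC datetimes.
2. I think we should get rid of the ```Weighted_Price``` column: there is NO indication anywhere of how it is calculated. (It is not dropped explicitly; it is simply left out of the aggregation dictionary used for resampling, so it does not appear in the resampled output.)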
3. Fill the missing values based on the following settings

| Columns                  | Fill NA value | Fill NA method         | Rationale                                                                          |
|--------------------------|---------------|------------------------|------------------------------------------------------------------------------------|
| Volumes (btc & currency) | 0.0           | -                      | Volume is zero because NO transaction occurred                                     |
| Weighted price           | -             | -                      | We will get rid of this column                                                     |
| Close                    | -             | Forward Fill           | Follow previous period Close price                                                 |
| Open, High, Low          | -             | Row-wise Backward Fill | If no transaction occurred, Open = High = Low = Close. So just follow Close price. |

In [None]:
from datetime import datetime    # to translate unix time stamp to UTC time

In [None]:
# Example: epoch second 1600041360 corresponds to 2020-09-13 23:56:00 UTC.
# pd.to_datetime(unit='s') is used instead of datetime.utcfromtimestamp,
# which is deprecated since Python 3.12.
pd.to_datetime(1600041360, unit='s')

In [None]:
# Convert the UNIX epoch seconds in `Timestamp` to timezone-naive UTC datetimes.
# A single vectorized pd.to_datetime call replaces the per-row
# datetime.utcfromtimestamp (deprecated since Python 3.12), which is slow on a
# multi-year 1-minute series.
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

df

In [None]:
# Promote Timestamp to the index (set_index removes the column by default);
# the time-based resampling further down works on this index.
df = df.set_index('Timestamp')

In [None]:
# Display the frame to inspect the result of the preceding step.
df

In [None]:
# Step 3 (volumes): a missing volume means no transaction occurred in that
# period, so record it as zero.
volume_cols = ['Volume_(BTC)', 'Volume_(Currency)']
df[volume_cols] = df[volume_cols].fillna(0)

df

In [None]:
# Step 3 (Close): a period without a Close inherits the PREVIOUS period's
# Close (forward fill).
# Series.ffill() replaces fillna(method='ffill'); the `method` argument of
# fillna is deprecated in pandas 2.1+.
df['Close'] = df['Close'].ffill()
df

In [None]:
# Step 3 (Open, High, Low): where no transaction occurred, copy the row's
# Close into Open, High and Low (row-wise backward fill).
# The fill is restricted to the OHLC columns so that it does not depend on
# column order and cannot copy values from non-price columns (e.g. the
# zero-filled volumes) into prices when Close itself is still NaN.
# DataFrame.bfill(axis=1) replaces fillna(method='backfill'); the `method`
# argument of fillna is deprecated in pandas 2.1+.
ohlc_cols = ['Open', 'High', 'Low', 'Close']
df[ohlc_cols] = df[ohlc_cols].bfill(axis=1)
df

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the Close series on an explicit Axes, with a title and axis labels so
# the figure can be read on its own; plt.show() suppresses the Line2D repr.
fig, ax = plt.subplots()
ax.plot(df['Close'])
ax.set(
    title='BTC/USD close price (Bitstamp, 1-minute data)',
    xlabel='Time (UTC)',
    ylabel='Close (USD)',
)
plt.show()

In [None]:
# Aggregations used when resampling OHLCV data to a coarser interval:
# first Open, highest High, lowest Low, last Close, summed volumes.
# Columns not listed here (e.g. Weighted_Price) are left out of the result.
# String names are used instead of np.max / np.min / np.sum because pandas 2.x
# emits a FutureWarning when NumPy callables are passed to .agg; the strings
# keep the current behaviour.
agg_functions = {
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume_(BTC)': 'sum',
    'Volume_(Currency)': 'sum',
}

In [None]:
# Resample into 1-hour bins using the OHLCV aggregations.
# The lowercase 'h' alias is used because the uppercase 'H' frequency alias
# is deprecated in pandas 2.2+.
df_1H = df.resample('1h').agg(agg_functions)
df_1H

In [None]:
# Resample into 4-hour bins using the OHLCV aggregations.
# The lowercase 'h' alias is used because the uppercase 'H' frequency alias
# is deprecated in pandas 2.2+.
df_4H = df.resample('4h').agg(agg_functions)

In [None]:
# Resample into 1-day bins, then apply the OHLCV aggregations to each bin.
daily_bins = df.resample('1D')
df_1D = daily_bins.agg(agg_functions)

In [None]:
# Write each resampled frame to its own CSV file in the current directory.
for out_path, frame in (
    ('./BTC_1H.csv', df_1H),
    ('./BTC_4H.csv', df_4H),
    ('./BTC_1D.csv', df_1D),
):
    frame.to_csv(out_path)