In [10]:
# Import libraries
import pandas as pd

### Resample 1-minute OHLCV data to a different timeframe

In [11]:
def resample_ohlcv(input_file, output_file, timeframe='1h'):
    """Resample an OHLCV CSV to a coarser timeframe and save the result as CSV.

    The input is read with ``pd.read_csv`` and its columns are renamed
    positionally to ``Timestamp, Open, High, Low, Close, Volume``, so the file
    must contain exactly six columns in that order. Rows are sorted by
    timestamp if necessary, then aggregated into left-closed, left-labelled
    bins: first open, highest high, lowest low, last close, summed volume.
    Bins that contain no source rows are dropped.

    Args:
        input_file: Path (or buffer) of the source CSV.
        output_file: Path (or buffer) the resampled CSV is written to; the
            bin timestamp is written as the first column.
        timeframe: pandas offset alias for the target bin size (e.g. ``'3h'``).

    Returns:
        None. Progress is reported on stdout.

    Note:
        Failures are not raised to the caller: any exception is caught and
        printed as ``Error during resampling: ...``, and no output file is
        written in that case. Check the printed message before relying on
        ``output_file``.
    """
    try:
        # Read and prepare data; header names in the file are replaced positionally.
        bars = pd.read_csv(input_file)
        bars.columns = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume']
        bars['Timestamp'] = pd.to_datetime(bars['Timestamp'])
        bars = bars.set_index('Timestamp')

        # 'first'/'last' below are positional, so rows must be in time order.
        if not bars.index.is_monotonic_increasing:
            bars = bars.sort_index()

        # Resample. dropna() removes bins with no source rows (their price
        # columns are NaN), which also guarantees the result is NaN-free.
        df_resampled = bars.resample(timeframe, closed='left', label='left').agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'Volume': 'sum'
        }).dropna()

        # Validate output with an explicit raise rather than `assert`, so the
        # check still runs when Python is started with -O.
        if df_resampled.empty:
            raise ValueError("Resampling resulted in empty dataframe")

        # Save output
        df_resampled.to_csv(output_file)
        print(f"Successfully resampled to {timeframe} timeframe")

    except Exception as e:
        # Deliberately best-effort: report the problem instead of raising.
        print(f"Error during resampling: {str(e)}")

# Usage: turn the 1-minute BTCUSDT futures file into 3-hour bars
input_file = 'data/futures/BTCUSDT/BTCUSDT_futures_2020-01-01_to_2025-01-29_1m.csv'
output_file = '../data/BTCUSDT_3h_2020-2025.csv'
resample_ohlcv(input_file=input_file, output_file=output_file, timeframe='3h')

Successfully resampled to 3h timeframe


In [12]:
# Read the resampled CSV back in and preview its final five rows.
df = pd.read_csv(filepath_or_buffer='../data/BTCUSDT_3h_2020-2025.csv')
df.tail(n=5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume
14839,2025-01-28 21:00:00,101226.3,101489.0,100235.0,101279.6,23501.067
14840,2025-01-29 00:00:00,101279.7,102300.0,101269.7,101744.8,11149.268
14841,2025-01-29 03:00:00,101744.9,102368.4,101725.5,102218.3,6990.586
14842,2025-01-29 06:00:00,102218.3,103012.6,102187.5,102705.0,12724.673
14843,2025-01-29 09:00:00,102705.0,102744.8,102153.8,102635.0,8957.823
