In [1]:
# Import libraries
import pandas as pd

### Resample 1-minute OHLCV data to a different timeframe

In [3]:
def resample_ohlcv(input_file, output_file, timeframe='1h'):
    """Resample an OHLCV CSV to another timeframe and write the result as CSV.

    The input is read with ``pd.read_csv`` (its first row is treated as the
    header) and its columns are relabelled positionally as Timestamp, Open,
    High, Low, Close, Volume. Bars are built with left-closed, left-labelled
    bins: Open is the first value, High the max, Low the min, Close the last
    and Volume the sum. Bins containing any NaN (e.g. bins with no rows) are
    dropped.

    Args:
        input_file: Path of the CSV to read; must have exactly six columns.
        output_file: Path the resampled CSV is written to (Timestamp index
            included).
        timeframe: pandas offset alias for the target bar size, e.g. ``'1h'``.

    Returns:
        None. Success or failure is reported on stdout only; any exception
        raised while reading, resampling, validating or writing is caught and
        printed as ``Error during resampling: ...``.
    """
    try:
        # Read and relabel positionally; a file without exactly six columns
        # fails here with pandas' length-mismatch error.
        df = pd.read_csv(input_file)
        df.columns = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume']
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        df = df.set_index('Timestamp')

        # Sort only when needed. A stable sort keeps rows that share a
        # timestamp in file order, so 'first'/'last' stay deterministic.
        if not df.index.is_monotonic_increasing:
            df = df.sort_index(kind='mergesort')

        # Resample into OHLCV bars and drop incomplete bins.
        df_resampled = (
            df.resample(timeframe, closed='left', label='left')
            .agg({
                'Open': 'first',
                'High': 'max',
                'Low': 'min',
                'Close': 'last',
                'Volume': 'sum',
            })
            .dropna()
        )

        # Validate with explicit raises rather than `assert`, which is
        # stripped when Python runs with -O.
        if df_resampled.empty:
            raise ValueError("Resampling resulted in empty dataframe")
        # Safety net: dropna() above already removes rows containing NaN.
        if df_resampled.isnull().any().any():
            raise ValueError("NaN values found in resampled data")

        # Save output
        df_resampled.to_csv(output_file)
        print(f"Successfully resampled to {timeframe} timeframe")

    except Exception as e:
        # Report-and-continue: callers of this function never see an exception.
        print(f"Error during resampling: {e}")

# Usage: convert the SOLUSDT futures file (named as 1m data) into 1h bars
input_file = 'data/futures/SOLUSDT/SOLUSDT_futures_2020-01-01_to_2025-01-30_1m.csv'
output_file = '../data/SOLUSDT_1h_2020-2025.csv'
resample_ohlcv(input_file=input_file, output_file=output_file, timeframe='1h')

Successfully resampled to 1h timeframe


In [4]:
# Load a resampled hourly file and preview its final rows.
# NOTE(review): this reads the BTCUSDT file, while the usage cell in this
# notebook writes the SOLUSDT file — confirm this is the intended input.
df = pd.read_csv('../data/BTCUSDT_1h_2020-2025.csv')
df.tail()  # tail() defaults to the last 5 rows

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume
44527,2025-01-29 07:00:00,102763.2,102768.2,102417.4,102723.2,3872.373
44528,2025-01-29 08:00:00,102723.2,103012.6,102644.1,102705.0,4329.903
44529,2025-01-29 09:00:00,102705.0,102744.8,102258.4,102300.0,4454.092
44530,2025-01-29 10:00:00,102300.0,102690.0,102153.8,102510.0,3107.374
44531,2025-01-29 11:00:00,102510.0,102650.0,102374.4,102635.0,1396.357
