In [1]:
import io
import pandas as pd
import numpy as np
import plotly.graph_objects as go

## Uploading data to Colab


In [7]:
# NOTE(review): google.colab is presumably only importable inside a Google Colab
# runtime, so this cell is expected to fail elsewhere — confirm.
from google.colab import files


# Ask for the file(s) to upload. The result is later indexed by file name and
# wrapped in io.BytesIO, so it presumably maps each file name to its raw bytes.
uploaded = files.upload()

Saving data-1646220539832.csv to data-1646220539832.csv


## Loading data

In [9]:
# Read uploaded colab file.
# The expected key is preferred; if it is absent (Colab may store a re-uploaded
# file under a different name, e.g. "name (1).csv" — verify), fall back to the
# only uploaded file. Anything ambiguous still raises KeyError, as before.
DATA_FILENAME = 'data-1646220539832.csv'
if DATA_FILENAME in uploaded:
    uploaded_name = DATA_FILENAME
elif len(uploaded) == 1:
    uploaded_name = next(iter(uploaded))
else:
    raise KeyError(
        f"{DATA_FILENAME!r} not found among uploaded files: {sorted(uploaded)}"
    )
data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))
# Read normal csv file
# data = pd.read_csv('file-path.csv')
data.describe()

Unnamed: 0,pool_id,low_ratio,high_ratio,open_ratio,close_ratio
count,329.0,329.0,329.0,329.0,329.0
mean,3.0,0.334452,0.336047,0.33519,0.335228
std,0.0,0.013375,0.017783,0.014972,0.015761
min,3.0,0.29434,0.311105,0.308118,0.29434
25%,3.0,0.333334,0.333334,0.333334,0.333334
50%,3.0,0.333335,0.333335,0.333335,0.333335
75%,3.0,0.333335,0.333335,0.333335,0.333335
max,3.0,0.423896,0.456512,0.429383,0.448699


## Aggregating missing timeframes

In [10]:
# Removing timezone: drop the last 6 characters of each timestamp string and
# parse the remainder as a naive datetime.
# NOTE(review): the slice assumes every value ends in a "+HH:MM"-style offset —
# confirm against the CSV export.
# The dtype guard keeps this cell re-runnable: once the column already holds
# datetimes, slicing a Timestamp with x[:-6] would raise a TypeError.
if not pd.api.types.is_datetime64_any_dtype(data['timeframe']):
    data['timeframe'] = data['timeframe'].apply(lambda x: pd.to_datetime(x[:-6]))

# The first and last rows bound the plotted range (assumes the rows are already
# in chronological order — TODO confirm).
starting_date = data['timeframe'].values[0]
end_date = data['timeframe'].values[-1]

print("Starting date: ", starting_date)
print("End date: ", end_date)

Starting date:  2022-03-01T21:51:00.000000000
End date:  2022-03-02T11:28:00.000000000


In [11]:
# Show the frame after the timeframe column has been converted to datetimes.
data

Unnamed: 0,timeframe,pool_id,low_ratio,high_ratio,open_ratio,close_ratio
0,2022-03-01 21:51:00,3,0.333333,0.333333,0.333333,0.333333
1,2022-03-01 21:53:00,3,0.333333,0.333333,0.333333,0.333333
2,2022-03-01 21:54:00,3,0.333333,0.333333,0.333333,0.333333
3,2022-03-01 21:56:00,3,0.333333,0.333333,0.333333,0.333333
4,2022-03-01 21:57:00,3,0.333333,0.333333,0.333333,0.333333
...,...,...,...,...,...,...
324,2022-03-02 11:24:00,3,0.368501,0.383268,0.383268,0.379423
325,2022-03-02 11:25:00,3,0.342202,0.380361,0.368540,0.342202
326,2022-03-02 11:26:00,3,0.337204,0.352006,0.352006,0.342037
327,2022-03-02 11:27:00,3,0.340038,0.366787,0.340038,0.363201


In [12]:
# Pool whose candles are rebuilt on a gap-free one-minute grid.
POOL_ID = 3

explicit_pool = data.loc[data['pool_id'] == POOL_ID].drop(columns=['pool_id'])

# One row per minute between the first and last observation; minutes with no
# matching source row come out of the join as NaN.
minute_grid = pd.DataFrame(
    pd.date_range(start=starting_date, end=end_date, freq='min'),
    columns=['timeframe'],
)
new_data = (
    minute_grid
    .join(explicit_pool.set_index('timeframe'), on='timeframe')
    .sort_values(['timeframe'])
)

# A minute counts as missing when it has no close value. Its close is carried
# forward from the previous known close, and open/high/low are set to that same
# value, producing a flat candle.
missing_data = new_data['close_ratio'].isna()
new_data['close_ratio'] = new_data['close_ratio'].ffill()
for ratio_column in ['open_ratio', 'high_ratio', 'low_ratio']:
    new_data[ratio_column] = new_data[ratio_column].mask(
        missing_data, new_data['close_ratio']
    )
new_data

Unnamed: 0,timeframe,low_ratio,high_ratio,open_ratio,close_ratio
0,2022-03-01 21:51:00,0.333333,0.333333,0.333333,0.333333
1,2022-03-01 21:52:00,0.333333,0.333333,0.333333,0.333333
2,2022-03-01 21:53:00,0.333333,0.333333,0.333333,0.333333
3,2022-03-01 21:54:00,0.333333,0.333333,0.333333,0.333333
4,2022-03-01 21:55:00,0.333333,0.333333,0.333333,0.333333
...,...,...,...,...,...
813,2022-03-02 11:24:00,0.368501,0.383268,0.383268,0.379423
814,2022-03-02 11:25:00,0.342202,0.380361,0.368540,0.342202
815,2022-03-02 11:26:00,0.337204,0.352006,0.352006,0.342037
816,2022-03-02 11:27:00,0.340038,0.366787,0.340038,0.363201


In [13]:
# Work on a copy so the plotting cell cannot alter the gap-filled frame.
df = new_data.copy()

# One candle per row of the one-minute grid.
candles = go.Candlestick(
    x=df['timeframe'],
    open=df['open_ratio'],
    high=df['high_ratio'],
    low=df['low_ratio'],
    close=df['close_ratio'],
)

fig = go.Figure(data=[candles])

# Title and axis labels so the figure can be read on its own.
fig.update_layout(
    title='Pool ratio candlesticks (1-minute, gaps filled with previous close)',
    xaxis_title='timeframe',
    yaxis_title='ratio',
)

fig.show()