In [1]:
import pandas as pd

# Fetch Data

In [3]:
import requests
import pandas as pd
from datetime import datetime
from tqdm import tqdm

# Column labels for one Binance candle (kline) record, listed in positional order.
BINANCE_CANDLE_COLUMNS = [
    'opentime',
    'openprice',
    'highprice',
    'lowprice',
    'closeprice',
    'volume',
    'closetime',
    'quotevolume',
    'trades',
    'taker_buy_volume',
    'taker_buy_quote',
    'unused',
]

def binance_recursive_fetch_2(coins, interval, starttime, endtime, data_type='spot'):
    """Page through Binance klines for every coin and return them as flat rows.

    For each entry in ``coins`` the ``{coin}USDT`` symbol is requested repeatedly,
    starting at ``starttime`` and resuming right after the close time of the last
    candle received, until ``endtime`` is reached or the API returns no more data.

    Args:
        coins: Iterable of base-asset tickers; ``USDT`` is appended to form the symbol.
        interval: Kline interval string passed through to the API (e.g. ``'1m'``).
        starttime: Start of the requested range, in epoch milliseconds.
        endtime: End of the requested range, in epoch milliseconds.
        data_type: ``'spot'`` or ``'futures'``; selects which REST endpoint is queried.

    Returns:
        dict with two keys:
        ``'data'`` -- list of rows, each ``[coin, <one value per BINANCE_CANDLE_COLUMNS>]``;
        ``'call'`` -- dict mapping each coin to the number of requests issued for it.

    Raises:
        ValueError: ``data_type`` is not one of the supported endpoints.
        RuntimeError: the API answered with an HTTP error or a non-list payload.
    """
    endpoints = {
        'spot': 'https://api.binance.com/api/v3/klines',
        'futures': 'https://fapi.binance.com/fapi/v1/klines',
    }
    if data_type not in endpoints:
        # Previously an unknown data_type left `url` unbound and failed later with
        # an unrelated NameError.
        raise ValueError(f"data_type must be one of {sorted(endpoints)}, got {data_type!r}")
    url = endpoints[data_type]
    page_size = 1000
    closetime_idx = 6  # position of 'closetime' within a raw candle row

    data_list = []
    call_dict = {}

    for coin in tqdm(coins):
        result_list = []
        current_time = starttime
        call = 0

        while current_time < endtime:
            response = requests.get(
                url,
                params={
                    'symbol': f'{coin}USDT',
                    'interval': interval,
                    'startTime': current_time,
                    # Bounding the request server-side keeps the window correct for
                    # any interval, not only '1m'.
                    'endTime': endtime,
                    'limit': page_size,
                },
                timeout=30,
            )
            call += 1

            if not response.ok:
                raise RuntimeError(
                    f"Binance request for {coin}USDT failed with HTTP "
                    f"{response.status_code}: {response.text}"
                )
            batch = response.json()
            if not isinstance(batch, list):
                # Error payloads are dicts; extending the row list with one would
                # silently corrupt the data.
                raise RuntimeError(f"Unexpected response for {coin}USDT: {batch!r}")

            if not batch:
                # Nothing (more) available for this window. Without this guard an
                # empty first page made the loop spin forever.
                print("No more data returned. Stopping fetch.")
                break

            result_list += batch

            # Resume one millisecond after the last candle closed, i.e. at the next
            # candle's open time regardless of the interval length.
            next_time = batch[-1][closetime_idx] + 1
            if next_time <= current_time:
                print("Duplicate timestamp detected. Stopping fetch.")
                break
            current_time = next_time

            if current_time >= endtime:
                print(f"Reached endtime at {datetime.fromtimestamp(current_time / 1000).strftime('%Y-%m-%d %H:%M:%S')}. Stopping fetch.")
                break

            print((datetime.fromtimestamp(current_time / 1000).strftime('%Y-%m-%d %H:%M:%S')) +
                  f' status : {current_time < endtime}, time : {current_time}, limit : {call * 2}')

            # Sleep if needed to avoid rate limiting (adjust based on your rate limit)
            # time.sleep(0.1)  # Uncomment if needed

        # Label the raw rows, prepend the coin, and flatten back to plain lists.
        current_df = pd.DataFrame(result_list, columns=BINANCE_CANDLE_COLUMNS)
        current_df['coin'] = coin
        current_df = current_df[['coin'] + BINANCE_CANDLE_COLUMNS]

        data_list += current_df.values.tolist()
        call_dict[coin] = call

    return {'data': data_list, 'call': call_dict}

# Set endtime to the current time (today), expressed as epoch milliseconds.
# A timezone-aware "now" is used on purpose: datetime.utcnow() returns a naive
# datetime that .timestamp() interprets as *local* time, which shifts the result by
# the machine's UTC offset (and utcnow() is deprecated since Python 3.12).
endtime = int(pd.Timestamp.now(tz='UTC').timestamp() * 1000)

# # Example usage with today's date as the end time
# sample_spot = binance_recursive_fetch_2(
#     ['USDT'],
#     '1m',
#     starttime=int(pd.to_datetime('2023-01-01 00:00', utc=True).timestamp() * 1000),
#     endtime=endtime,
#     data_type='spot'  # Fetch spot data
# )

# print(sample_spot['data'])

  endtime = int(datetime.utcnow().timestamp() * 1000)


# Read Prediction

In [2]:
# Load the saved prediction CSV for the ARB live test.
# NOTE(review): the output recorded for this cell shows `y_pred_logreg` /
# `y_pred_linreg` / `opentime`, while later cells read `y_pred` / `y_actual` from
# `df` -- confirm which file those cells were actually run against.
df = pd.read_csv(
    filepath_or_buffer='/home/ubuntu/Charles/predict_near_volatility/csv/live_test_predictions_csv/live_test_ARB.csv'
)

df

Unnamed: 0,y_pred_logreg,y_pred_linreg,opentime
0,0,0.012811,2024-12-27T06:00:00.000000
1,0,0.014405,2024-12-27T08:00:00.000000
2,0,0.015705,2024-12-30T02:00:00.000000
3,0,0.015705,2024-12-30T02:00:00.000000
4,0,0.013276,2024-12-30T04:00:00.000000
...,...,...,...
99,0,0.009674,2025-01-03T02:00:00.000000
100,0,0.009154,2025-01-03T03:00:00.000000
101,0,0.009466,2025-01-03T04:00:00.000000
102,0,0.009416,2025-01-03T05:00:00.000000


In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score the `y_pred` labels in `df` against the ground truth `y_actual`.
accuracy = accuracy_score(y_true=df['y_actual'], y_pred=df['y_pred'])
f1 = f1_score(y_true=df['y_actual'], y_pred=df['y_pred'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.93
F1 Score: 0.93


In [5]:
# Load the continuous prediction values for the last 100 samples.
df_2 = pd.read_csv(
    filepath_or_buffer='/home/ubuntu/Rheza/local-share/02_NEAR_USDT/pred_value_last_100_20241712.csv'
)

df_2

Unnamed: 0,prediction_value
0,0.017412
1,0.017275
2,0.017528
3,0.016673
4,0.017161
...,...
95,0.029086
96,0.025292
97,0.024443
98,0.023211


In [6]:
import pandas as pd

# Put the label frame and the prediction-value frame side by side.
# Rows are matched on index, not on position.
df_combined = pd.concat(objs=[df, df_2], axis='columns')

# Show the merged frame.
df_combined

Unnamed: 0,y_pred,y_actual,prediction_value
0,0,0,0.017412
1,0,0,0.017275
2,0,0,0.017528
3,0,0,0.016673
4,0,0,0.017161
...,...,...,...
95,1,1,0.029086
96,1,1,0.025292
97,1,1,0.024443
98,1,0,0.023211


In [7]:
# Turn the continuous prediction into a 0/1 label: 1 when the value is at least 0.02.
df_combined['classed_prediction_value'] = df_combined['prediction_value'].ge(0.02).astype(int)

# Show the frame with the new column.
df_combined


Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value
0,0,0,0.017412,0
1,0,0,0.017275,0
2,0,0,0.017528,0
3,0,0,0.016673,0
4,0,0,0.017161,0
...,...,...,...,...
95,1,1,0.029086,1
96,1,1,0.025292,1
97,1,1,0.024443,1
98,1,0,0.023211,1


In [132]:
# Rows where the thresholded label disagrees with the ground truth.
df_combined.loc[df_combined['y_actual'].ne(df_combined['classed_prediction_value'])]

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value
45,0,0,0.020384,1
47,0,1,0.018787,0
49,1,1,0.018933,0
50,0,1,0.016927,0
51,0,1,0.019265,0
55,1,1,0.019365,0
58,1,0,0.021272,1
63,1,1,0.019513,0
98,1,0,0.023211,1
99,0,0,0.022681,1


In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# Score the thresholded label (`classed_prediction_value`) against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['classed_prediction_value'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['classed_prediction_value'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.90
F1 Score: 0.90


In [146]:
# Demote positives whose prediction_value is at or below 0.0200; every other row
# keeps its classed_prediction_value.
# NOTE(review): if classed_prediction_value is still the `>= 0.02` flag, this can
# only match rows whose value is exactly 0.02 -- confirm the intended cutoff.
df_combined['new_class_pred_1'] = np.where(
    df_combined['prediction_value'].le(0.0200) & df_combined['classed_prediction_value'].eq(1),
    0,
    df_combined['classed_prediction_value'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1
0,0,0,0.017412,0,0
1,0,0,0.017275,0,0
2,0,0,0.017528,0,0
3,0,0,0.016673,0,0
4,0,0,0.017161,0,0
...,...,...,...,...,...
95,1,1,0.029086,1,1
96,1,1,0.025292,1,1
97,1,1,0.024443,1,1
98,1,0,0.023211,1,1


In [147]:
# Score `new_class_pred_1` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_1'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_1'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.90
F1 Score: 0.90


In [154]:
# Promote negatives whose prediction_value is at least 0.0195; all other rows
# inherit new_class_pred_1.
# NOTE(review): the condition tests classed_prediction_value while the fallback is
# new_class_pred_1 -- confirm that mixing the two columns is intended.
df_combined['new_class_pred_2'] = np.where(
    df_combined['prediction_value'].ge(0.0195) & df_combined['classed_prediction_value'].eq(0),
    1,
    df_combined['new_class_pred_1'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1,new_class_pred_2
0,0,0,0.017412,0,0,0
1,0,0,0.017275,0,0,0
2,0,0,0.017528,0,0,0
3,0,0,0.016673,0,0,0
4,0,0,0.017161,0,0,0
...,...,...,...,...,...,...
95,1,1,0.029086,1,1,1
96,1,1,0.025292,1,1,1
97,1,1,0.024443,1,1,1
98,1,0,0.023211,1,1,1


In [155]:
# Score `new_class_pred_2` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_2'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_2'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.91
F1 Score: 0.91


In [8]:
# Rows where the classifier label `y_pred` disagrees with the ground truth.
df_combined.loc[df_combined['y_actual'].ne(df_combined['y_pred'])]

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value
47,0,1,0.018787,0
50,0,1,0.016927,0
51,0,1,0.019265,0
52,0,1,0.020351,1
58,1,0,0.021272,1
64,0,1,0.020131,1
98,1,0,0.023211,1


In [9]:
# Score the classifier label `y_pred` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['y_pred'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['y_pred'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.93
F1 Score: 0.93


In [183]:
# Override the classifier: a positive `y_pred` is demoted to 0 when the
# prediction_value is at or below 0.01999; every other row keeps `y_pred`.
df_combined['new_class_pred_3'] = np.where(
    df_combined['prediction_value'].le(0.01999) & df_combined['y_pred'].eq(1),
    0,
    df_combined['y_pred'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1,new_class_pred_2,new_class_pred_3
0,0,0,0.017412,0,0,0,0
1,0,0,0.017275,0,0,0,0
2,0,0,0.017528,0,0,0,0
3,0,0,0.016673,0,0,0,0
4,0,0,0.017161,0,0,0,0
...,...,...,...,...,...,...,...
95,1,1,0.029086,1,1,1,1
96,1,1,0.025292,1,1,1,1
97,1,1,0.024443,1,1,1,1
98,1,0,0.023211,1,1,1,1


In [184]:
# Score `new_class_pred_3` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_3'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_3'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.90
F1 Score: 0.89


In [26]:
# Override the classifier: a negative `y_pred` is promoted to 1 when the
# prediction_value is at least 0.023; every other row keeps `y_pred`.
df_combined['new_class_pred_4'] = np.where(
    df_combined['prediction_value'].ge(0.023) & df_combined['y_pred'].eq(0),
    1,
    df_combined['y_pred'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_4
0,0,0,0.017412,0,0
1,0,0,0.017275,0,0
2,0,0,0.017528,0,0
3,0,0,0.016673,0,0
4,0,0,0.017161,0,0
...,...,...,...,...,...
95,1,1,0.029086,1,1
96,1,1,0.025292,1,1
97,1,1,0.024443,1,1
98,1,0,0.023211,1,1


In [28]:
# Score `new_class_pred_4` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_4'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_4'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.93
F1 Score: 0.93


# All predictions (full set)

In [29]:
# Load the full set of logreg classification predictions.
# This rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv(
    filepath_or_buffer='/home/ubuntu/Charles/predict_near_volatility/full_logreg_2024_12_17.csv'
)

df

Unnamed: 0,y_pred,y_actual
0,0,1
1,0,1
2,1,1
3,1,1
4,1,1
...,...,...
2532,1,1
2533,1,1
2534,1,1
2535,1,0


In [30]:
# Load the full set of continuous prediction values.
# This rebinds `df_2`, replacing the frame loaded earlier in the notebook.
df_2 = pd.read_csv(
    filepath_or_buffer='/home/ubuntu/Rheza/local-share/02_NEAR_USDT/pred_value_20241712.csv'
)

df_2

Unnamed: 0,prediction_value
0,0.017733
1,0.016897
2,0.018792
3,0.018227
4,0.018955
...,...
2532,0.029086
2533,0.025292
2534,0.024443
2535,0.023211


In [31]:
import pandas as pd

# Put the full label frame and the full prediction-value frame side by side.
# Rows are matched on index; this rebinds `df_combined`.
df_combined = pd.concat(objs=[df, df_2], axis='columns')

# Show the merged frame.
df_combined

Unnamed: 0,y_pred,y_actual,prediction_value
0,0,1,0.017733
1,0,1,0.016897
2,1,1,0.018792
3,1,1,0.018227
4,1,1,0.018955
...,...,...,...
2532,1,1,0.029086
2533,1,1,0.025292
2534,1,1,0.024443
2535,1,0,0.023211


In [32]:
# Turn the continuous prediction into a 0/1 label: 1 when the value is at least 0.02.
df_combined['classed_prediction_value'] = df_combined['prediction_value'].ge(0.02).astype(int)

# Show the frame with the new column.
df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value
0,0,1,0.017733,0
1,0,1,0.016897,0
2,1,1,0.018792,0
3,1,1,0.018227,0
4,1,1,0.018955,0
...,...,...,...,...
2532,1,1,0.029086,1
2533,1,1,0.025292,1
2534,1,1,0.024443,1
2535,1,0,0.023211,1


In [33]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score the classifier label `y_pred` against `y_actual` on the full set.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['y_pred'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['y_pred'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.90
F1 Score: 0.82


In [34]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score the thresholded label (`classed_prediction_value`) against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['classed_prediction_value'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['classed_prediction_value'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.89
F1 Score: 0.81


In [35]:
# Rows where the thresholded label disagrees with the ground truth.
df_combined.loc[df_combined['y_actual'].ne(df_combined['classed_prediction_value'])]

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value
0,0,1,0.017733,0
1,0,1,0.016897,0
2,1,1,0.018792,0
3,1,1,0.018227,0
4,1,1,0.018955,0
...,...,...,...,...
2492,1,1,0.019365,0
2495,1,0,0.021272,1
2500,1,1,0.019513,0
2535,1,0,0.023211,1


In [58]:
# Demote positives whose prediction_value is at or below 0.02005; every other row
# keeps its classed_prediction_value.
df_combined['new_class_pred_1'] = np.where(
    df_combined['prediction_value'].le(0.02005) & df_combined['classed_prediction_value'].eq(1),
    0,
    df_combined['classed_prediction_value'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1
0,0,1,0.017733,0,0
1,0,1,0.016897,0,0
2,1,1,0.018792,0,0
3,1,1,0.018227,0,0
4,1,1,0.018955,0,0
...,...,...,...,...,...
2532,1,1,0.029086,1,1
2533,1,1,0.025292,1,1
2534,1,1,0.024443,1,1
2535,1,0,0.023211,1,1


In [59]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score `new_class_pred_1` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_1'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_1'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.89
F1 Score: 0.81


In [74]:
# Promote negatives whose prediction_value is at least 0.0197; every other row
# keeps its classed_prediction_value.
df_combined['new_class_pred_2'] = np.where(
    df_combined['prediction_value'].ge(0.0197) & df_combined['classed_prediction_value'].eq(0),
    1,
    df_combined['classed_prediction_value'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1,new_class_pred_2
0,0,1,0.017733,0,0,0
1,0,1,0.016897,0,0,0
2,1,1,0.018792,0,0,0
3,1,1,0.018227,0,0,0
4,1,1,0.018955,0,0,0
...,...,...,...,...,...,...
2532,1,1,0.029086,1,1,1
2533,1,1,0.025292,1,1,1
2534,1,1,0.024443,1,1,1
2535,1,0,0.023211,1,1,1


In [75]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score `new_class_pred_2` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_2'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_2'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.89
F1 Score: 0.81


In [72]:
# Rows where the classifier label `y_pred` disagrees with the ground truth.
df_combined.loc[df_combined['y_actual'].ne(df_combined['y_pred'])]

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1,new_class_pred_2
0,0,1,0.017733,0,0,0
1,0,1,0.016897,0,0,0
6,0,1,0.018720,0,0,0
13,0,1,0.018766,0,0,0
14,0,1,0.018155,0,0,0
...,...,...,...,...,...,...
2488,0,1,0.019265,0,0,0
2489,0,1,0.020351,1,1,1
2495,1,0,0.021272,1,1,1
2501,0,1,0.020131,1,1,1


In [96]:
# Override the classifier: a negative `y_pred` is promoted to 1 when the
# prediction_value is at least 0.0195; every other row keeps `y_pred`.
df_combined['new_class_pred_3'] = np.where(
    df_combined['prediction_value'].ge(0.0195) & df_combined['y_pred'].eq(0),
    1,
    df_combined['y_pred'],
)

df_combined

Unnamed: 0,y_pred,y_actual,prediction_value,classed_prediction_value,new_class_pred_1,new_class_pred_2,new_class_pred_3
0,0,1,0.017733,0,0,0,0
1,0,1,0.016897,0,0,0,0
2,1,1,0.018792,0,0,0,1
3,1,1,0.018227,0,0,0,1
4,1,1,0.018955,0,0,0,1
...,...,...,...,...,...,...,...
2532,1,1,0.029086,1,1,1,1
2533,1,1,0.025292,1,1,1,1
2534,1,1,0.024443,1,1,1,1
2535,1,0,0.023211,1,1,1,1


In [97]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score

# Score `new_class_pred_3` against `y_actual`.
accuracy = accuracy_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_3'])
f1 = f1_score(y_true=df_combined['y_actual'], y_pred=df_combined['new_class_pred_3'])

# Report both metrics rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.90
F1 Score: 0.83
