In [2]:
import os
import io
import pandas as pd
from azure.storage.blob import BlobServiceClient
from dotenv import load_dotenv

def download_arbitrage_data(container_name: str = "arbitrage-data", blob_name: str = "arbitrage_history.csv"):
    """
    Download a CSV blob from Azure Blob Storage and return it as a pandas DataFrame.

    The connection string is read from the AZURE_STORAGE_CONNECTION_STRING
    environment variable (after loading a .env file via load_dotenv()).

    Args:
        container_name (str): Name of the container in Azure Blob Storage
        blob_name (str): Name of the blob file to download

    Returns:
        pandas.DataFrame or None: The parsed CSV data, or None when the
        connection string is missing or the download/parse fails. Failures are
        reported with print() rather than raised, so callers must check for None.
    """
    # Load environment variables
    load_dotenv()

    # Azure Blob Storage connection details
    connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    if not connection_string:
        # Without this check the SDK receives None and fails with an obscure
        # message; report the actual configuration problem instead.
        print("Error downloading or loading data: "
              "AZURE_STORAGE_CONNECTION_STRING is not set")
        return None

    try:
        # Initialize the BlobServiceClient and drill down to the blob
        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        container_client = blob_service_client.get_container_client(container_name)
        blob_client = container_client.get_blob_client(blob_name)

        # Download the blob content as text. readall() replaces the deprecated
        # content_as_text(); the encoding makes readall() return str, not bytes.
        csv_text = blob_client.download_blob(encoding="utf-8").readall()

        # Convert to pandas DataFrame
        arbitrage_data = pd.read_csv(io.StringIO(csv_text))

        print(f"Successfully downloaded and loaded {blob_name}")
        print(f"DataFrame shape: {arbitrage_data.shape}")
        return arbitrage_data

    except Exception as e:
        # Deliberate best-effort: keep the notebook running and signal failure via None.
        print(f"Error downloading or loading data: {str(e)}")
        return None



In [3]:
df = download_arbitrage_data()
if df is None:
    # download_arbitrage_data() signals failure by printing the error and
    # returning None; stop here with a clear message instead of letting the
    # next line fail with an AttributeError on NoneType.
    raise RuntimeError("download_arbitrage_data() returned None; see the error printed above")
df.tail()

Successfully downloaded and loaded arbitrage_history.csv
DataFrame shape: (70000, 8)


Unnamed: 0,coin,profit_percentage,best_path,direct_value,btc_path_value,eth_path_value,usdc_path_value,timestamp
69995,APT,0.0,Direct,100.0,99.996232,99.970466,99.931892,2025-03-24 17:55:05
69996,ADA,0.0,Direct,100.0,99.992479,99.919576,100.0,2025-03-24 17:55:05
69997,LINK,0.0,Direct,100.0,99.997486,99.997383,100.0,2025-03-24 17:55:05
69998,SOL,0.0,Direct,100.0,99.979284,99.972674,99.992971,2025-03-24 17:55:05
69999,XRP,0.0,Direct,100.0,99.996209,99.967905,99.995954,2025-03-24 17:55:05


In [6]:
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
def best_latest_arbitrage_opportunities(df, top_n=5):
    """
    Return the coins with the highest profit in the most recent snapshot.

    Args:
        df (pandas.DataFrame): Frame with 'timestamp', 'profit_percentage' and 'coin' columns
        top_n (int): Maximum number of coins to return

    Returns:
        The 'coin' values of the top_n rows whose timestamp equals the latest
        timestamp in df, ordered by descending profit_percentage
    """
    # Keep only the rows belonging to the newest timestamp present in the frame.
    latest_timestamp = df['timestamp'].max()
    latest_rows = df.loc[df['timestamp'] == latest_timestamp]

    # Rank those rows by profit, best first, and take the leading top_n coins.
    ranked = latest_rows.sort_values(by='profit_percentage', ascending=False)
    return ranked.iloc[:top_n]['coin'].values

# df = pd.read_csv('arbitrage_history.csv')
# Parse the timestamp column so it can be compared against a datetime cutoff.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Cutoff is wall-clock "now" minus 5 h 10 min, truncated to whole seconds.
# NOTE(review): the 5-hour term looks like a timezone offset on top of a
# 10-minute lookback window — confirm.
cutoff = (datetime.now() - timedelta(hours=5, minutes=10)).replace(microsecond=0)
recent_rows = df.loc[df['timestamp'] > cutoff]

# Restrict to the 5 most profitable coins of the latest snapshot and keep
# only the columns used for plotting.
top_coins = best_latest_arbitrage_opportunities(recent_rows, 5)
df = recent_rows.loc[
    recent_rows['coin'].isin(top_coins),
    ['coin', 'best_path', 'profit_percentage', 'timestamp'],
]

# # Create a figure and axis
# plt.figure(figsize=(14, 8))

# # Plot each coin's profit percentage over time
# for coin in df['coin'].unique():
#     coin_data = df[df['coin'] == coin]
#     plt.plot(coin_data['timestamp'], coin_data['profit_percentage'], marker='o', label=coin)

# # Add labels and title
# plt.xlabel('Timestamp')
# plt.ylabel('Profit Percentage (%)')
# plt.title('Arbitrage Opportunities Over Time')
# plt.legend()
# plt.grid(True)

# # Format x-axis to show dates nicely
# plt.xticks(rotation=45)
# plt.tight_layout()

# # Show the plot
# plt.show()

Unnamed: 0,coin,best_path,profit_percentage,timestamp


In [None]:
# Plot the GALA rows against time; DataFrame.plot() draws the numeric columns.
gala_rows = df.loc[df['coin'] == 'GALA']
gala_rows.set_index('timestamp').plot()