In [1]:
pip install google-cloud-bigquery

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
from pyspark.sql import SparkSession

# Path to your service account key
service_account_path = "creds.json"

# Your Google Cloud project ID
project_id = "bigdata-421623"

spark = SparkSession.builder \
    .appName("BigQuery Integration") \
    .config("spark.jars.packages", "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.21.0") \
    .config("credentialsFile", service_account_path) \
    .config("parentProject", project_id) \
    .getOrCreate()



:: loading settings :: url = jar:file:/opt/conda/envs/bigdata/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/jovyan/.ivy2/cache
The jars for the packages stored in: /home/jovyan/.ivy2/jars
com.google.cloud.spark#spark-bigquery-with-dependencies_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-1a2e507e-084f-4fb5-ad0f-332746562d44;1.0
	confs: [default]
	found com.google.cloud.spark#spark-bigquery-with-dependencies_2.12;0.21.0 in central
:: resolution report :: resolve 305ms :: artifacts dl 13ms
	:: modules in use:
	com.google.cloud.spark#spark-bigquery-with-dependencies_2.12;0.21.0 from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   1   |   0   |   0   |   0   ||   1   |   0   |
	----------------------------------------------------------------

In [3]:
# Specify your BigQuery table
table = "bigdata-421623.ForEx_Big_Data.Hourly_Forex"

# Load the data into a DataFrame
df = spark.read \
    .format("bigquery") \
    .option("table", table) \
    .load()

# Show the first few rows of the DataFrame
df.show()


                                                                                

+-------------+-------------+------------+------------+-------------+-------------------+------+-----------------+--------+--------------------+
|closing_price|highest_price|lowest_price|transactions|opening_price|               time|volume|avg_volume_weight|  ticker|          created_at|
+-------------+-------------+------------+------------+-------------+-------------------+------+-----------------+--------+--------------------+
|      1.05685|      1.05772|      1.0564|        6162|      1.05765|2023-02-28 19:00:00|  6162|           1.0568|C:EURUSD|2024-04-28 12:04:...|
|      1.05841|       1.0586|      1.0566|        7986|      1.05684|2023-02-28 20:00:00|  7986|           1.0576|C:EURUSD|2024-04-28 12:04:...|
|      1.05831|      1.05866|       1.058|        4862|      1.05848|2023-02-28 21:00:00|  4862|           1.0584|C:EURUSD|2024-04-28 12:04:...|
|      1.05929|       1.0596|      1.0581|        5067|       1.0583|2023-02-28 22:00:00|  5067|            1.059|C:EURUSD|2024-04

In [4]:
# Assuming tickers are in a format like 'C:EURUSD', let's extract the unique tickers
tickers = df.select("ticker").distinct().rdd.flatMap(lambda x: x).collect()


                                                                                

In [5]:
# Parse out unique currencies from tickers
currencies = set()
for ticker in tickers:
    base, quote = ticker.split("C:")[1][:3], ticker.split("C:")[1][3:]
    currencies.update([base, quote])

In [6]:
# Identify valid triangles
currency_triangles = []
for base in currencies:
    for via in currencies:
        for quote in currencies:
            if base != via and via != quote and base != quote:
                ticker1 = f"C:{base}{via}"
                ticker2 = f"C:{via}{quote}"
                ticker3 = f"C:{quote}{base}"
                if all(t in tickers for t in [ticker1, ticker2, ticker3]):
                    currency_triangles.append((ticker1, ticker2, ticker3))

In [10]:
from pyspark.sql.functions import col, when, first
# Define a threshold for arbitrage opportunities
threshold = 0.01

# Check each triangle for arbitrage opportunities
for triangle in currency_triangles:
    ticker1, ticker2, ticker3 = triangle

    # Pivot the DataFrame to align closing prices by ticker and time
    pivot_df = df.filter(
        df["ticker"].isin(ticker1, ticker2, ticker3)
    ).groupBy("time").pivot("ticker").agg(first("closing_price").alias("closing_price"))

    # Calculate arbitrage opportunity result including the threshold
    arb_df = pivot_df.withColumn(
        "arbitrage_result",
        when(
            (col(ticker1) * col(ticker2) / col(ticker3) - 1) > threshold, "Opportunity"
        ).otherwise("None")
    )

    # Filter for rows where an opportunity is found and display them
    arb_df.filter(col("arbitrage_result") == "Opportunity").show()

                                                                                

+----+--------+--------+--------+----------------+
|time|C:EURUSD|C:JPYEUR|C:USDJPY|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+



                                                                                

+----+--------+--------+--------+----------------+
|time|C:GBPUSD|C:JPYGBP|C:USDJPY|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+



                                                                                

+-------------------+---------+-------------------+--------+----------------+
|               time| C:EURJPY|           C:JPYUSD|C:USDEUR|arbitrage_result|
+-------------------+---------+-------------------+--------+----------------+
|2023-10-20 02:00:00|  158.498|          0.0066707| 0.94567|     Opportunity|
|2023-04-30 18:00:00|  149.987|          0.0073382| 0.90812|     Opportunity|
|2023-07-10 21:00:00|  155.296|          0.0070878| 0.90827|     Opportunity|
|2023-03-19 20:00:00|141.63738|         0.00754028| 0.93622|     Opportunity|
|2023-07-04 05:00:00|  157.395|           0.006923| 0.91754|     Opportunity|
|2024-01-28 22:00:00|  160.652|          0.0067494| 0.92204|     Opportunity|
|2023-07-05 23:00:00|  156.349|          0.0069321| 0.92246|     Opportunity|
|2023-12-14 02:00:00|  154.522|          0.0070404| 0.91897|     Opportunity|
|2023-10-31 04:00:00|  160.111|          0.0066478| 0.93936|     Opportunity|
|2023-04-10 06:00:00|  144.283|          0.0075569| 0.91688|    

[Stage 62:>                                                         (0 + 1) / 1]

+----+--------+--------+--------+----------------+
|time|C:EURGBP|C:GBPUSD|C:USDEUR|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+



                                                                                

+-------------------+--------+-------------------+---------+----------------+
|               time|C:GBPJPY|           C:JPYUSD| C:USDGBP|arbitrage_result|
+-------------------+--------+-------------------+---------+----------------+
|2023-04-10 01:00:00|  164.52|          0.0075405|  0.80582|     Opportunity|
|2023-10-20 02:00:00| 181.522|          0.0066707|  0.82572|     Opportunity|
|2023-07-10 21:00:00| 181.546|          0.0070878|0.7769706|     Opportunity|
|2023-03-19 20:00:00|  161.68|         0.00754028|  0.82013|     Opportunity|
|2023-11-22 10:00:00| 186.478|          0.0066801|  0.80262|     Opportunity|
|2023-06-23 17:00:00|  182.46|         0.00695894|0.7864727|     Opportunity|
|2023-10-31 04:00:00| 183.081|          0.0066478|   0.8215|     Opportunity|
|2023-07-05 23:00:00| 183.088|          0.0069321|   0.7877|     Opportunity|
|2023-12-14 02:00:00|  179.21|          0.0070404|  0.79237|     Opportunity|
|2024-03-13 00:00:00| 188.741|          0.0067756|  0.78175|    

                                                                                

+----+--------+--------+--------+----------------+
|time|C:EURUSD|C:GBPEUR|C:USDGBP|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+

+----+--------+--------+--------+----------------+
|time|C:EURJPY|C:JPYUSD|C:USDEUR|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+

+----+--------+--------+--------+----------------+
|time|C:GBPJPY|C:JPYUSD|C:USDGBP|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+

+----+--------+--------+--------+----------------+
|time|C:EURUSD|C:JPYEUR|C:USDJPY|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+--------+----------------+

+----+--------+--------+--------+----------------+
|time|C:EURGBP|C:GBPJPY|C:JPYEUR|arbitrage_result|
+----+--------+--------+--------+----------------+
+----+--------+--------+---

                                                                                

+-------------------+--------+--------+--------+----------------+
|               time|C:EURGBP|C:GBPUSD|C:USDEUR|arbitrage_result|
+-------------------+--------+--------+--------+----------------+
|2023-09-15 02:00:00| 0.85786| 1.24264|   0.938|     Opportunity|
|2023-12-04 10:00:00| 0.85759| 1.26103| 0.92462|     Opportunity|
|2023-10-31 04:00:00| 0.87454| 1.21723| 0.93936|     Opportunity|
|2023-07-04 05:00:00| 0.85828| 1.26976| 0.91754|     Opportunity|
|2023-12-28 14:00:00| 0.86938| 1.27264| 0.90376|     Opportunity|
|2023-03-13 17:00:00| 0.87993| 1.21811| 0.93188|     Opportunity|
|2024-01-28 22:00:00| 0.85374| 1.27022| 0.92204|     Opportunity|
|2024-03-13 00:00:00| 0.85397| 1.27902|  0.9154|     Opportunity|
|2023-04-30 18:00:00| 0.87688| 1.25562| 0.90812|     Opportunity|
|2023-07-05 23:00:00|  0.8539| 1.26932| 0.92246|     Opportunity|
|2023-04-10 06:00:00| 0.87738| 1.24297| 0.91688|     Opportunity|
|2023-06-30 08:00:00| 0.85902|  1.2685| 0.91762|     Opportunity|
|2023-07-1