import data

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, unix_timestamp, lag, when, avg
from pyspark.sql.window import Window
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import RandomForestClassifier

# Obtain the Spark session for this notebook; getOrCreate() reuses a session
# that is already running instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("CryptoTradingBot")
    .getOrCreate()
)

# Column names for the sample rows, in tuple order.
columns = ["time", "price", "volume"]

# Hand-written sample ticks: (ISO-8601 timestamp string, price, volume).
data = [
    ("2024-12-29T11:42:13", 95146.52, 1000),
    ("2024-12-29T11:45:15", 95300.00, 1200),
    ("2024-12-29T11:48:20", 95450.75, 1300),
    ("2024-12-29T11:51:25", 95200.00, 900),
    ("2024-12-29T11:54:30", 95600.50, 1500),
]

# Column types are inferred by Spark from the Python values.
df = spark.createDataFrame(data, schema=columns)

# Convert the "time" strings to epoch seconds.
# unix_timestamp() parses with the pattern "yyyy-MM-dd HH:mm:ss" unless one is
# supplied; that pattern does not match the literal "T" separator used in the
# sample rows, so the ISO-8601 pattern is spelled out explicitly. Without it
# time_numeric comes back null and na.drop() below removes every row.
df = df.withColumn(
    "time_numeric",
    unix_timestamp(col("time"), "yyyy-MM-dd'T'HH:mm:ss"),
)

# One global window ordered by time. No partitionBy() is given, so the window
# spans the whole DataFrame (fine for this tiny sample; Spark warns about it
# on large inputs because all rows are processed in a single partition).
window_spec = Window.orderBy("time_numeric")

# Previous row's price and volume; both are null on the earliest row.
df = df.withColumn("prev_price", lag("price", 1).over(window_spec))
df = df.withColumn("prev_volume", lag("volume", 1).over(window_spec))

# Trailing moving average of price over 3 rows: the current row plus the two
# rows before it (rowsBetween(-2, 0)).
# NOTE(review): this average includes the current price, which also decides
# the label below -- confirm that is intended before trusting model accuracy.
df = df.withColumn("ma_price", avg("price").over(window_spec.rowsBetween(-2, 0)))

# Label: 1 (BUY) when the price rose versus the previous row, else 0 (HOLD).
# A null prev_price makes the comparison null, which falls through to 0.
df = df.withColumn("label", when(col("price") > col("prev_price"), 1).otherwise(0))

# Drop every row that still has a null in any column; this removes the
# earliest row, whose lagged values do not exist.
df = df.na.drop()

# Pack the numeric inputs into the single vector column Spark ML expects.
# The assembled column is added to `df` without dropping anything: the
# sell-signal cell further down still reads "time", "price" and "prev_price"
# from `df`, so `df` must not be narrowed to features/label here.
assembler = VectorAssembler(
    inputCols=["time_numeric", "prev_price", "prev_volume", "ma_price"],
    outputCol="features",
)
df = assembler.transform(df)

# The classifier only needs the feature vector and the label.
train_df = df.select("features", "label")

# Train a 100-tree random forest on the assembled features.
rf = RandomForestClassifier(featuresCol="features", labelCol="label", numTrees=100)
model = rf.fit(train_df)

# Score the same rows the model was fitted on (there is no held-out split
# here, so these predictions only show the fit, not generalisation).
predictions = model.transform(train_df)
predictions.select("features", "label", "prediction").show()

In [None]:
# Exit thresholds as fractions of the previous row's price:
# a rise of more than 5% or a fall of more than 2% triggers a sell.
take_profit = 0.05
stop_loss = 0.02

# Price levels, relative to prev_price, beyond which the row is flagged.
upper_bound = col("prev_price") * (1 + take_profit)
lower_bound = col("prev_price") * (1 - stop_loss)

# "SELL" when the price breaks out of the band on either side, otherwise "HOLD".
# NOTE(review): requires `df` to still expose the "time", "price" and
# "prev_price" columns at this point.
df = df.withColumn(
    "sell_signal",
    when(
        (col("price") > upper_bound) | (col("price") < lower_bound),
        "SELL",
    ).otherwise("HOLD"),
)

df.select("time", "price", "sell_signal").show()