In [2]:
from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import RegressionEvaluator

# Fit a linear regression of ISE_TL on the other index columns of the CSV,
# evaluate it on a held-out split, and print the metrics and fitted parameters.
spark = SparkSession.builder.appName("IstanbulStockExchange").getOrCreate()

# Everything after session creation runs inside try/finally so that the
# session is stopped even when loading, fitting or evaluation raises.
try:
    file_path = "istanbul_stock_exchange.csv"
    # header=True takes column names from the first row; inferSchema=True
    # asks Spark to infer column types instead of reading everything as string.
    df = spark.read.csv(file_path, header=True, inferSchema=True)

    feature_cols = ["SP", "DAX", "FTSE", "NIKKEI", "BOVESPA", "EU", "EM"]
    target_col = "ISE_TL"

    # Spark ML estimators consume a single vector column, so pack the
    # feature columns into "features" and keep only what the model needs.
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    df = assembler.transform(df).select("features", target_col)

    # Fixed seed so the 80/20 split is repeatable across runs on the same data.
    train_data, test_data = df.randomSplit([0.8, 0.2], seed=42)

    lr = LinearRegression(featuresCol="features", labelCol=target_col)
    model = lr.fit(train_data)

    # transform() appends a "prediction" column to the test rows.
    predictions = model.transform(test_data)

    evaluator = RegressionEvaluator(
        labelCol=target_col, predictionCol="prediction", metricName="rmse"
    )
    rmse = evaluator.evaluate(predictions)
    r2 = RegressionEvaluator(
        labelCol=target_col, predictionCol="prediction", metricName="r2"
    ).evaluate(predictions)

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"R² Score: {r2}")

    # Coefficients are reported in the same order as feature_cols.
    print(f"Coefficients: {model.coefficients}")
    print(f"Intercept: {model.intercept}")
finally:
    # Always release the session, on success and on failure alike.
    spark.stop()


Root Mean Squared Error (RMSE): 0.01178993952682364
R² Score: 0.5595270609771446
Coefficients: [0.010846798030471613,-0.06610063279295464,-0.07621718952388333,-0.07397775438793153,-0.11090787123973295,0.734215921517111,0.5256569746859336]
Intercept: 0.0012064570770679622


NameError: name 'SparkSession' is not defined