In [7]:
from pyspark.sql import SparkSession, functions as F
from pyspark.sql.functions import col, size
from pyspark.sql.types import StructType, DoubleType, StringType
from pyspark.ml.feature import StringIndexer, StandardScaler, VectorAssembler
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.mllib.evaluation import MulticlassMetrics

import time, random, math

In [8]:
sparkSession = SparkSession.builder.master("local[4]").appName("SparkProject").getOrCreate()

In [3]:
start_py = time.time()

# data type
schema = StructType() \
      .add("time_left",DoubleType(),True) \
      .add("ct_score",DoubleType(),True) \
      .add("t_score",DoubleType(),True) \
      .add("map",StringType(),True) \
      .add("bomb_planted",StringType(),True) \
      .add("ct_health",DoubleType(),True) \
      .add("t_health",DoubleType(),True) \
      .add("ct_armor",DoubleType(),True) \
      .add("t_armor",DoubleType(),True) \
      .add("ct_money",DoubleType(),True) \
      .add("t_money",DoubleType(),True) \
      .add("ct_helmets",DoubleType(),True) \
      .add("t_helmets",DoubleType(),True) \
      .add("ct_defuse_kits",DoubleType(),True) \
      .add("ct_players_alive",DoubleType(),True) \
      .add("t_players_alive",DoubleType(),True) \
      .add("round_winner",StringType(),True) \

# call csv to df and select
df = sparkSession.read.option("header",True).schema(schema).csv("csgo_round_snapshots.csv") \
                .select("time_left","ct_score","t_score","map","bomb_planted",
                        "ct_health","t_health","ct_armor","t_armor","ct_money","t_money",
                        "ct_helmets","t_helmets","ct_defuse_kits","ct_players_alive","t_players_alive","round_winner")
df.show(15)

# show label
df = StringIndexer(inputCol="bomb_planted", outputCol="bomb_planted_index").fit(df).transform(df)
df = StringIndexer(inputCol="map", outputCol="map_index").fit(df).transform(df)
df = StringIndexer(inputCol="round_winner", outputCol="label").fit(df).transform(df)

df = df.select("time_left","ct_score","t_score","map_index","bomb_planted_index","ct_health","t_health","ct_armor","t_armor","ct_money","t_money",
                        "ct_helmets","t_helmets","ct_defuse_kits","ct_players_alive","t_players_alive","label")
df.show(15)

# scale data
va = VectorAssembler(inputCols=["time_left","ct_score","t_score","map_index","bomb_planted_index","ct_health","t_health","ct_armor","t_armor","ct_money","t_money",
                        "ct_helmets","t_helmets","ct_defuse_kits","ct_players_alive","t_players_alive"], outputCol="features")

df = va.transform(df).select("features","label")

df.show(15)

for i in range(5):
 start_py_per = time.time()

 # split data into 80:20
 df_train,df_test = df.randomSplit([0.8, 0.2], seed=math.ceil(random.random()*10000))

 # model
 layers = [16,8,2]
 mlp = MultilayerPerceptronClassifier(layers = layers).setLabelCol("label").setFeaturesCol("features").setSeed(1000).setMaxIter(10000)
 model = mlp.fit(df_train)

 # predict
 result = model.transform(df_test)
 print('Fold', (i+1))
 result.show(10)

 # evaluation summary
 result = result.select(['prediction','label'])
 metrics = MulticlassMetrics(result.rdd.map(tuple))
 print(metrics.confusionMatrix().toArray())

 evaluatorAcc = MulticlassClassificationEvaluator(labelCol = 'label', predictionCol = 'prediction', metricName = 'accuracy')
 evaluatorPct = MulticlassClassificationEvaluator(labelCol = 'label', predictionCol = 'prediction', metricName = 'precisionByLabel')
 evaluatorRC = MulticlassClassificationEvaluator(labelCol = 'label', predictionCol = 'prediction', metricName = 'recallByLabel')
 evaluatorF1 = MulticlassClassificationEvaluator(labelCol = 'label', predictionCol = 'prediction', metricName = 'f1')
 mlpacc = evaluatorAcc.evaluate(result)
 mlppct = evaluatorPct.evaluate(result)
 mlprc = evaluatorRC.evaluate(result)
 mlpf1 = evaluatorF1.evaluate(result)
 print("accuracy of predict win team is: ", round(mlpacc,2))
 print("precision of predict win team is: ", round(mlppct,2))
 print("recall of predict win team is: ", round(mlprc,2))
 print("f1 score of predict win team is: ", round(mlpf1,2))
 print("Fold ", (i+1) ,"time:--- %s seconds --- " % (time.time()- start_py_per))

print("pyspark total --- %s seconds --- " % (time.time()- start_py))

+---------+--------+-------+--------+------------+---------+--------+--------+-------+--------+-------+----------+---------+--------------+----------------+---------------+------------+
|time_left|ct_score|t_score|     map|bomb_planted|ct_health|t_health|ct_armor|t_armor|ct_money|t_money|ct_helmets|t_helmets|ct_defuse_kits|ct_players_alive|t_players_alive|round_winner|
+---------+--------+-------+--------+------------+---------+--------+--------+-------+--------+-------+----------+---------+--------------+----------------+---------------+------------+
|    175.0|     0.0|    0.0|de_dust2|       FALSE|    500.0|   500.0|     0.0|    0.0|  4000.0| 4000.0|       0.0|      0.0|           0.0|             5.0|            5.0|          CT|
|   156.03|     0.0|    0.0|de_dust2|       FALSE|    500.0|   500.0|   400.0|  300.0|   600.0|  650.0|       0.0|      0.0|           1.0|             5.0|            5.0|          CT|
|    96.03|     0.0|    0.0|de_dust2|       FALSE|    391.0|   400.0| 



[[10624.  1834.]
 [ 5089.  6908.]]
accuracy of predict win team is:  0.72
precision of predict win team is:  0.68
recall of predict win team is:  0.85
f1 score of predict win team is:  0.71
Fold  1 time:--- 52.657129526138306 seconds --- 
Fold 2
+--------------------+-----+--------------------+--------------------+----------+
|            features|label|       rawPrediction|         probability|prediction|
+--------------------+-----+--------------------+--------------------+----------+
|(16,[0,1,2,3,4,5,...|  1.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,5,...|  0.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.0366188317604...|[0.12871961887066...|       1.0|
|(16,[0,1,2,3,4,



[[8808. 3591.]
 [2926. 8964.]]
accuracy of predict win team is:  0.73
precision of predict win team is:  0.75
recall of predict win team is:  0.71
f1 score of predict win team is:  0.73
Fold  2 time:--- 153.06307554244995 seconds --- 
Fold 3
+--------------------+-----+--------------------+--------------------+----------+
|            features|label|       rawPrediction|         probability|prediction|
+--------------------+-----+--------------------+--------------------+----------+
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753694...|[0.07196519903633...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753875...|[0.07196519903476...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753875...|[0.07196519903476...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753875...|[0.07196519903476...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753875...|[0.07196519903476...|       1.0|
|(16,[0,1,2,3,4,5,...|  1.0|[-1.3588984753875...|[0.07196519903476...|       1.0|
|(16,[0,1,2,3,4,5,..