In [1]:
import requests
import json

from pyspark.sql.types import IntegerType, StringType, DateType, DoubleType, LongType, StructType, StructField
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, monotonically_increasing_id, current_date
from datetime import date

In [46]:
# Start (or reuse) the Spark session used by every cell below; "local" runs
# Spark inside this process rather than against a cluster.
spark = (
    SparkSession.builder
    .appName("Pokemon Go ELT")
    .master("local")
    .getOrCreate()
)

In [47]:
# Download the fast-move catalogue from the PoGo API.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of trying to parse
# an error page as JSON.
fast_moves_response = requests.get(
    'https://pogoapi.net/api/v1/fast_moves.json',
    timeout=30,
)
fast_moves_response.raise_for_status()
fast_moves = fast_moves_response.json()

In [49]:
# Target layout of the fast-moves table. `id`, `total_damage` and `created_at`
# are (re)computed by the withColumn calls further down, so whatever the
# schema yields for them at load time is overwritten.
fast_moves_schema = StructType([
    StructField('id', IntegerType(), True),
    StructField('move_id', IntegerType(), True),
    StructField('stamina_loss_scaler', DoubleType(), True),
    StructField('name', StringType(), True),
    StructField('power', LongType(), True),
    StructField('duration', LongType(), True),
    StructField('energy_delta', LongType(), True),
    StructField('type', StringType(), True),
    StructField('total_damage', LongType(), True),
    StructField('created_at', DateType(), True)
])

# Load the downloaded records into Spark using the layout above.
fast_moves_dataframe = spark.createDataFrame(fast_moves, fast_moves_schema)

# Derive the computed columns:
#   id           - 1-based surrogate key. monotonically_increasing_id() is
#                  unique and increasing but only consecutive within a single
#                  partition, so do not rely on gap-free ids if the data is
#                  ever repartitioned.
#   total_damage - power multiplied by duration.
#   created_at   - the date on which this cell was executed.
fast_moves_dataframe = (
    fast_moves_dataframe
    .withColumn('id', monotonically_increasing_id() + 1)
    .withColumn('total_damage', col('power') * col('duration'))
    .withColumn('created_at', current_date())
)

fast_moves_dataframe.show(5)

+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
| id|move_id|stamina_loss_scaler|         name|power|duration|energy_delta|  type|total_damage|created_at|
+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
|  1|    200|               0.01|  Fury Cutter|    3|     400|           6|   Bug|        1200|2020-09-04|
|  2|    201|               0.01|     Bug Bite|    5|     500|           6|   Bug|        2500|2020-09-04|
|  3|    202|               0.01|         Bite|    6|     500|           4|  Dark|        3000|2020-09-04|
|  4|    203|               0.01| Sucker Punch|    7|     700|           8|  Dark|        4900|2020-09-04|
|  5|    204|               0.01|Dragon Breath|    6|     500|           4|Dragon|        3000|2020-09-04|
+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
only showing top 5 rows



In [50]:
# Print the column names, Spark types and nullability of the transformed frame.
fast_moves_dataframe.printSchema()

root
 |-- id: long (nullable = false)
 |-- move_id: integer (nullable = true)
 |-- stamina_loss_scaler: double (nullable = true)
 |-- name: string (nullable = true)
 |-- power: long (nullable = true)
 |-- duration: long (nullable = true)
 |-- energy_delta: long (nullable = true)
 |-- type: string (nullable = true)
 |-- total_damage: long (nullable = true)
 |-- created_at: date (nullable = false)



In [51]:
# Export the frame as one CSV file: toPandas() pulls every row onto the
# driver, then pandas writes 'pokemon.csv' without its index column.
(
    fast_moves_dataframe
    .toPandas()
    .to_csv('pokemon.csv', index=False)
)

In [52]:
# Read the exported CSV back into Spark. Without a schema every column would
# come back as a string, so reuse the schema of the frame that was written to
# keep the numeric and date types intact across the round trip.
pokemon_dataframe = spark.read.csv(
    'pokemon.csv',
    header=True,
    schema=fast_moves_dataframe.schema,
)

In [53]:
# Display the first five rows read back from 'pokemon.csv' to check the round trip.
pokemon_dataframe.show(5)

+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
| id|move_id|stamina_loss_scaler|         name|power|duration|energy_delta|  type|total_damage|created_at|
+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
|  1|    200|               0.01|  Fury Cutter|    3|     400|           6|   Bug|        1200|2020-09-04|
|  2|    201|               0.01|     Bug Bite|    5|     500|           6|   Bug|        2500|2020-09-04|
|  3|    202|               0.01|         Bite|    6|     500|           4|  Dark|        3000|2020-09-04|
|  4|    203|               0.01| Sucker Punch|    7|     700|           8|  Dark|        4900|2020-09-04|
|  5|    204|               0.01|Dragon Breath|    6|     500|           4|Dragon|        3000|2020-09-04|
+---+-------+-------------------+-------------+-----+--------+------------+------+------------+----------+
only showing top 5 rows

