In [91]:
import json
from itertools import chain, starmap

import requests
from pyspark import SparkContext
from pyspark.sql import SQLContext

def flatten_json_iterative_solution(dictionary):
    """Flatten a nested JSON-like dictionary into a single-level dictionary.

    Nested dictionaries and lists are expanded one level per pass until no
    value is a ``dict`` or a ``list``. A child key is the parent key joined
    to the child key (or to the list index) with ``'_'``; for example
    ``{'a': {'b': [1, 2]}}`` becomes ``{'a_b_0': 1, 'a_b_1': 2}``.

    Args:
        dictionary: The mapping to flatten. It is not mutated; a new
            dictionary is built on every pass.

    Returns:
        A new ``dict`` in which no value is a ``dict`` or a ``list``.

    Notes:
        * Empty dictionaries and empty lists yield no entries, so their keys
          are absent from the result.
        * When two different paths produce the same flattened key, the entry
          produced later overwrites the earlier one.
        * Keys are combined with ``str.format``, so non-string keys (e.g.
          integers) are accepted when building a combined key. A key whose
          value is already atomic is kept unchanged.
    """

    def unpack(parent_key, parent_value):
        """Yield ``(key, value)`` pairs for one level of nesting only."""
        if isinstance(parent_value, dict):
            for key, value in parent_value.items():
                yield '{}_{}'.format(parent_key, key), value
        elif isinstance(parent_value, list):
            for index, value in enumerate(parent_value):
                yield '{}_{}'.format(parent_key, index), value
        else:
            # Atomic value: pass it through untouched.
            yield parent_key, parent_value

    while True:
        # Expand every top-level entry by exactly one level.
        dictionary = dict(chain.from_iterable(starmap(unpack, dictionary.items())))
        # Stop as soon as no value can be expanded any further.
        if not any(isinstance(value, (dict, list)) for value in dictionary.values()):
            break

    return dictionary

# Stop a SparkContext left over from a previous run of this cell so a fresh
# one can be created below. Nothing earlier in this file defines `sc`, so on
# a first run the name does not exist yet; that case is simply skipped.
try:
    sc.stop()
except NameError:
    pass

# The constructor already creates the context. `SparkContext.getOrCreate` is
# a classmethod that returns the active context, so calling it on the new
# instance only returned that same instance again and has been dropped.
sc = SparkContext("local", "NHL Events")
sqlContext = SQLContext(sc)

# Download the live feed for one game. The timeout keeps the cell from
# hanging indefinitely, and raise_for_status() turns an HTTP error response
# into an immediate exception instead of a confusing failure further down.
r = requests.get(
    'https://statsapi.web.nhl.com/api/v1/game/2019020156/feed/live',
    timeout=30,
)
r.raise_for_status()
data = r.json()

# --- Top-level feed fields ---------------------------------------------------
game_id, api_link = data['gamePk'], data['link']
timestamp = data['metaData']['timeStamp']
gameData, liveData = data['gameData'], data['liveData']

# --- gameData ----------------------------------------------------------------
game = gameData['game']
pk, season, game_type = game['pk'], game['season'], game['type']

# NOTE: this variable would shadow the standard-library `datetime` module if
# that module were imported into the same namespace.
datetime = gameData['datetime']
start_time, end_time = datetime['dateTime'], datetime['endDateTime']

status = gameData['status']
end_status = status['abstractGameState']

teams = gameData['teams']
away, home = teams['away'], teams['home']

players = gameData['players']

venue = gameData['venue']
venue_name, venue_link = venue['name'], venue['link']

# --- liveData: plays ---------------------------------------------------------
# TODO: plays are only pulled out here; per the original note they are
# "not done" yet.
plays = liveData['plays']
all_plays, scoring_plays = plays['allPlays'], plays['scoringPlays']
penalty_plays, plays_by_period = plays['penaltyPlays'], plays['playsByPeriod']

current_play = plays['currentPlay']
current_play_result, current_play_about = current_play['result'], current_play['about']
current_play_coordinates = current_play['coordinates']

# --- liveData: linescore -----------------------------------------------------
linescore = liveData['linescore']
current_period, current_period_ordinal = (
    linescore['currentPeriod'],
    linescore['currentPeriodOrdinal'],
)
current_period_time_remaining = linescore['currentPeriodTimeRemaining']
# TODO: periods are only pulled out here; per the original note they are
# "not done" yet.
periods = linescore['periods']

shootout_info = linescore['shootoutInfo']
shootout_info_away, shootout_info_home = shootout_info['away'], shootout_info['home']

# Original note on the per-side keys: team, goals, shotsOnGoal, goaliePulled,
# numSkaters, powerPlay.
linescore_teams = linescore['teams']
linescore_home, linescore_away = linescore_teams['home'], linescore_teams['away']

power_play_strength, has_shootout = (
    linescore['powerPlayStrength'],
    linescore['hasShootout'],
)
# Original note on keys: intermissionTimeRemaining, intermissionTimeElapsed,
# inIntermission.
intermission_info = linescore['intermissionInfo']
# Original note on keys: situationTimeRemaining, situationTimeElapsed,
# inSituation.
powerPlayInfo = linescore['powerPlayInfo']

# --- liveData: boxscore ------------------------------------------------------
boxscore = liveData['boxscore']
boxscore_teams = boxscore['teams']

# TODO: the two team boxscores are only pulled out here; per the original
# note they are "not done" yet.
boxscore_home_team, boxscore_away_team = boxscore_teams['home'], boxscore_teams['away']
boxscore_officials = boxscore['officials']

# --- liveData: decisions -----------------------------------------------------
decisions = liveData['decisions']
winning_team, losing_team = decisions['winner'], decisions['loser']
first_star, second_star, third_star = (
    decisions['firstStar'],
    decisions['secondStar'],
    decisions['thirdStar'],
)

# --- liveData: shot pressure -------------------------------------------------
shot_pressure = liveData['shotPressure']
game_pressures, skater_advantages = (
    shot_pressure['gamePressures'],
    shot_pressure['skaterAdvantages'],
)
home_skater_advantages, away_skater_advantages = (
    skater_advantages['homeTeam'],
    skater_advantages['awayTeam'],
)

# --- Flattened summary of the identifying fields -----------------------------
flattened_data = flatten_json_iterative_solution(
    {'game_id': game_id, 'api_link': api_link, 'timestamp': timestamp}
)



# `read.json` on an RDD expects each element to be a JSON string. Passing the
# raw dicts made PySpark fall back to `str(dict)`, i.e. Python repr, which is
# not JSON (single quotes, True/False/None). Serialise each official with
# json.dumps so the reader always receives valid JSON.
df = sqlContext.read.json(
    sc.parallelize([json.dumps(official) for official in boxscore_officials]),
    multiLine=True,
)
df.printSchema()
# Show every row (n = row count) without truncating long cell values.
df.show(df.count(), False)

root
 |-- official: struct (nullable = true)
 |    |-- fullName: string (nullable = true)
 |    |-- id: long (nullable = true)
 |    |-- link: string (nullable = true)
 |-- officialType: string (nullable = true)

+-------------------------------------------+------------+
|official                                   |officialType|
+-------------------------------------------+------------+
|[Pierre Lambert, 6978, /api/v1/people/6978]|Referee     |
|[Steve Kozari, 2458, /api/v1/people/2458]  |Referee     |
|[Scott Cherrey, 2505, /api/v1/people/2505] |Linesman    |
|[Kory Nagy, 6991, /api/v1/people/6991]     |Linesman    |
+-------------------------------------------+------------+

