# Parameters
- parameters to be passed in to notebook's payload
- the goal is to make the code flexible enough to adapt successfully to requirement changes

In [6]:
# DESC: list of games (PokeAPI version names) to filter by
# GOAL: keep only the Pokemon that appear in the games listed in the requirements;
#       names are compared case-insensitively by the fetch cell
# (scope: Primary Requirements - exercise 1)
gameColors_param = ['Red', 'leafgreen', 'blue', 'white']

# * INBOUND *
# DESC: number of records to fetch per page; used both as the 'limit' query parameter
#       of the request url and as the step by which the 'offset' advances
# GOAL: split the read into smaller chunks, which theoretically could improve performance
#       or mitigate memory constraints of the computing resources running the notebook
offsetSize_param = 100

# * DEBUG *
# DESC: max number of pages to retrieve
# GOAL: useful while developing, to avoid waiting for the whole fetch process to complete
# DOMAIN VALUES:
#   -1  = get ALL pages
#   >=1 = get that number of pages
maxNumberOfPagesToRetrieve_param = -1

# DESC: max execution time (secs) to wait in case the fetch loop hangs
# GOAL: avoid an infinite loop (complementary to 'maxNumberOfPagesToRetrieve_param');
#       it works like a timeout that is checked before each page is requested
# DOMAIN VALUES:
#   -1  = wait indefinitely
#   >=0 = number of seconds to wait
maxTimeExecutionInSecs = 900 # 15 mins

# * OUTBOUND *
# DESC: destination filename's prefix, including the full path
fileNamePrefixWithFullPath_param = "c:\\temp\\PokemonPortfolio"

# DESC: number of destination files (partitions) to generate; 1 = single file
# GOAL: on the BI/Analytics side it might be important to have the data spread across
#       several files to address performance bottlenecks or parallelize the load
# (scope: Primary Requirements - exercise 6)
numberOfPartionedFiles_param = 2

# THIS WAS NOT PART OF THE REQUIREMENTS
# DESC: choose 1 of the 2 available formats for the destination file(s)
# GOAL: show the versatility that could be offered to business users for self-service,
#       letting them pick the format that serves them better
#       (naturally, more formats could be implemented)
# DOMAIN VALUES: 'json' | 'csv'
fileFormatOutput_param = 'csv'

#### Primary Requirements (5/6)
\* exercise 6 not included (save to file(s))

In [7]:
import requests
import re
from datetime import datetime

# Fetch cell: pages through the PokeAPI pokemon list, loads each pokemon's details and
# stores in 'portfolio' one record per pokemon that appears in one of 'gameColors_param'.
#
# Reads : gameColors_param, offsetSize_param, maxNumberOfPagesToRetrieve_param,
#         maxTimeExecutionInSecs
# Writes: portfolio (list of dictionaries), currentOffset, results, startTime

# network timeout (secs) applied to every single HTTP request, so a stalled connection
#  cannot hang the cell ('maxTimeExecutionInSecs' is only checked between pages)
REQUEST_TIMEOUT_SECS = 30

# first integer or decimal number found in a text
NUMBER_PATTERN = re.compile(r'[0-9]+(?:\.[0-9]+)?')


def parse_measure(raw_value):
    """Return the first number found in *raw_value* as a float, or None when there is none.

    Theoretical caution against values carrying a unit suffix (cm, inch, ..); that is not
    the real case today, but it prevents errors when calculating the BMI (exercise 3).
    """
    found = NUMBER_PATTERN.search(str(raw_value))
    return float(found.group()) if found else None


def capitalize_words(text):
    """Upper-case the first character of every word, leaving the remaining characters intact.

    Deliberately not str.capitalize()/str.title(), which would also lower-case the
    characters from position 1 onwards (exercise 4).
    """
    return ' '.join(word[0].upper() + word[1:] for word in (text or '').split())


def appears_in_games(pokemon, wanted_games):
    """Tell whether *pokemon* lists a game version whose name is in *wanted_games*.

    *wanted_games* must hold case-folded names. The comparison is an exact,
    case-insensitive match, so 'red' does not match 'firered' (exercise 1).
    """
    for game_index in pokemon.get('game_indices') or []:
        version_name = (game_index.get('version') or {}).get('name') or ''
        if version_name.casefold() in wanted_games:
            return True
    return False


def compute_bmi(weight, height):
    """Return the body mass index in kg/m^2, or None when it cannot be calculated.

    PokeAPI documents weight in hectograms and height in decimetres, hence both
    values are divided by 10 before applying BMI = kg / m^2 (exercise 3).
    """
    if weight is None or not height:
        return None
    weight_kg = weight / 10
    height_m = height / 10
    return weight_kg / height_m ** 2


def build_portfolio_record(pokemon):
    """Build the portfolio dictionary for one pokemon details payload (exercises 1 to 5)."""
    sprites = pokemon.get('sprites') or {}
    weight = parse_measure(pokemon.get('weight'))
    height = parse_measure(pokemon.get('height'))

    record = {'id': pokemon.get('id'),
              'name': capitalize_words(pokemon.get('name')),
              'base_experience': pokemon.get('base_experience'),
              'weight': weight,
              'height': height,
              'order': pokemon.get('order'),
              'front_default': sprites.get('front_default')}

    # E X E R C I S E  2: one 'slot{x}_name' entry per type
    for type_entry in pokemon.get('types') or []:
        slot = type_entry.get('slot')
        record[f'slot{slot}_name'] = (type_entry.get('type') or {}).get('name')

    # E X E R C I S E  3
    record['BMI'] = compute_bmi(weight, height)
    return record


currentOffset = 0
portfolio = []
results = None

startTime = datetime.now()

try:
    # case-folded once, so every lookup is an O(1) case-insensitive exact match
    wanted_games = {gameColor.casefold() for gameColor in gameColors_param}

    # a single session re-uses the connection across the many detail requests
    with requests.Session() as session:

        # keep paging until the API returns an empty 'results' array ...
        while results != []:

            # ... or the execution time limit is reached (-1 = no limit) ...
            elapsedSecs = (datetime.now() - startTime).total_seconds()
            if maxTimeExecutionInSecs != -1 and elapsedSecs > maxTimeExecutionInSecs:
                print('stopped: max execution time reached')
                break

            # ... or the requested number of pages was retrieved (-1 = all pages)
            nextPageNumber = (currentOffset / offsetSize_param) + 1
            if maxNumberOfPagesToRetrieve_param != -1 and nextPageNumber > maxNumberOfPagesToRetrieve_param:
                break

            # fetch the next chunk of records (currentOffset) from PokeApi
            #  based on its predefined size (offsetSize_param)
            api_url = f"https://pokeapi.co/api/v2/pokemon?offset={currentOffset}&limit={offsetSize_param}"
            response = session.get(api_url, timeout=REQUEST_TIMEOUT_SECS)
            response.raise_for_status()
            results = response.json()['results']

            for pokemonElement in results:

                # get pokemon's details through its 'url'
                detailResponse = session.get(pokemonElement['url'], timeout=REQUEST_TIMEOUT_SECS)
                detailResponse.raise_for_status()
                pokemon = detailResponse.json()

                # each pokemon is added at most once, no matter how many games match
                if appears_in_games(pokemon, wanted_games):
                    portfolio.append(build_portfolio_record(pokemon))

            # set next offset to fetch
            currentOffset = currentOffset + offsetSize_param
            print('next offset: ', currentOffset)

except Exception as e:
    # a partial portfolio would be misleading: replace it with a single dictionary
    #  holding the error message, so user can report a meaningful error to dev team
    portfolio.clear()
    portfolio.append({'error': str(e)})
    print('Error occurred (original message): ', e)

print('\nelements # gathered: ', len(portfolio))

next offset:  100
next offset:  200
next offset:  300
next offset:  400
next offset:  500
next offset:  600
next offset:  700
next offset:  800
next offset:  900
next offset:  1000
next offset:  1100
next offset:  1200
next offset:  1300

elements read:  157


### Primary Requirements - exercise 6
includes:
- files partitioning
- export to json | csv

The Primary Requirements exercises are completed by the code in the next cell.

In [9]:
import json
import csv 

# Export cell: splits 'portfolio' into 'numberOfPartionedFiles_param' contiguous
# partitions and writes each one to its own file, in 'fileFormatOutput_param' format.
#
# Reads : portfolio, numberOfPartionedFiles_param, fileFormatOutput_param,
#         fileNamePrefixWithFullPath_param ('datetime' is imported by the fetch cell)
# Writes: one file per partition, named
#         <prefix>[<file#>of<files>]_yyyy-mm-dd_<hour>h.<format>

# static columns written to csv files
CSV_HEADER = ['id', 'name', 'base_experience', 'weight', 'height', 'order', 'front_default', 'BMI']


def split_into_partitions(items, partition_count):
    """Split *items* into *partition_count* contiguous slices whose sizes differ by at most one.

    Every element ends up in exactly one slice: when the elements don't divide evenly,
    the first slices receive one extra element each. Slices may be empty when there
    are fewer elements than partitions.

    Raises ValueError when *partition_count* is lower than 1.
    """
    if partition_count < 1:
        raise ValueError('number of partitioned files must be 1 or greater')

    base_size, remainder = divmod(len(items), partition_count)

    partitions = []
    lower = 0
    for position in range(partition_count):
        upper = lower + base_size + (1 if position < remainder else 0)
        partitions.append(items[lower:upper])
        lower = upper
    return partitions


def is_slot_column(key):
    """Tell whether *key* is one of the dynamic 'slot{x}_name' keys."""
    return key.startswith('slot') and key.endswith('_name')


try:
    # fail early, before any file gets created
    if fileFormatOutput_param not in ('json', 'csv'):
        raise ValueError(f"unsupported file format output: '{fileFormatOutput_param}'")

    # note: most likely, as portfolio file won't be generated every minute, the hour it was
    #  generated should be fine at the end of timestamp. Taken once, so that all the
    #  partitions of the same export share the same timestamp
    timestamp = datetime.now().strftime('%Y-%m-%d_%Hh')

    partitions = split_into_partitions(portfolio, numberOfPartionedFiles_param)

    for fileNumber, portfolioSet in enumerate(partitions, start=1):

        # making file output format configurable was not a requirement of the exercise
        # and therefore, for csv, the dynamic 'slot{x}_name' items are left out, as they
        # would need additional code (header's fields and nulls added accordingly).
        # Rows are filtered copies: the dictionaries in 'portfolio' are not modified.
        # Any other unexpected key still makes DictWriter raise an error
        if fileFormatOutput_param == 'csv':
            content = [{key: value for key, value in element.items() if not is_slot_column(key)}
                       for element in portfolioSet]
        else:
            # serialized before the file is opened, so a serialization error
            #  doesn't leave an empty or partial file behind
            content = json.dumps(portfolioSet, indent=2)

        fullFilePath = (f'{fileNamePrefixWithFullPath_param}'
                        f'[{fileNumber}of{numberOfPartionedFiles_param}]_{timestamp}'
                        f'.{fileFormatOutput_param}')

        # newline='' removes \r (blank line) between rows
        #  important for csv's export
        with open(fullFilePath, 'w', newline='', encoding='utf-8') as f:
            if fileFormatOutput_param == 'csv':
                csvWriter = csv.DictWriter(f, CSV_HEADER)
                csvWriter.writeheader()
                csvWriter.writerows(content)
            else:
                f.write(content)

except Exception as e:
    print('Error occurred (original message): ', e)