# Scraping Pokemon Wikipedia articles

In [1]:
# import the needed libraries
import requests
import pandas as pd
import re
import os


In [2]:
# create article directory
directory = "\\articles"

parentDirectory = os.getcwdb().decode('utf-8')

path = parentDirectory + directory

try:
    os.mkdir(path)
except OSError as error:
    print("You've already created this Folder Dummy!")

You've already created this Folder Dummy!


In [3]:
df = pd.read_excel('generations/generation1.xls')

In [4]:
df

Unnamed: 0,Name
0,Bulbasaur †
1,Ivysaur
2,Venusaur
3,Charmander†
4,Charmeleon
...,...
146,Dratini
147,Dragonair
148,Dragonite
149,Mewtwo ‡


In [5]:
# clean up data a bit
for i in range(len(df['Name'])):
    df['Name'][i] = re.sub(r'[^A-Za-z]', '', df['Name'][i])
    

In [10]:
df['Name'][53]

'Psyduck'

## Algorithm and Test

In [3]:
# get an article
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Pikachu%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()

In [33]:
type(response.status_code)

int

In [4]:
with open('articles/Pikachu.pdf', 'wb') as f:
    f.write(response.content)

### Get articles for every Generation 1 pokemon 1

In [None]:
for pokemon in df['Name']:
    url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/' 
    url_ending = '%20(Pokémon)'
    response = requests.get(url + pokemon + url_ending)
    if response.status_code != 200:
        continue

    with open(f'articles/{pokemon}.pdf', 'wb') as f:
        f.write(response.content)

## Running the Code above lead me to discover that wikipedia doesn't have a unique article for every pokemon so I modifed the code above to all scrape the unique articles.

In [25]:
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Beedrill%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
with open('articles/Beedrill.pdf', 'wb') as f:
    f.write(response.content)

In [26]:
unUniqueArticle = response.content

## New and Improved "Get article for every Generation 1 pokemon"

In [9]:
done = []
sumOfUniqueArticles = 0

In [28]:
for pokemon in df['Name'][0:51]:
    if pokemon not in done:
        done.append(pokemon)
    else:
        continue
    print("Pokemon:", pokemon)
    url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/' 
    url_ending = '%20(Pokémon)'
    response = requests.get(url + pokemon + url_ending)
    if response.status_code != 200:
        continue

    if response.content == unUniqueArticle:
        continue

    with open(f'articles/{pokemon}.pdf', 'wb') as f:
        f.write(response.content)
        sumOfUniqueArticles = sumOfUniqueArticles + 1
        print("Article Dowloaded\n")

Pokemon: Bulbasaur
Article Dowloaded

Pokemon: Ivysaur
Pokemon: Venusaur
Article Dowloaded

Pokemon: Charmander
Article Dowloaded

Pokemon: Charmeleon
Pokemon: Charizard
Article Dowloaded

Pokemon: Squirtle
Article Dowloaded

Pokemon: Wartortle
Pokemon: Blastoise
Pokemon: Caterpie
Pokemon: Metapod
Pokemon: Butterfree
Pokemon: Weedle
Pokemon: Kakuna
Pokemon: Beedrill
Pokemon: Pidgey
Pokemon: Pidgeotto
Pokemon: Pidgeot
Pokemon: Rattata
Pokemon: Raticate
Pokemon: Spearow
Pokemon: Fearow
Pokemon: Ekans
Pokemon: Arbok
Pokemon: Pikachud
Pokemon: Raichu
Pokemon: Sandshrew
Pokemon: Sandslash
Pokemon: Nidoran
Pokemon: Nidorina
Pokemon: Nidoqueen
Pokemon: Nidorino
Pokemon: Nidoking
Pokemon: Clefairy
Pokemon: Clefable
Pokemon: Vulpix
Pokemon: Ninetales
Pokemon: Jigglypuff
Article Dowloaded

Pokemon: Wigglytuff
Pokemon: Zubat
Pokemon: Golbat
Pokemon: Oddish
Pokemon: Gloom
Pokemon: Vileplume
Pokemon: Paras
Pokemon: Parasect
Pokemon: Venonat
Pokemon: Venomoth
Pokemon: Diglett
Pokemon: Dugtrio


In [29]:
for pokemon in df['Name'][51:101]:
    if pokemon not in done:
        done.append(pokemon)
    else:
        continue
    print("Pokemon:", pokemon)
    url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/' 
    url_ending = '%20(Pokémon)'
    response = requests.get(url + pokemon + url_ending)
    if response.status_code != 200:
        continue

    if response.content == unUniqueArticle:
        continue

    with open(f'articles/{pokemon}.pdf', 'wb') as f:
        f.write(response.content)
        sumOfUniqueArticles = sumOfUniqueArticles + 1
        print("Article Dowloaded\n")

Pokemon: Meowth
Article Dowloaded

Pokemon: Persian
Pokemon: Psyduck
Article Dowloaded

Pokemon: Golduck
Pokemon: Mankey
Pokemon: Primeape
Pokemon: Growlithe
Pokemon: Arcanine
Pokemon: Poliwag
Pokemon: Poliwhirl
Pokemon: Poliwrath
Pokemon: Abra
Pokemon: Kadabra
Article Dowloaded

Pokemon: Alakazam
Pokemon: Machop
Pokemon: Machoke
Pokemon: Machamp
Pokemon: Bellsprout
Pokemon: Weepinbell
Pokemon: Victreebel
Pokemon: Tentacool
Pokemon: Tentacruel
Pokemon: Geodude
Pokemon: Graveler
Pokemon: Golem
Pokemon: Ponyta
Pokemon: Rapidash
Pokemon: Slowpoke
Pokemon: Slowbro
Pokemon: Magnemite
Pokemon: Magneton
Pokemon: Farfetchd
Pokemon: Doduo
Pokemon: Dodrio
Pokemon: Seel
Pokemon: Dewgong
Pokemon: Grimer
Pokemon: Muk
Pokemon: Shellder
Pokemon: Cloyster
Pokemon: Gastly
Pokemon: Haunter
Pokemon: Gengar
Pokemon: Onix
Pokemon: Drowzee
Pokemon: Hypno
Pokemon: Krabby
Pokemon: Kingler
Pokemon: Voltorb
Pokemon: Electrode


In [30]:
for pokemon in df['Name'][101:]:
    if pokemon not in done:
        done.append(pokemon)
    else:
        continue
    print("Pokemon:", pokemon)
    url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/' 
    url_ending = '%20(Pokémon)'
    response = requests.get(url + pokemon + url_ending)
    if response.status_code != 200:
        continue

    if response.content == unUniqueArticle:
        continue

    with open(f'articles/{pokemon}.pdf', 'wb') as f:
        f.write(response.content)
        sumOfUniqueArticles = sumOfUniqueArticles + 1
        print("Article Dowloaded\n")

Pokemon: Exeggcute
Pokemon: Exeggutor
Pokemon: Cubone
Pokemon: Marowak
Pokemon: Hitmonlee
Pokemon: Hitmonchan
Pokemon: Lickitung
Pokemon: Koffing
Pokemon: Weezing
Pokemon: Rhyhorn
Pokemon: Rhydon
Pokemon: Chansey
Pokemon: Tangela
Pokemon: Kangaskhan
Pokemon: Horsea
Pokemon: Seadra
Pokemon: Goldeen
Pokemon: Seaking
Pokemon: Staryu
Pokemon: Starmie
Pokemon: MrMime
Pokemon: Scyther
Pokemon: Jynx
Article Dowloaded

Pokemon: Electabuzz
Pokemon: Magmar
Pokemon: Pinsir
Pokemon: Tauros
Pokemon: Magikarp
Article Dowloaded

Pokemon: Gyarados
Article Dowloaded

Pokemon: Lapras
Article Dowloaded

Pokemon: Ditto
Pokemon: Eeveee
Pokemon: Vaporeon
Pokemon: Jolteon
Pokemon: Flareon
Pokemon: Porygon
Pokemon: Omanyte
Pokemon: Omastar
Pokemon: Kabuto
Pokemon: Kabutops
Pokemon: Aerodactyl
Pokemon: Snorlax
Article Dowloaded

Pokemon: Articuno
Pokemon: Zapdos
Pokemon: Moltres
Pokemon: Dratini
Pokemon: Dragonair
Pokemon: Dragonite
Pokemon: Mewtwo
Article Dowloaded

Pokemon: Mew
Article Dowloaded



In [31]:
sumOfUniqueArticles

16

# Now for the remain Generations

In [5]:
# first I need reuse able code not DRY code
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Beedrill%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
unUniqueArticle = response.content


def cleanPokemonName(pokemon):
    return re.sub('[^A-za-z]', '', pokemon)

def getRawArticle(pokemon):
    url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/' 
    url_ending = '%20(Pokémon)'
    response = requests.get(url + pokemon + url_ending)
    if response.status_code != 200:
        return None

    return response.content

def rawArticletoPDF(article, pokemon):
    with open(f'articles/{pokemon}.pdf', 'wb') as f:
        print("Pokemon: ", pokemon)
        f.write(article)
        print("Article Dowloaded\n")
    

In [6]:
# load all the generations
gen2 = pd.read_excel('generations/generation2.xls')
gen3 = pd.read_excel('generations/generation3.xls')
gen4 = pd.read_excel('generations/generation4.xls')
gen5 = pd.read_excel('generations/generation5.xls')
gen6 = pd.read_excel('generations/generation6.xls')
gen7 = pd.read_excel('generations/generation7.xls')
gen8 = pd.read_excel('generations/generation8.xls')
gen9 = pd.read_excel('generations/generation9.xls')

In [7]:
# clean up data
for i in range(len(gen2['Name'])):
    gen2['Name'][i] = cleanPokemonName(gen2['Name'][i])

for i in range(len(gen3['Name'])):
    gen3['Name'][i] = cleanPokemonName(gen3['Name'][i])

for i in range(len(gen4['Name'])):
    gen4['Name'][i] = cleanPokemonName(gen4['Name'][i])

for i in range(len(gen5['Name'])):
    gen5['Name'][i] = cleanPokemonName(gen5['Name'][i])

for i in range(len(gen6['Name'])):
    gen6['Name'][i] = cleanPokemonName(gen6['Name'][i])

for i in range(len(gen7['Name'])):
    gen7['Name'][i] = cleanPokemonName(gen7['Name'][i])

for i in range(len(gen8['Name'])):
    gen8['Name'][i] = cleanPokemonName(gen8['Name'][i])

for i in range(len(gen9['Name'])):
    gen9['Name'][i] = cleanPokemonName(gen9['Name'][i])

In [10]:
sumOfUniqueArticles

0

## Get Gen 2 Pokemon articles

In [51]:
len(gen2)
done2 = {}

In [53]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Chikorita%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

In [None]:
for pokemon in gen2['Name'][0:50]:
    
    if done2.get(pokemon):
        continue
    else:
        done2[pokemon] = pokemon
    
    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

In [None]:
for pokemon in gen2['Name'][50:]:

    if done2.get(pokemon):
        continue
    else:
        done2[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

In [60]:
len(done2)

100

# Gen 3 Pokemon

In [67]:
len(gen3)
done3 = {}

In [69]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Treecko%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
#rawArticletoPDF(article, 'Treecko')
unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

In [70]:
for pokemon in gen3['Name'][0:50]:

    if done3.get(pokemon):
        continue
    else:
        done3[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Treecko
Pokemon:  Grovyle
Pokemon:  Sceptile
Pokemon:  Torchic
Pokemon:  Combusken
Pokemon:  Blaziken
Pokemon:  Mudkip
Pokemon:  Marshtomp
Pokemon:  Swampert
Pokemon:  Poochyena
Pokemon:  Mightyena
Pokemon:  Zigzagoon
Pokemon:  Linoone
Pokemon:  Wurmple
Pokemon:  Silcoon
Pokemon:  Beautifly
Pokemon:  Cascoon
Pokemon:  Dustox
Pokemon:  Lotad
Pokemon:  Lombre
Pokemon:  Ludicolo
Pokemon:  Seedot
Pokemon:  Nuzleaf
Pokemon:  Shiftry
Pokemon:  Taillow
Pokemon:  Swellow
Pokemon:  Wingull
Pokemon:  Pelipper
Pokemon:  Ralts
Pokemon:  Kirlia
Pokemon:  Gardevoir
Pokemon:  Surskit
Pokemon:  Masquerain
Pokemon:  Shroomish
Pokemon:  Breloom
Pokemon:  Slakoth
Pokemon:  Vigoroth
Pokemon:  Slaking
Pokemon:  Nincada
Pokemon:  Ninjask
Pokemon:  Shedinja
Pokemon:  Whismur
Pokemon:  Loudred
Pokemon:  Exploud
Pokemon:  Makuhita
Pokemon:  Hariyama
Pokemon:  Azurill
Pokemon:  Nosepass
Pokemon:  Skitty
Pokemon:  Delcatty


In [71]:
for pokemon in gen3['Name'][50:100]:

    if done3.get(pokemon):
        continue
    else:
        done3[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Sableye
Pokemon:  Mawile
Pokemon:  Aron
Pokemon:  Lairon
Pokemon:  Aggron
Pokemon:  Meditite
Pokemon:  Medicham
Pokemon:  Electrike
Pokemon:  Manectric
Pokemon:  Plusle
Pokemon:  Minun
Pokemon:  Volbeat
Pokemon:  Illumise
Pokemon:  Roselia
Pokemon:  Gulpin
Pokemon:  Swalot
Pokemon:  Carvanha
Pokemon:  Sharpedo
Pokemon:  Wailmer
Pokemon:  Wailord
Pokemon:  Numel
Pokemon:  Camerupt
Pokemon:  Torkoal
Pokemon:  Spoink
Pokemon:  Grumpig
Pokemon:  Spinda
Pokemon:  Trapinch
Pokemon:  Vibrava
Pokemon:  Flygon
Pokemon:  Cacnea
Pokemon:  Cacturne
Pokemon:  Swablu
Pokemon:  Altaria
Pokemon:  Zangoose
Pokemon:  Seviper
Pokemon:  Lunatone
Pokemon:  Solrock
Pokemon:  Barboach
Pokemon:  Whiscash
Pokemon:  Corphish
Pokemon:  Crawdaunt
Pokemon:  Baltoy
Pokemon:  Claydol
Pokemon:  Lileep
Pokemon:  Cradily
Pokemon:  Anorith
Pokemon:  Armaldo
Pokemon:  Feebas
Pokemon:  Milotic
Pokemon:  Castform


In [72]:
for pokemon in gen3['Name'][100:]:

    if done3.get(pokemon):
        continue
    else:
        done3[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Kecleon
Pokemon:  Shuppet
Pokemon:  Banette
Pokemon:  Duskull
Pokemon:  Dusclops
Pokemon:  Tropius
Pokemon:  Chimecho
Pokemon:  Absol
Pokemon:  Wynaut
Pokemon:  Snorunt
Pokemon:  Glalie
Pokemon:  Spheal
Pokemon:  Sealeo
Pokemon:  Walrein
Pokemon:  Clamperl
Pokemon:  Huntail
Pokemon:  Gorebyss
Pokemon:  Relicanth
Pokemon:  Luvdisc
Pokemon:  Bagon
Pokemon:  Shelgon
Pokemon:  Salamence
Pokemon:  Beldum
Pokemon:  Metang
Pokemon:  Metagross
Pokemon:  Regirock
Pokemon:  Regice
Pokemon:  Registeel
Pokemon:  Latias
Pokemon:  Latios
Pokemon:  Kyogre
Pokemon:  Groudon
Pokemon:  Rayquaza
Pokemon:  Jirachi
Pokemon:  Deoxys[f]


In [73]:
sumOfUniqueArticles

16

# Gen 4 pokemon

In [7]:
len(gen4)
done4 = {}
gen4['Name'][0]

'Turtwig'

In [14]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Turtwig%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
#article = response.content
#rawArticletoPDF(article, 'Turtwig')
unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

In [11]:
for pokemon in gen4['Name'][0:50]:

    if done4.get(pokemon):
        continue
    else:
        done4[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Turtwig
Pokemon:  Grotle
Pokemon:  Torterra
Pokemon:  Chimchar
Pokemon:  Monferno
Pokemon:  Infernape
Pokemon:  Piplup
Pokemon:  Prinplup
Pokemon:  Empoleon
Pokemon:  Starly
Pokemon:  Staravia
Pokemon:  Staraptor
Pokemon:  Bidoof
Pokemon:  Bibarel
Pokemon:  Kricketot
Pokemon:  Kricketune
Pokemon:  Shinx
Pokemon:  Luxio
Pokemon:  Luxray
Pokemon:  Budew
Pokemon:  Roserade
Pokemon:  Cranidos[c]
Pokemon:  Rampardos[c]
Pokemon:  Shieldon[c]
Pokemon:  Bastiodon[c]
Pokemon:  Burmy
Pokemon:  Wormadam
Pokemon:  Mothim
Pokemon:  Combee
Pokemon:  Vespiquen
Pokemon:  Pachirisu
Pokemon:  Buizel
Pokemon:  Floatzel
Pokemon:  Cherubi
Pokemon:  Cherrim
Pokemon:  Shellos
Pokemon:  Gastrodon
Pokemon:  Ambipom
Pokemon:  Drifloon
Pokemon:  Drifblim
Pokemon:  Buneary
Pokemon:  Lopunny
Pokemon:  Mismagius
Pokemon:  Honchkrow
Pokemon:  Glameow
Pokemon:  Purugly
Pokemon:  Chingling
Pokemon:  Stunky
Pokemon:  Skuntank
Pokemon:  Bronzor


In [15]:
for pokemon in gen4['Name'][50:100]:

    if done4.get(pokemon):
        continue
    else:
        done4[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Hippopotas
Pokemon:  Hippowdon
Pokemon:  Skorupi
Pokemon:  Drapion
Pokemon:  Croagunk
Pokemon:  Toxicroak
Pokemon:  Carnivine
Pokemon:  Finneon
Pokemon:  Lumineon
Pokemon:  Mantyke
Pokemon:  Snover
Pokemon:  Abomasnow
Pokemon:  Weavile
Pokemon:  Magnezone
Pokemon:  Lickilicky
Pokemon:  Rhyperior
Pokemon:  Tangrowth
Pokemon:  Electivire
Pokemon:  Magmortar
Pokemon:  Togekiss
Pokemon:  Yanmega
Pokemon:  Leafeon
Pokemon:  Glaceon
Pokemon:  Gliscor
Pokemon:  Mamoswine
Pokemon:  PorygonZ
Pokemon:  Gallade
Pokemon:  Probopass
Pokemon:  Probopass
Article Dowloaded

Pokemon:  Dusknoir
Pokemon:  Froslass
Pokemon:  Rotom
Pokemon:  Uxie
Pokemon:  Mesprit
Pokemon:  Azelf
Pokemon:  Dialga
Pokemon:  Palkia
Pokemon:  Heatran
Pokemon:  Regigigas


In [16]:
for pokemon in gen4['Name'][100:]:

    if done4.get(pokemon):
        continue
    else:
        done4[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):
            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)
        else:
            continue

    else:
        continue

Pokemon:  Giratina
Pokemon:  Cresselia
Pokemon:  Phione
Pokemon:  Manaphy
Pokemon:  Darkrai
Pokemon:  Shaymin
Pokemon:  Arceus


# Gen 5

In [31]:
len(gen5)
done5 = {}
gen5['Name'][4]

'Tepig'

In [None]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Tepig%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
article = response.content
rawArticletoPDF(article, 'Servine')
#unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

## seems like wiki got lazy so I can too

In [33]:
for pokemon in gen5['Name'][0:]:

    if done5.get(pokemon):
        continue
    else:
        done5[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        #if checkArticleUniqueness(article):

        sumOfUniqueArticles = sumOfUniqueArticles + 1
        rawArticletoPDF(article, pokemon)

        #else:
            #continue

    else:
        continue

Pokemon:  Victini
Pokemon:  Snivy
Pokemon:  Servine
Pokemon:  Serperior
Pokemon:  Tepig
Pokemon:  Pignite
Pokemon:  Emboar
Pokemon:  Oshawott
Pokemon:  Dewott
Pokemon:  Samurott
Pokemon:  Patrat
Pokemon:  Watchog
Pokemon:  Lillipup
Pokemon:  Herdier
Pokemon:  Stoutland
Pokemon:  Purrloin
Pokemon:  Liepard
Pokemon:  Pansage
Pokemon:  Simisage
Pokemon:  Pansear
Pokemon:  Simisear
Pokemon:  Panpour
Pokemon:  Simipour
Pokemon:  Munna
Pokemon:  Musharna
Pokemon:  Pidove
Pokemon:  Tranquill
Pokemon:  Unfezant
Pokemon:  Blitzle
Pokemon:  Zebstrika
Pokemon:  Roggenrola
Pokemon:  Boldore
Pokemon:  Gigalith
Pokemon:  Woobat
Pokemon:  Swoobat
Pokemon:  Drilbur
Pokemon:  Excadrill
Pokemon:  Audino
Pokemon:  Timburr
Pokemon:  Gurdurr
Pokemon:  Conkeldurr
Pokemon:  Tympole
Pokemon:  Palpitoad
Pokemon:  Seismitoad
Pokemon:  Throh
Pokemon:  Sawk
Pokemon:  Sewaddle
Pokemon:  Swadloon
Pokemon:  Leavanny
Pokemon:  Venipede
Pokemon:  Whirlipede
Pokemon:  Scolipede
Pokemon:  Cottonee
Pokemon:  Whimsicott
P

# Gen 6

In [35]:
len(gen6)
done6 = {}
gen6['Name'][0]

'Chespin'

In [None]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Chespin%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
article = response.content
rawArticletoPDF(article, 'Chespin')
#unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

## wiki got lazy again

In [37]:
for pokemon in gen6['Name'][0:]:

    if done6.get(pokemon):
        continue
    else:
        done6[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        #if checkArticleUniqueness(article):

        sumOfUniqueArticles = sumOfUniqueArticles + 1
        rawArticletoPDF(article, pokemon)

        #else:
            #continue

    else:
        continue

Pokemon:  Chespin
Pokemon:  Quilladin
Pokemon:  Chesnaught
Pokemon:  Fennekin
Pokemon:  Braixen
Pokemon:  Delphox
Pokemon:  Froakie
Pokemon:  Frogadier
Pokemon:  Greninja
Pokemon:  Bunnelby
Pokemon:  Diggersby
Pokemon:  Fletchling
Pokemon:  Fletchinder
Pokemon:  Talonflame
Pokemon:  Scatterbug
Pokemon:  Spewpa
Pokemon:  Vivillon
Pokemon:  Litleo
Pokemon:  Pyroar
Pokemon:  Flabb
Pokemon:  Floette
Pokemon:  Florges
Pokemon:  Skiddo
Pokemon:  Gogoat
Pokemon:  Pancham
Pokemon:  Pangoro
Pokemon:  Furfrou
Pokemon:  Espurr
Pokemon:  Meowstic
Pokemon:  Honedge
Pokemon:  Doublade
Pokemon:  Aegislash
Pokemon:  Spritzee
Pokemon:  Aromatisse
Pokemon:  Swirlix
Pokemon:  Slurpuff
Pokemon:  Inkay
Pokemon:  Malamar
Pokemon:  Binacle
Pokemon:  Barbaracle
Pokemon:  Skrelp
Pokemon:  Dragalge
Pokemon:  Clauncher
Pokemon:  Clawitzer
Pokemon:  Helioptile
Pokemon:  Heliolisk
Pokemon:  Tyrunt
Pokemon:  Tyrantrum
Pokemon:  Amaura
Pokemon:  Aurorus
Pokemon:  Sylveon
Pokemon:  Hawlucha
Pokemon:  Dedenne
Pokemon:

# Gen 7

In [20]:
len(gen7)
done7 = {}
gen7['Name'][0]

'Rowlet'

In [12]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Rowlet%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
#article = response.content
#rawArticletoPDF(article, 'Rowlet')
unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

## okay so wiki decided not to be lazy anymore

In [13]:
for pokemon in gen7['Name'][0:50]:

    if done7.get(pokemon):
        continue
    else:
        done7[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):

            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)

        else:
            continue

    else:
        continue

Pokemon:  Rowlet
Pokemon:  Dartrix
Pokemon:  Decidueye
Pokemon:  Litten
Pokemon:  Torracat
Pokemon:  Incineroar
Pokemon:  Popplio
Pokemon:  Popplio
Article Dowloaded

Pokemon:  Brionne
Pokemon:  Primarina
Pokemon:  Pikipek
Pokemon:  Trumbeak
Pokemon:  Toucannon
Pokemon:  Yungoos
Pokemon:  Gumshoos
Pokemon:  Grubbin
Pokemon:  Charjabug
Pokemon:  Vikavolt
Pokemon:  Crabrawler
Pokemon:  Crabominable
Pokemon:  Oricorio
Pokemon:  Cutiefly
Pokemon:  Ribombee
Pokemon:  Rockruff
Pokemon:  Lycanroc
Pokemon:  Wishiwashi
Pokemon:  Mareanie
Pokemon:  Toxapex
Pokemon:  Mudbray
Pokemon:  Mudsdale
Pokemon:  Dewpider
Pokemon:  Araquanid
Pokemon:  Fomantis
Pokemon:  Lurantis
Pokemon:  Morelull
Pokemon:  Shiinotic
Pokemon:  Salandit
Pokemon:  Salazzle
Pokemon:  Stufful
Pokemon:  Bewear
Pokemon:  Bounsweet
Pokemon:  Steenee
Pokemon:  Tsareena
Pokemon:  Comfey
Pokemon:  Oranguru
Pokemon:  Passimian
Pokemon:  Wimpod
Pokemon:  Golisopod
Pokemon:  Sandygast
Pokemon:  Palossand
Pokemon:  Pyukumuku


In [15]:
for pokemon in gen7['Name'][50:]:

    if done7.get(pokemon):
        continue
    else:
        done7[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        if checkArticleUniqueness(article):

            sumOfUniqueArticles = sumOfUniqueArticles + 1
            rawArticletoPDF(article, pokemon)

        else:
            continue

    else:
        continue

Pokemon:  TypeNull
Pokemon:  Silvally
Pokemon:  Minior
Pokemon:  Komala
Pokemon:  Turtonator
Pokemon:  Togedemaru
Pokemon:  Mimikyu
Pokemon:  Mimikyu
Article Dowloaded

Pokemon:  Bruxish
Pokemon:  Drampa
Pokemon:  Dhelmise
Pokemon:  Jangmoo
Pokemon:  Hakamoo
Pokemon:  Kommoo
Pokemon:  TapuKoko
Pokemon:  TapuLele
Pokemon:  TapuBulu
Pokemon:  TapuFini
Pokemon:  Cosmog
Pokemon:  Cosmoem
Pokemon:  Solgaleo
Pokemon:  Lunala
Pokemon:  Nihilego
Pokemon:  Buzzwole
Pokemon:  Pheromosa
Pokemon:  Xurkitree
Pokemon:  Celesteela
Pokemon:  Kartana
Pokemon:  Guzzlord
Pokemon:  Necrozma
Pokemon:  Magearna
Pokemon:  Marshadow
Pokemon:  Poipole
Pokemon:  Naganadel
Pokemon:  Stakataka
Pokemon:  Blacephalon
Pokemon:  Zeraora
Pokemon:  Meltan
Pokemon:  Melmetal


# Gen 8

In [18]:
print(len(gen8))
done8 = {}
gen8['Name'][1]

96


'Thwackey'

In [None]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Thwackey%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
#article = response.content
#rawArticletoPDF(article, 'Grookey')
#unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

## seems like wiki got lazy again

In [21]:
for pokemon in gen8['Name'][0:]:

    if done8.get(pokemon):
        continue
    else:
        done8[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        #if checkArticleUniqueness(article):

        sumOfUniqueArticles = sumOfUniqueArticles + 1
        rawArticletoPDF(article, pokemon)

        #else:
            #continue

    else:
        continue

Pokemon:  Grookey
Pokemon:  Thwackey
Pokemon:  Rillaboom
Pokemon:  Scorbunny
Pokemon:  Raboot
Pokemon:  Cinderace
Pokemon:  Sobble
Pokemon:  Drizzile
Pokemon:  Inteleon
Pokemon:  Skwovet
Pokemon:  Greedent
Pokemon:  Rookidee
Pokemon:  Corvisquire
Pokemon:  Corviknight
Pokemon:  Blipbug
Pokemon:  Dottler
Pokemon:  Orbeetle
Pokemon:  Nickit
Pokemon:  Thievul
Pokemon:  Gossifleur
Pokemon:  Eldegoss
Pokemon:  Wooloo
Pokemon:  Dubwool
Pokemon:  Chewtle
Pokemon:  Drednaw
Pokemon:  Yamper
Pokemon:  Boltund
Pokemon:  Rolycoly
Pokemon:  Carkol
Pokemon:  Coalossal
Pokemon:  Applin
Pokemon:  Flapple
Pokemon:  Appletun
Pokemon:  Silicobra
Pokemon:  Sandaconda
Pokemon:  Cramorant
Pokemon:  Arrokuda
Pokemon:  Barraskewda
Pokemon:  Toxel
Pokemon:  Toxtricity
Pokemon:  Sizzlipede
Pokemon:  Centiskorch
Pokemon:  Clobbopus
Pokemon:  Grapploct
Pokemon:  Sinistea
Pokemon:  Polteageist
Pokemon:  Hatenna
Pokemon:  Hattrem
Pokemon:  Hatterene
Pokemon:  Impidimp
Pokemon:  Morgrem
Pokemon:  Grimmsnarl
Pokemon:

# Gen 9

In [25]:
print(len(gen9))
done9 = {}
gen9['Name'][2]

10


'Quaxly'

In [None]:
# special function changes with each generation
url = 'https://en.wikipedia.org/api/rest_v1/page/pdf/Quaxly%20(Pokémon)' 
response = requests.get(url)
response.raise_for_status()
article = response.content
rawArticletoPDF(article, 'Fuecoco')
unUniqueArticle = response.content


def checkArticleUniqueness(article):
    if article != unUniqueArticle:
        return True
    else:
        return False

## wiki got lazy yet again

In [27]:
for pokemon in gen9['Name'][0:]:

    if done9.get(pokemon):
        continue
    else:
        done9[pokemon] = pokemon

    print("Pokemon: ", pokemon)
    article = getRawArticle(pokemon)

    if article != None:

        #if checkArticleUniqueness(article):

        sumOfUniqueArticles = sumOfUniqueArticles + 1
        rawArticletoPDF(article, pokemon)

        #else:
            #continue

    else:
        continue

Pokemon:  Sprigatito
Pokemon:  Fuecoco
Pokemon:  Quaxly
Pokemon:  Pawmi
Pokemon:  Lechonk
Pokemon:  Smoliv
Pokemon:  Fidough
Pokemon:  Cetitan
Pokemon:  Koraidon
Pokemon:  Miraidon


# Done 👍🏾