In [1]:
import pandas as pd
import numpy as np
import requests
import lxml.html as lh

In [2]:
# Goal of this notebook: build a dataframe that holds the basic stats of every
# Pokemon from the Red and Blue versions, plus one column per move
# (1 if the Pokemon can learn the move, 0 if it cannot).
#
# All of the move data comes from 'https://pokemondb.net/move/generation/1'.

# First, scrape that page to get a list of every attack in the games.
url = 'https://pokemondb.net/move/generation/1'
# A timeout keeps the cell from hanging forever if the site does not respond.
html = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
html.raise_for_status()
doc = lh.fromstring(html.content)
# An XPath starting with '//' searches the whole document no matter which
# element it is called on, so the query is issued on the document directly.
names = doc.xpath('//a[@class="ent-name"]/text()')
names

['Absorb',
 'Acid',
 'Acid Armor',
 'Agility',
 'Amnesia',
 'Aurora Beam',
 'Barrage',
 'Barrier',
 'Bide',
 'Bind',
 'Bite',
 'Blizzard',
 'Body Slam',
 'Bone Club',
 'Bonemerang',
 'Bubble',
 'Bubble Beam',
 'Clamp',
 'Comet Punch',
 'Confuse Ray',
 'Confusion',
 'Constrict',
 'Conversion',
 'Counter',
 'Crabhammer',
 'Cut',
 'Defense Curl',
 'Dig',
 'Disable',
 'Dizzy Punch',
 'Double Kick',
 'Double Slap',
 'Double Team',
 'Double-Edge',
 'Dragon Rage',
 'Dream Eater',
 'Drill Peck',
 'Earthquake',
 'Egg Bomb',
 'Ember',
 'Explosion',
 'Fire Blast',
 'Fire Punch',
 'Fire Spin',
 'Fissure',
 'Flamethrower',
 'Flash',
 'Fly',
 'Focus Energy',
 'Fury Attack',
 'Fury Swipes',
 'Glare',
 'Growl',
 'Growth',
 'Guillotine',
 'Gust',
 'Harden',
 'Haze',
 'Headbutt',
 'High Jump Kick',
 'Horn Attack',
 'Horn Drill',
 'Hydro Pump',
 'Hyper Beam',
 'Hyper Fang',
 'Hypnosis',
 'Ice Beam',
 'Ice Punch',
 'Jump Kick',
 'Karate Chop',
 'Kinesis',
 'Leech Life',
 'Leech Seed',
 'Leer',
 'Lick',
 '

In [18]:
## Every move links to its own page listing the Pokemon that can use it.
# Those pages share the base address 'https://pokemondb.net/move/'.
# A later loop fetches each page and stores the move data in a dictionary.
# Before that, swap every space in a move name for a '-'.
# Slice assignment keeps updating the existing list object in place.
names[:] = [attack.replace(' ', '-') for attack in names]

In [3]:
## There are roughly 167 moves in the original Pokemon games. Entering the
# Pokemon for each move by hand would take a long time and invite mistakes.
# Instead, the function below scrapes one move page; it is used later to build
# a dictionary with move names as keys and the Pokemon that can perform the
# move as values.
def moves(link):
    """Return the Pokemon names listed on a single move page.

    Parameters
    ----------
    link : str
        URL of the move page to download.

    Returns
    -------
    list of str
        Text of every ``a`` element with class ``ent-name`` found inside a
        ``span`` with class ``infocard-md-data``, in the order the spans are
        returned by the query. Names are not de-duplicated, so the same
        Pokemon may appear more than once.

    Raises
    ------
    requests.exceptions.RequestException
        If the download fails, times out, or the server answers with an
        HTTP error status.
    """
    # A timeout keeps one unresponsive page from stalling the whole scrape.
    html = requests.get(link, timeout=30)
    # Stop on HTTP errors rather than returning an empty list for a bad URL.
    html.raise_for_status()
    doc = lh.fromstring(html.content)

    # Query the info cards once and reuse the result for every card.
    cards = doc.xpath('//span[@class="infocard-md-data"]')

    learners = []
    for card in cards:
        # The leading '.' restricts the search to links inside this card.
        learners.extend(card.xpath('.//a[@class="ent-name"]/text()'))
    return learners

In [20]:
# Build one single-entry dictionary per move: {move name: Pokemon that can use it}.
all_moves = [
    {move: moves('https://pokemondb.net/move/' + move)}
    for move in names
]
all_moves

[{'Absorb': ['Zubat',
   'Golbat',
   'Oddish',
   'Gloom',
   'Paras',
   'Parasect',
   'Parasect',
   'Tangela',
   'Kabuto',
   'Kabutops',
   'Kabutops',
   'Spinarak',
   'Ariados',
   'Ariados',
   'Crobat',
   'Hoppip',
   'Skiploom',
   'Jumpluff',
   'Sunkern',
   'Sunflora',
   'Treecko',
   'Grovyle',
   'Grovyle',
   'Sceptile',
   'Sceptile',
   'Beautifly',
   'Lotad',
   'Lombre',
   'Shroomish',
   'Breloom',
   'Nincada',
   'Ninjask',
   'Ninjask',
   'Shedinja',
   'Shedinja',
   'Roselia',
   'Cacnea',
   'Cacturne',
   'Cacturne',
   'Turtwig',
   'Grotle',
   'Grotle',
   'Torterra',
   'Torterra',
   'Kricketune',
   'Budew',
   'Tangrowth',
   'Cottonee',
   'Petilil',
   'Maractus',
   'Foongus',
   'Amoonguss',
   'Frillish',
   'Jellicent',
   'Jellicent',
   'Joltik',
   'Galvantula',
   'Shelmet',
   'Accelgor',
   'Larvesta',
   'Volcarona',
   'Volcarona',
   'Goomy',
   'Sliggoo',
   'Sliggoo',
   'Goodra',
   'Goodra',
   'Noibat',
   'Noivern',
   'No

In [233]:
## Split all_moves into two lists:
# move_name collects every key found in all_moves,
# pokemon collects every value found in all_moves.
move_name = [key for entry in all_moves for key in entry.keys()]
pokemon = [learners for entry in all_moves for learners in entry.values()]

In [None]:
## Not every Pokemon can do every attack, so the per-move lists have different lengths.
# To build one big dataframe, every list must have the same length.
# Each list is padded with the filler value 'A' up to 1000 entries; if any
# list is already longer than that, the longest list sets the length instead,
# so the rows can never end up ragged.
# This data is matched against a specific list of Pokemon later, so the filler
# values are eventually discarded.
FILLER = 'A'
MIN_LENGTH = 1000
target_length = max([MIN_LENGTH] + [len(learners) for learners in pokemon])
for learners in pokemon:
    # Pad in place with a single extend call; a negative difference cannot
    # occur because target_length is at least as long as every list.
    learners.extend([FILLER] * (target_length - len(learners)))

In [297]:
# Turn the padded Pokemon lists into an array and label each row with its move.
a = np.asarray(pokemon)
df5 = pd.DataFrame(a, index=move_name)
# All the moves and Pokemon are now in one table, but in this orientation it
# does not look like it will merge very well with the master sheet.
df5.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
Absorb,Zubat,Golbat,Oddish,Gloom,Paras,Parasect,Parasect,Tangela,Kabuto,Kabutops,...,A,A,A,A,A,A,A,A,A,A
Acid,Ekans,Arbok,Oddish,Gloom,Gloom,Bellsprout,Weepinbell,Tentacool,Tentacruel,Tentacruel,...,A,A,A,A,A,A,A,A,A,A
Acid-Armor,Grimer,Grimer,Muk,Muk,Vaporeon,Phione,Manaphy,Vanillite,Vanillish,Vanilluxe,...,A,A,A,A,A,A,A,A,A,A
Agility,Beedrill,Pidgey,Pidgeotto,Pidgeot,Spearow,Fearow,Pikachu,Growlithe,Ponyta,Rapidash,...,A,A,A,A,A,A,A,A,A,A
Amnesia,Psyduck,Golduck,Slowpoke,Slowbro,Snorlax,Mewtwo,Mew,Sentret,Furret,Wooper,...,A,A,A,A,A,A,A,A,A,A


In [298]:
# Swapping rows and columns gives one column per move, which will merge
# nicely with the master sheet.
df6 = df5.T
df6.head()

Unnamed: 0,Absorb,Acid,Acid-Armor,Agility,Amnesia,Aurora-Beam,Barrage,Barrier,Bide,Bind,...,Tri-Attack,Twineedle,Vice-Grip,Vine-Whip,Water-Gun,Waterfall,Whirlwind,Wing-Attack,Withdraw,Wrap
0,Zubat,Ekans,Grimer,Beedrill,Psyduck,Vulpix,Exeggcute,Tentacool,Sandshrew,Onix,...,Dugtrio,Beedrill,Krabby,Bulbasaur,Squirtle,Goldeen,Butterfree,Charizard,Squirtle,Ekans
1,Golbat,Arbok,Grimer,Pidgey,Golduck,Seel,Exeggutor,Tentacruel,Pineco,Tangela,...,Dugtrio,Escavalier,Kingler,Ivysaur,Wartortle,Seaking,Pidgey,Pidgey,Wartortle,Arbok
2,Oddish,Oddish,Muk,Pidgeotto,Slowpoke,Dewgong,Exeggutor,Mr. Mime,Forretress,Pinsir,...,Magneton,Escavalier,Kingler,Venusaur,Wartortle,Squirtle,Pidgeotto,Pidgeotto,Blastoise,Bellsprout
3,Gloom,Gloom,Muk,Pidgeot,Slowbro,Shellder,A,Mewtwo,Shuckle,Steelix,...,Dodrio,Shellder,Pinsir,Venusaur,Blastoise,Wartortle,Pidgeot,Pidgeot,Blastoise,Weepinbell
4,Paras,Gloom,Vaporeon,Spearow,Snorlax,Cloyster,A,Mew,Miltank,Kecleon,...,Porygon,Cloyster,Mawile,Bellsprout,Blastoise,Blastoise,Lugia,Zubat,Slowbro,Weepinbell


In [299]:
## With the moves dataframe built, load a sheet with the final evolution stages and their stats.
# That sheet was created with the help of a tutorial found at:
## https://towardsdatascience.com/web-scraping-html-tables-with-python-c9baba21059
#
# The data for the chart comes from:
# https://pokemondb.net/pokedex/all
df = df6
# NOTE(review): this absolute path only exists on one Windows machine; adjust
# it before running the notebook anywhere else.
p = pd.read_csv(
    'C:\\Users\\tilleymusprime\\Desktop\\evolved pokemon.csv'
)

In [331]:
# Combine the two sheets into one big dataframe.
# For every move column in df, add a column with the same name to p that holds
# 1 where the Pokemon's name (p['Name']) appears in that move column
# and 0 where it does not.
pokemon_names = p['Name']  # read once; it does not change inside the loop
for move in df.columns:
    # isin builds the whole 0/1 column in one pass instead of scanning the
    # move column again for every single Pokemon name.
    # An explicit 'int64' keeps the dtype the same on every platform.
    p[move] = pokemon_names.isin(df[move]).astype('int64')

In [332]:
## Take a look at the first rows of the finished worksheet.
p.head(5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,...,Tri-Attack,Twineedle,Vice-Grip,Vine-Whip,Water-Gun,Waterfall,Whirlwind,Wing-Attack,Withdraw,Wrap
0,3,Venusaur,Grass,Poison,525,80,82,83,100,100,...,1,0,0,1,0,0,0,0,0,0
1,6,Charizard,Fire,,534,78,84,78,109,85,...,0,0,0,0,0,0,0,1,0,0
2,9,Blastoise,Water,,530,79,83,100,85,105,...,0,0,0,0,1,1,0,0,1,0
3,12,Butterfree,Bug,Flying,395,60,45,50,90,80,...,0,0,0,0,0,0,1,0,0,0
4,15,Beedrill,Bug,Poison,395,65,90,40,45,80,...,0,1,0,0,0,0,0,0,0,0


In [333]:
# Uncomment the next line to save the finished worksheet to disk:
####p.to_csv('C:\\Users\\tilleymusprime\\Desktop\\pokemonwithmoves.csv')