# Extracting data from Steam 

## Initial Setup

In [87]:
from bs4 import BeautifulSoup
import requests

## Connect to Steam webpage

In [88]:
# Download the Steam "Action" tag page.
# A timeout is passed explicitly because requests does not time out on its own,
# so an unresponsive server would otherwise block this cell indefinitely.
r = requests.get("https://store.steampowered.com/tags/en/Action/", timeout=30)
# Show the HTTP status code as the cell output.
r.status_code

200

In [89]:
# Keep the body of the HTTP response; it is handed to BeautifulSoup for parsing below.
html = r.content

In [90]:
# Parse the downloaded page using the "lxml" parser; `soup` is the object every
# later cell searches with find_all().
soup = BeautifulSoup(html, "lxml")

In [91]:
# Save a pretty-printed, UTF-8 encoded copy of the parsed page to steam.html
# (binary mode, because prettify() is asked for an encoded result).
with open('steam.html', mode='wb') as html_file:
    pretty_bytes = soup.prettify('utf-8')
    html_file.write(pretty_bytes)

## What can we scrape from this webpage?
## 1) Try extracting the names of the top games from this page.
## 2) What tags contain the prices?  Can you extract the price information?
## 3) Get all of the header tags on the page
## 4) Can you get the text from each span tag with class equal to "top_tag"?
## 5) Under the "Narrow by Tag" section, there is a collection of tags (e.g. "Indie", "Adventure", etc.).  Write code to return these tags.
## 6) What else can be scraped from this webpage or others on the site?

## Now it's your turn!

----------------

## 1) New and Trending Games

In [94]:
# Every <div> whose id is "NewReleasesRows"; find_all returns a list,
# so later cells index into it with [0].
div_new_trending = soup.find_all('div', id='NewReleasesRows')
div_new_trending

[<div id="NewReleasesRows">
 <a class="tab_item" data-ds-appid="1253950" data-ds-crtrids="[34832848]" data-ds-itemkey="App_1253950" data-ds-tagids="[19,492,122,113,42804,1646,1684]" href="https://store.steampowered.com/app/1253950/Dreamscaper_Prologue/?snr=1_241_4_action_103" onmouseout="HideGameHover( this, event, 'global_hover' )" onmouseover="GameHover( this, event, 'global_hover', {&quot;type&quot;:&quot;app&quot;,&quot;id&quot;:1253950,&quot;params&quot;:{&quot;bDisableHover&quot;:false},&quot;public&quot;:1,&quot;v6&quot;:1} );">
 <div class="tab_item_cap">
 <img class="tab_item_cap_img" src="https://steamcdn-a.akamaihd.net/steam/apps/1253950/capsule_184x69.jpg?t=1586347340"/>
 </div>
 <div class="discount_block tab_item_discount no_discount" data-price-final="0"><div class="discount_prices"><div class="discount_final_price">Free</div></div></div> <div class="tab_item_content">
 <div class="tab_item_name">Dreamscaper: Prologue</div>
 <div class="tab_item_details">
 <span class="p

### 1) New and Trending Games - Names

In [95]:
# Game titles are the text of the <div class="tab_item_name"> elements inside
# the first "NewReleasesRows" container.
new_trending_games_name_list = []
for name_div in div_new_trending[0].find_all('div', class_='tab_item_name'):
    new_trending_games_name_list.append(name_div.get_text())
new_trending_games_name_list

['Dreamscaper: Prologue',
 'RESIDENT EVIL 3',
 'ONE PIECE: PIRATE WARRIORS 4',
 'Eternal Radiance',
 'Deadside',
 "Conqueror's Blade",
 'Borderlands 3',
 'Granblue Fantasy: Versus',
 'Receiver 2',
 'Rakion Chaos Force',
 'Mount & Blade II: Bannerlord',
 'Half-Life: Alyx',
 'Last Oasis',
 'DOOM Eternal',
 'One Step From Eden']

### 2) New and Trending Games - Price

In [96]:
# Prices are the text of the <div class="discount_final_price"> elements inside
# the first "NewReleasesRows" container.
price_divs = div_new_trending[0].find_all('div', class_='discount_final_price')
new_trending_games_price_list = [price_div.get_text() for price_div in price_divs]
new_trending_games_price_list

['Free',
 '$59.99',
 '$59.99',
 '$16.19',
 '$19.99',
 'Free to Play',
 '$59.99',
 '$59.99',
 '$17.99',
 'Free',
 '$49.99',
 '$59.99',
 '$29.99',
 '$59.99',
 '$19.99']

### New and Trending Games - Tags

In [98]:
# Each <div class="tab_item_top_tags"> holds one game's tags; its text is the
# comma-separated tag string for that game.
top_tags_divs = div_new_trending[0].find_all('div', attrs={'class': 'tab_item_top_tags'})
new_trending_games_tags_list = [tags_div.get_text() for tags_div in top_tags_divs]
new_trending_games_tags_list

['Action, Indie, RPG, Free to Play',
 'Action, Zombies, Horror, Survival Horror',
 'Action, Anime, Co-op, Online Co-Op',
 'Action, Adventure, RPG, Anime',
 'Massively Multiplayer, Action, Adventure, Indie',
 'Strategy, Massively Multiplayer, Action, Simulation',
 'RPG, Action, Online Co-Op, Looter Shooter',
 'Action, Anime, Fighting, 2D Fighter',
 'Simulation, Indie, Action, Shooter',
 'Action, RPG, Free to Play, Strategy',
 'Early Access, Medieval, Strategy, Open World',
 'Masterpiece, Action, VR, Adventure',
 'Massively Multiplayer, Survival, Action, Adventure',
 'Action, Masterpiece, Great Soundtrack, FPS',
 'Action, Strategy, Indie, Adventure']

### Representing data using Pandas

In [76]:
import pandas as pd

In [78]:
# Assemble the three scraped lists into one table, one column per list.
# Columns are added one at a time, in this order: Title, Price, Tags.
new_trending_columns = {
    'Title': new_trending_games_name_list,
    'Price': new_trending_games_price_list,
    'Tags': new_trending_games_tags_list,
}

new_trending_games_info = pd.DataFrame()
for column_name, column_values in new_trending_columns.items():
    new_trending_games_info[column_name] = column_values

new_trending_games_info

Unnamed: 0,Title,Price,Tags
0,Dreamscaper: Prologue,Free,"Action, Indie, RPG, Free to Play"
1,RESIDENT EVIL 3,$59.99,"Action, Zombies, Horror, Survival Horror"
2,ONE PIECE: PIRATE WARRIORS 4,$59.99,"Action, Anime, Co-op, Online Co-Op"
3,Eternal Radiance,$16.19,"Action, Adventure, RPG, Anime"
4,Deadside,$19.99,"Massively Multiplayer, Action, Adventure, Indie"
5,Conqueror's Blade,Free to Play,"Strategy, Massively Multiplayer, Action, Simul..."
6,Borderlands 3,$59.99,"RPG, Action, Online Co-Op, Looter Shooter"
7,Granblue Fantasy: Versus,$59.99,"Action, Anime, Fighting, 2D Fighter"
8,Receiver 2,$17.99,"Simulation, Indie, Action, Shooter"
9,Rakion Chaos Force,Free,"Action, RPG, Free to Play, Strategy"


### Exporting to CSV

In [79]:
# Write the "New and Trending" table to a CSV file: the column names are written
# as a header row (header = True) and the DataFrame index is left out (index = False).
new_trending_games_info.to_csv('New_Trending_Games_Info.csv', index = False, header = True)

-------------

## 2) Getting games info for "Top Sellers", "What's popular", "Top Rated"

In [146]:
import pandas as pd

# Output CSV file for each scraped section, in the same order as div_class_list.
# NOTE(review): 'Top_Rated_Games.info.csv' has '.info' where the other names use
# '_info' -- possibly a typo; left unchanged so the output path stays the same.
game_list_file_names = ['Top_Sellers_Games_info.csv', 'Trending_Games_info.csv', 'Top_Rated_Games.info.csv']
games_info_dataframe_list = []

# id attribute of the container <div> for each section to scrape.
div_class_list = ['TopSellersRows', 'ConcurrentUsersRows', 'TopRatedRows']

for game_type_class in div_class_list:

    div_table = soup.find_all('div', attrs={"id": game_type_class})

    # These stay empty when no container with this id is found on the page.
    games_name_list = []
    games_price_list = []
    games_tags_list = []

    # The lists are reassigned on every pass, so if more than one container
    # matched, the last one wins.
    for div_item in div_table:
        games_name_list = [div.text for div in div_item.find_all('div', {'class': 'tab_item_name'})]
        games_price_list = [div.text for div in div_item.find_all('div', {'class': 'discount_final_price'})]
        games_tags_list = [div.text for div in div_item.find_all('div', {'class': 'tab_item_top_tags'})]

    # One DataFrame per section, with the same columns as the
    # "New and Trending" table built earlier in the notebook.
    games_info = pd.DataFrame()

    games_info['Title'] = games_name_list
    games_info['Price'] = games_price_list
    games_info['Tags'] = games_tags_list

    games_info_dataframe_list.append(games_info)

# Export every section to its own CSV file. Each file name is paired with the
# DataFrame at the same position, so each file receives its own section's data
# rather than whatever DataFrame the scraping loop built last.
for file_name, section_games_info in zip(game_list_file_names, games_info_dataframe_list):
    section_games_info.to_csv(file_name, index=False, header=True)

----------------

## 3) Getting header tags of the page

In [151]:
# All <div class="tab_content"> elements on the page; their text is the label
# of each tab.
game_header_tags_div = soup.find_all('div', class_='tab_content')
game_header_tags_div

[<div class="tab_content">
 													New and Trending												</div>,
 <div class="tab_content">
 													Top Selling												</div>,
 <div class="tab_content">
 													What's Popular												</div>,
 <div class="tab_content">
 													Top Rated												</div>,
 <div class="tab_content">
 													Upcoming												</div>]

In [152]:
# Pull the label text out of each tab <div>, trimming the surrounding
# whitespace left over from the page markup.
game_header_tags_list = []
for header_div in game_header_tags_div:
    game_header_tags_list.append(header_div.get_text().strip())
game_header_tags_list

['New and Trending', 'Top Selling', "What's Popular", 'Top Rated', 'Upcoming']

-----------

## 4) Info of top_tag span


In [159]:
# Every <span class="top_tag"> on the page (one span per tag of each listed game).
top_tag_div = soup.find_all('span', class_='top_tag')
top_tag_div

[<span class="top_tag">Action</span>,
 <span class="top_tag">, Indie</span>,
 <span class="top_tag">, RPG</span>,
 <span class="top_tag">, Free to Play</span>,
 <span class="top_tag">Action</span>,
 <span class="top_tag">, Zombies</span>,
 <span class="top_tag">, Horror</span>,
 <span class="top_tag">, Survival Horror</span>,
 <span class="top_tag">Action</span>,
 <span class="top_tag">, Anime</span>,
 <span class="top_tag">, Co-op</span>,
 <span class="top_tag">, Online Co-Op</span>,
 <span class="top_tag">Action</span>,
 <span class="top_tag">, Adventure</span>,
 <span class="top_tag">, RPG</span>,
 <span class="top_tag">, Anime</span>,
 <span class="top_tag">Massively Multiplayer</span>,
 <span class="top_tag">, Action</span>,
 <span class="top_tag">, Adventure</span>,
 <span class="top_tag">, Indie</span>,
 <span class="top_tag">Strategy</span>,
 <span class="top_tag">, Massively Multiplayer</span>,
 <span class="top_tag">, Action</span>,
 <span class="top_tag">, Simulation</span>,

In [163]:
# The spans after the first one of each game start with ", ", so commas and
# spaces are stripped from both ends of every tag's text.
top_tag_list = []
for tag_span in top_tag_div:
    top_tag_list.append(tag_span.get_text().strip(', '))
top_tag_list

['Action',
 'Indie',
 'RPG',
 'Free to Play',
 'Action',
 'Zombies',
 'Horror',
 'Survival Horror',
 'Action',
 'Anime',
 'Co-op',
 'Online Co-Op',
 'Action',
 'Adventure',
 'RPG',
 'Anime',
 'Massively Multiplayer',
 'Action',
 'Adventure',
 'Indie',
 'Strategy',
 'Massively Multiplayer',
 'Action',
 'Simulation',
 'RPG',
 'Action',
 'Online Co-Op',
 'Looter Shooter',
 'Action',
 'Anime',
 'Fighting',
 '2D Fighter',
 'Simulation',
 'Indie',
 'Action',
 'Shooter',
 'Action',
 'RPG',
 'Free to Play',
 'Strategy',
 'Early Access',
 'Medieval',
 'Strategy',
 'Open World',
 'Masterpiece',
 'Action',
 'VR',
 'Adventure',
 'Massively Multiplayer',
 'Survival',
 'Action',
 'Adventure',
 'Action',
 'Masterpiece',
 'Great Soundtrack',
 'FPS',
 'Action',
 'Strategy',
 'Indie',
 'Adventure',
 'Action',
 'Multiplayer',
 'Co-op',
 'Hunting',
 'Action',
 'Anime',
 'Fighting',
 '2D Fighter',
 'RPG',
 'Action',
 'Online Co-Op',
 'Looter Shooter',
 'Action',
 'Anime',
 'Co-op',
 'Online Co-Op',
 'Actio

In [165]:
# Collapse the tag list into a set so each tag appears only once.
unique_top_tags = {tag for tag in top_tag_list}
unique_top_tags

{'2D',
 '2D Fighter',
 '3D Platformer',
 'Action',
 'Action Roguelike',
 'Adventure',
 'Anime',
 'Arena Shooter',
 'Battle Royale',
 'Casual',
 'Classic',
 'Co-op',
 'Comedy',
 'Competitive',
 'Crafting',
 'Crime',
 'Difficult',
 'Early Access',
 'Exploration',
 'FPS',
 'Female Protagonist',
 'Fighting',
 'Free to Play',
 'Great Soundtrack',
 'Heist',
 'Hero Shooter',
 'Horror',
 'Hunting',
 'Indie',
 'Loot',
 'Looter Shooter',
 'MOBA',
 'Massively Multiplayer',
 'Masterpiece',
 'Medieval',
 'Military',
 'Multiplayer',
 'Online Co-Op',
 'Open World',
 'Open World Survival Craft',
 'Pirates',
 'RPG',
 'Racing',
 'Rhythm',
 'Roguelike',
 'Sci-fi',
 'Shooter',
 'Simulation',
 'Singleplayer',
 'Soccer',
 'Souls-like',
 'Space',
 'Sports',
 'Story Rich',
 'Strategy',
 'Survival',
 'Survival Horror',
 'Tactical',
 'Third Person',
 'Third-Person Shooter',
 'VR',
 'Violent',
 'Zombies'}

------------------

## 5) Narrow By Tag section tags 

In [167]:
# Tag names are the text of the <span class="tag_name"> elements.
tag_name_spans = soup.find_all('span', class_='tag_name')
narrow_by_tag_list = [tag_span.get_text() for tag_span in tag_name_spans]
narrow_by_tag_list

['Indie',
 'Adventure',
 'Casual',
 'Singleplayer',
 'RPG',
 'Early Access',
 'Simulation',
 'Strategy',
 'Multiplayer',
 '2D',
 'Violent',
 'Shooter']