# In this project, I will generate information for creating a database through an API. The API used will be TwitchAPI, I will try to explore some endpoints to produce analyzable data.
## Twitch: is a streaming environment, with a focus on games but also entertainment.

# Importing libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from datetime import date
from tqdm.auto import tqdm
import sqlalchemy as db

# USING API

## Creating tables

### Games, Viewers and Channels

This table will show the games being played the most and their respective channel numbers.

In [2]:
headers = {'Client-ID' : 'h8zvs5dtl6ukn16cvf8s2ade7xw6km', 'Accept' : 'application/vnd.twitchtv.v5+json'}
gameviewers = pd.DataFrame()
for i in range(0,11):
    url = 'https://api.twitch.tv/kraken/games/top?offset='+str(i) +'&Limit=100'
    response = requests.get(url, headers = headers)
    status_code = response.status_code
    games = response.json()['top']
    for game in games:
        today = date.today()
        game_id = game['game']['_id']
        game_name = game['game']['name']
        viewers = int(game['viewers'])
        channels = int(game['channels'])
        mini_df = pd.DataFrame({'date':[today],
                                'id':[game_id],
                               'name': [game_name],
                               'viewers': [viewers],
                               'channels': [channels]})
        gameviewers = pd.concat([gameviewers,mini_df])
gameviewers = gameviewers.reset_index(drop=True)

In [3]:
gameviewers

Unnamed: 0,date,id,name,viewers,channels
0,2020-03-23,509658,Just Chatting,157891,2832
1,2020-03-23,509538,Animal Crossing: New Horizons,139534,1378
2,2020-03-23,21779,League of Legends,137068,4237
3,2020-03-23,512710,Call of Duty: Modern Warfare,117706,4983
4,2020-03-23,32399,Counter-Strike: Global Offensive,109726,2149
...,...,...,...,...,...
101,2020-03-23,27471,Minecraft,27696,1599
102,2020-03-23,18122,World of Warcraft,27548,1194
103,2020-03-23,511224,Apex Legends,24373,1762
104,2020-03-23,512804,FIFA 20,23175,540


### Most viewed channels of top games



In [4]:
stream_channel = pd.DataFrame()
for i in tqdm(range(10)):
    url = 'https://api.twitch.tv/kraken/games/top'
    response = requests.get(url, headers = headers)
    games = response.json()
    top10games = (games['top'][i]['game']['name'])
    url_query = 'https://api.twitch.tv/kraken/search/streams?query='+top10games+'&Limit=100'
    response = requests.get(url_query, headers = headers)
    streams = response.json()['streams']
    for i in range(10):
        try:
            today = date.today()
            stream_id = streams[i]['channel']['_id']
            stream_name = streams[i]['channel']['name']
            stream_game = streams[i]['game']
            stream_viewers = int(streams[i]['viewers'])
            stream_lang = streams[i]['channel']['broadcaster_language']
            stream_followers = int(streams[i]['channel']['followers'])
            stream_views = int(streams[i]['channel']['views'])
            mini_df = pd.DataFrame({'date':[today],
                                    'streamer_id':[stream_id],
                                    'streamer_name': [stream_name],
                                    'game_name': [stream_game],
                                    'viewers': [stream_viewers],
                                    'language': [stream_lang],
                                    'followers': [stream_followers],
                                    'views': [stream_views]})
            stream_channel = pd.concat([stream_channel,mini_df])
        except:
            pass
stream_channel = stream_channel.reset_index(drop=True)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




In [5]:
stream_channel = stream_channel.sort_values(by = 'viewers', ascending = False)
stream_channel

Unnamed: 0,date,streamer_id,streamer_name,game_name,viewers,language,followers,views
40,2020-03-23,31239503,esl_csgo,Counter-Strike: Global Offensive,37795,en,3601791,442939802
10,2020-03-23,103825127,hanryang1125,Animal Crossing: New Horizons,21655,ko,459306,83543469
75,2020-03-23,23735582,sacriel,Escape From Tarkov,17539,en,654718,96110937
41,2020-03-23,213748641,csgomc_ru,Counter-Strike: Global Offensive,17395,ru,313063,21187930
85,2020-03-23,70075625,silvername,Hearthstone,16523,ru,559164,91657502
...,...,...,...,...,...,...,...,...
60,2020-03-23,204936956,hellfox_dota2,Dota 2,1,en,98,2664
61,2020-03-23,476070001,matash_dota2,Dota 2,1,ru,2,52
63,2020-03-23,439515351,dota2kouchtvdemo,Dota 2,0,en,52,1297
62,2020-03-23,200623719,dota2kouchtv,Cuisine Royale,0,en,127,12228


# WEB SCRAPING

## Getting data from Steam

In [6]:
url = 'https://store.steampowered.com/search/?filter=topsellers'
html = requests.get(url).content
soup = BeautifulSoup(html)
mostselledgames = [games.text.strip().replace('\n',' ') for games in soup.find_all('div', attrs = {'class': 'col search_name ellipsis'})]
datereleased = [date.text for date in soup.find_all('div', attrs = {'class': 'col search_released responsive_secondrow'})]
try:
    discount = [discount.text.strip().replace('-','').replace('%','') for discount in soup.find_all('div', attrs = {'class': 'col search_discount responsive_secondrow'})]
except:
    discount = [discount.text.strip() for discount in soup.find_all('div', attrs = {'class': 'col search_discount responsive_secondrow'})]
try:
    price = [float(price.text.strip().split()[-1].replace(',','.')) for price in soup.find_all('div', attrs = {'class': 'col search_price_discount_combined responsive_secondrow'})]
except:
    price = [price.text.strip().split()[-1].replace(',','.') for price in soup.find_all('div', attrs = {'class': 'col search_price_discount_combined responsive_secondrow'})]

data = list(zip(mostselledgames, datereleased, discount, price))
headers = ['game_name', 'date_released', 'discount', 'price (R$)']
mostselled_games = pd.DataFrame(np.array(data), columns = headers)

## Steam Top 50 Selled Games

In [7]:
mostselled_games

Unnamed: 0,game_name,date_released,discount,price (R$)
0,Monster Hunter World: Iceborne,"9 Jan, 2020",25.0,67.49
1,Grand Theft Auto V,"13 Apr, 2015",50.0,34.99
2,Monster Hunter World: Iceborne Master Edition,,21.0,102.69
3,MONSTER HUNTER: WORLD,"9 Aug, 2018",34.0,46.19
4,RESIDENT EVIL 3,2020/04/03,,129.99
5,Assassin's Creed® Odyssey,"5 Oct, 2018",67.0,59.39
6,Half-Life: Alyx VR Only,"23 Mar, 2020",10.0,98.99
7,DOOM Eternal,"19 Mar, 2020",,199.0
8,Age of Empires II: Definitive Edition,"14 Nov, 2019",,36.99
9,Monster Hunter World: Iceborne Master Edition ...,,18.0,122.69


## Players online on steam by game

In [8]:
url = 'https://store.steampowered.com/stats/Steam-Game-and-Player-Statistics'
html = requests.get(url).content
soup = BeautifulSoup(html)
players_online = [players.text.strip().split()[0] for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]
daily_peak = [players.text.strip().split()[1] for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]
game_name = [players.find('a').text for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]

data = list(zip(game_name, daily_peak, players_online))
headers = ['game_name', 'daily_peak', 'players_online']
players_online = pd.DataFrame(np.array(data), columns = headers)

In [9]:
players_online.head(10)

Unnamed: 0,game_name,daily_peak,players_online
0,Counter-Strike: Global Offensive,1102067,997856
1,Dota 2,716416,645420
2,PLAYERUNKNOWN'S BATTLEGROUNDS,573661,539902
3,Grand Theft Auto V,186720,176163
4,Tom Clancy's Rainbow Six Siege,187754,172967
5,MONSTER HUNTER: WORLD,185499,164907
6,Football Manager 2020,182096,141550
7,ARK: Survival Evolved,123040,104431
8,Tomb Raider,88776,85679
9,Destiny 2,98828,82114


# CONNECTING PYTHON WITH POSTGRE

## Creating a function to connect and create a table

In [10]:
def create_table(title: str, df):
    engine = db.create_engine('postgresql://postgres:1fYS.9:f@localhost/games')
    conn = engine.connect()
    df.to_sql(title, con=conn, if_exists = 'replace', index=False)
    conn.close()

In [11]:
create_table(title = 'game_viewers', df = gameviewers)

In [12]:
create_table(title = 'top_channels', df = stream_channel)

In [13]:
create_table(title = 'steam_topselledgames', df = mostselled_games)

KeyError: 'price (R$'

In [None]:
create_table(title = 'steam_playersonline', df = players_online)