# In this project, I will generate information for creating a database through an API. The API used will be TwitchAPI, I will try to explore some endpoints to produce analyzable data.
## Twitch: is a streaming environment, with a focus on games but also entertainment.

# Importing libraries

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import date
from tqdm.auto import tqdm
import sqlalchemy as db

# USING API

## Creating tables

### Games, Viewers and Channels

This table will show the games being played the most and their respective channel numbers.

In [82]:
headers = {'Client-ID' : 'h8zvs5dtl6ukn16cvf8s2ade7xw6km', 'Accept' : 'application/vnd.twitchtv.v5+json'}
gameviewers = pd.DataFrame()
for i in range(0,11):
    url = 'https://api.twitch.tv/kraken/games/top?offset='+str(i) +'&Limit=100'
    response = requests.get(url, headers = headers)
    status_code = response.status_code
    games = response.json()['top']
    for game in games:
        today = date.today()
        game_id = game['game']['_id']
        game_name = game['game']['name']
        viewers = int(game['viewers'])
        channels = int(game['channels'])
        mini_df = pd.DataFrame({'date':[today],
                                'id':[game_id],
                               'name': [game_name],
                               'viewers': [viewers],
                               'channels': [channels]})
        gameviewers = pd.concat([gameviewers,mini_df])
gameviewers = gameviewers.reset_index(drop=True)

In [88]:
gameviewers

Unnamed: 0,date,id,name,viewers,channels
0,2020-03-22,509538,Animal Crossing: New Horizons,208080,2190
1,2020-03-22,512710,Call of Duty: Modern Warfare,206889,11108
2,2020-03-22,509658,Just Chatting,159232,2782
3,2020-03-22,32982,Grand Theft Auto V,130663,2095
4,2020-03-22,21779,League of Legends,128670,5427
...,...,...,...,...,...
100,2020-03-22,496983,skribbl.io,23508,50
101,2020-03-22,497057,Destiny 2,22138,1495
102,2020-03-22,27471,Minecraft,20432,2568
103,2020-03-22,29307,Path of Exile,20122,509


### Most viewed channels of top games



In [83]:
stream_channel = pd.DataFrame()
for i in tqdm(range(10)):
    url = 'https://api.twitch.tv/kraken/games/top'
    response = requests.get(url, headers = headers)
    games = response.json()
    top10games = (games['top'][i]['game']['name'])
    url_query = 'https://api.twitch.tv/kraken/search/streams?query='+top10games+'&Limit=100'
    response = requests.get(url_query, headers = headers)
    streams = response.json()['streams']
    for i in range(10):
        try:
            today = date.today()
            stream_id = streams[i]['channel']['_id']
            stream_name = streams[i]['channel']['name']
            stream_game = streams[i]['game']
            stream_viewers = int(streams[i]['viewers'])
            stream_lang = streams[i]['channel']['broadcaster_language']
            stream_followers = int(streams[i]['channel']['followers'])
            stream_views = int(streams[i]['channel']['views'])
            mini_df = pd.DataFrame({'date':[today],
                                    'streamer_id':[stream_id],
                                    'streamer_name': [stream_name],
                                    'game_name': [stream_game],
                                    'viewers': [stream_viewers],
                                    'language': [stream_lang],
                                    'followers': [stream_followers],
                                    'views': [stream_views]})
            stream_channel = pd.concat([stream_channel,mini_df])
        except:
            pass
stream_channel = stream_channel.reset_index(drop=True)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




In [84]:
stream_channel = stream_channel.sort_values(by = 'viewers', ascending = False)
stream_channel

Unnamed: 0,date,streamer_id,streamer_name,game_name,viewers,language,followers,views
30,2020-03-22,47071880,yoda,League of Legends,18809,pt,1608695,160314083
10,2020-03-22,88301612,xari,Just Chatting,18191,fr,205125,5303909
20,2020-03-22,163836275,lord_kebun,Grand Theft Auto V,16622,en,379125,24343411
58,2020-03-22,75830338,zanoxvii,FIFA 20,14144,it,323771,5706852
31,2020-03-22,26946000,nightblue3,League of Legends,13953,en,2592034,224781944
...,...,...,...,...,...,...,...,...
66,2020-03-22,116882497,mirza_jahic,FIFA 20,921,de,153494,5865476
67,2020-03-22,252606559,oestagiariodoei,FIFA 20,855,pt,39130,365940
57,2020-03-22,200623719,dota2kouchtv,Cuisine Royale,0,en,127,12228
56,2020-03-22,249879199,dota2kouchtvtestacc,,0,en,95,5180


# WEB SCRAPING

## Getting data from Steam

In [85]:
url = 'https://store.steampowered.com/search/?filter=topsellers'
html = requests.get(url).content
soup = BeautifulSoup(html)
mostselledgames = [games.text.strip() for games in soup.find_all('div', attrs = {'class': 'col search_name ellipsis'})]
datereleased = [date.text for date in soup.find_all('div', attrs = {'class': 'col search_released responsive_secondrow'})]
discount = [discount.text.strip() for discount in soup.find_all('div', attrs = {'class': 'col search_discount responsive_secondrow'})]
try:
    price = [float(price.text.strip().split()[-1].replace(',','.')) for price in soup.find_all('div', attrs = {'class': 'col search_price_discount_combined responsive_secondrow'})]
except:
    price = [price.text.strip().split()[-1].replace(',','.') for price in soup.find_all('div', attrs = {'class': 'col search_price_discount_combined responsive_secondrow'})]

data = list(zip(mostselledgames, datereleased, discount, price))
headers = ['game_name', 'date_released', 'discount', 'price']
mostselled_games = pd.DataFrame(np.array(data), columns = headers)

## Steam Top 50 Selled Games

In [86]:
mostselled_games

Unnamed: 0,game_name,date_released,discount,price
0,Grand Theft Auto V,"13 Apr, 2015",-50%,34.99
1,Age of Empires II: Definitive Edition,"14 Nov, 2019",,36.99
2,Pummel Party,"20 Sep, 2018",,28.99
3,Monster Hunter World: Iceborne,"9 Jan, 2020",-25%,67.49
4,UNO,"3 Jan, 2017",-70%,8.99
5,DOOM Eternal,"19 Mar, 2020",,199.0
6,RESIDENT EVIL 3,2020/04/03,,129.99
7,Motorsport Manager,"9 Nov, 2016",,63.99
8,Counter-Strike: Condition Zero,,,20.69
9,MONSTER HUNTER: WORLD,"9 Aug, 2018",-34%,46.19


## Players online on steam by game

In [37]:
url = 'https://store.steampowered.com/stats/Steam-Game-and-Player-Statistics'
html = requests.get(url).content
soup = BeautifulSoup(html)
players_online = [players.text.strip().split()[0] for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]
daily_peak = [players.text.strip().split()[1] for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]
game_name = [players.find('a').text for players in soup.find_all('tr', attrs = {'class': 'player_count_row'})]

data = list(zip(game_name, daily_peak, players_online))
headers = ['game_name', 'daily peak', 'players_online']
players_online = pd.DataFrame(np.array(data), columns = headers)

In [39]:
players_online

Unnamed: 0,game_name,daily peak,players_online
0,Counter-Strike: Global Offensive,1102067,870395
1,Dota 2,717882,527785
2,PLAYERUNKNOWN'S BATTLEGROUNDS,527361,481256
3,Grand Theft Auto V,188463,158046
4,Tom Clancy's Rainbow Six Siege,189717,140130
...,...,...,...
95,Wolcen: Lords of Mayhem,10850,5409
96,ROMANCE OF THE THREE KINGDOMS XIV,7309,5396
97,Crossout,6390,5300
98,F1 2019,10707,5282


# CONNECTING PYTHON WITH POSTGRE

## Creating a function to connect and create a table

In [8]:
def create_table(title: str, df):
    engine = db.create_engine('postgresql://postgres:1fYS.9:f@localhost/games')
    conn = engine.connect()
    df.to_sql(title, con=conn, if_exists = 'replace', index=False)
    conn.close()

In [61]:
create_table(title = 'game_viewers', df = gameviewers)

In [65]:
create_table(title = 'top_channels', df = stream_channel)

In [64]:
create_table(title = 'steam_topselledgames', df = mostselled_games)