In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns
from twitch import TwitchClient

#### Twitch Client Setup

In [4]:
# creating Twitch Client
client = TwitchClient(client_id = "zjwe67emf2ri3ecyqvihgzhb1r3l4i")

In [71]:
# get user ID from username
# adapted from Twitchly source code
from requests.exceptions import HTTPError
def get_user_id(user_name: str):
    msg = user_name
    try:
        users = client.users.translate_usernames_to_ids([user_name])
        id_ = users[0].id
        # print(msg)
        return id_
    except HTTPError:
        print(msg, 'HTTP')
        return None
    except IndexError:
        print(msg, 'INDEX')
        return None

In [65]:
# get channel ID from username (should be the same)
def get_channel_id(user_name: str):
    return get_user_id(user_name)

# get k followers for a particular channel
def get_all_followers_by_channel_id(channel_id: str, max_followers: int, print_progress=False):
    
    limit = 100 # scroll limit
    
    followers = [] 
    offset = 0
    current_scroll = client.channels.get_followers(channel_id, limit, offset)
    while current_scroll and len(followers) < max_followers:
        for follower in current_scroll:
            offset += 1
            user = follower.user.id
            name = follower.user.name
            followers.append(user)
        try:
            current_scroll = client.channels.get_followers(channel_id, limit, offset)
            if print_progress:
                print('Progress:', offset)
        except HTTPError:
            print('HTTP')
            return followers
    return followers

In [54]:
# get all followers from username (?)
def get_followers_by_user_name(user_name: str, max_followers=50000):
    return get_all_followers_by_channel_id(get_channel_id(user_name), max_followers)

#### Load StreamersList dataframe

In [29]:
# load top user data
streamers = pd.read_csv('StreamersList.csv')
streamers.head()

Unnamed: 0,Streamer,Game
0,Ninja,Fortnite
1,Tfue,Fortnite
2,dakotaz,Fortnite
3,TimTheTatman,Fortnite
4,NICKMERCS,Fortnite


In [41]:
streamers['User ID'] = streamers['Streamer'].map(get_user_id)

Riot Games HTTP
OrgamingLol INDEX
nacrutkahmm INDEX
streamhubus INDEX
mayannagirl INDEX


In [42]:
streamers.head()

Unnamed: 0,Streamer,Game,User ID
0,Ninja,Fortnite,19571641
1,Tfue,Fortnite,60056333
2,dakotaz,Fortnite,39298218
3,TimTheTatman,Fortnite,36769016
4,NICKMERCS,Fortnite,15564828


In [50]:
# Testing on Ninja
followers = [get_followers_by_user_name('Ninja', max_followers=50000)]

Progress: 100
Progress: 200
Progress: 300
Progress: 400
Progress: 500
Progress: 600
Progress: 700
Progress: 800
Progress: 900
Progress: 1000
Progress: 1100
Progress: 1200
Progress: 1300
Progress: 1400
Progress: 1500
Progress: 1600
HTTP


In [51]:
len(followers)

1

In [53]:
followers

[['224506553',
  '247118157',
  '267806694',
  '268331298',
  '252422854',
  '237285031',
  '237610949',
  '268331340',
  '268331202',
  '268196602',
  '257828011',
  '215081060',
  '127324718',
  '254268633',
  '264712714',
  '154747283',
  '268331284',
  '265375913',
  '268331265',
  '268045844',
  '236806345',
  '268331321',
  '266196771',
  '191339288',
  '268158185',
  '268079912',
  '259266203',
  '230889447',
  '245230300',
  '177626930',
  '268329655',
  '267631511',
  '251235793',
  '246831078',
  '268177572',
  '268331544',
  '185740836',
  '267141132',
  '264843555',
  '268331190',
  '242149709',
  '253999128',
  '268330334',
  '258552680',
  '144778117',
  '229176228',
  '268331310',
  '94700465',
  '267817394',
  '268330790',
  '254603566',
  '268315488',
  '268331426',
  '254124111',
  '268327857',
  '268331217',
  '268234221',
  '268330585',
  '268330801',
  '266343605',
  '80215246',
  '268145240',
  '268163578',
  '262925076',
  '268330805',
  '268330300',
  '195737850

#### Notes
- code above seems to work, channel id and user id might be interchangeable
- results in an HTTPError after offset 1700 (maybe we can only pull top 1700 followers?)

In [66]:
streamers['Followers Sample'] = streamers['User ID'].map(lambda x: get_all_followers_by_channel_id(x, 200) if x else None)

In [67]:
len(followers)

100

In [68]:
streamers.head()

Unnamed: 0,Streamer,Game,User ID,Followers Sample
0,Ninja,Fortnite,19571641,"[268234051, 81724021, 245071158, 268333378, 26..."
1,Tfue,Fortnite,60056333,"[106978500, 164967731, 200284827, 268334361, 2..."
2,dakotaz,Fortnite,39298218,"[187187483, 268334145, 268333371, 265698772, 2..."
3,TimTheTatman,Fortnite,36769016,"[263259777, 253454850, 216192694, 267672018, 2..."
4,NICKMERCS,Fortnite,15564828,"[174420795, 204348049, 130534552, 219709133, 1..."


In [72]:
# from the Twitchly source od
def get_all_follows(user_id: str, show_progress=False):
    """Return a list of all channels a user follows"""
    channels = []

    # We want to avoid adding dupliciate users, which the API has retrieved in the past
    added_channel_names = set()
    offset = 0

    followed_channels = client.users.get_follows(user_id, limit=100)
    while followed_channels:
        for followed_channel in followed_channels:
            offset += 1
            channel = followed_channel.channel

            if channel.display_name not in added_channel_names:
                channels.append(followed_channel.channel) # saves entire json
                added_channel_names.add(channel.display_name)

        followed_channels = client.users.get_follows(user_id, limit=100, offset=offset)
        if show_progress:
            print(offset, end=' ')
    if show_progress:
        print("finished")
    return channels

In [74]:
followed_channels_by_user = {}
for sample in streamers['Followers Sample']:
    if sample:
        for user_id in sample:
            followed_channels_by_user[user_id] = get_all_follows(user_id) # list of jsons per user (this dict is gonna be massive)

In [102]:
with open('list_of_channels_followed_by_user.txt', 'w+', encoding='utf-8') as write_file:
    for user_id, followed_channel_list in followed_channels_by_user.items():
        for channel_info in followed_channel_list:
            write_file.write('{}\t{}\n'.format(user_id, channel_info))

### Data collection complete, onto analysis

We were able to pick up data for ~18000 users in this go-around.
Next we will be looking for features in the channel lists that could make the particular channels preferred for the given user.
Something to note: the channels come in order of most recently followed, so we can rely more heavily on newly followed channels to determine user preference.

In [77]:
# total number of users sampled
len(followed_channels_by_user)

18377

In [81]:
# average number of users sampled (per streamer)
len(followed_channels_by_user) / len(followers)

183.77

In [97]:
# average number of followed channels amongst users sampled
np.mean([len(v) for v in followed_channels_by_user.values()])

43.615116721989445

In [98]:
user_channels = pd.DataFrame.from_dict(followed_channels_by_user, orient='index')
user_channels.head()

KeyError: '_typ'

Exception ignored in: 'util.is_period_object'
Traceback (most recent call last):
  File "C:\Users\admin\Anaconda3\lib\site-packages\twitch\resources.py", line 48, in __getattr__
    return self[name]
KeyError: ('_typ',)


KeyError: '__next__'

KeyError: '_typ'

Exception ignored in: 'util.is_period_object'
Traceback (most recent call last):
  File "C:\Users\admin\Anaconda3\lib\site-packages\twitch\resources.py", line 48, in __getattr__
    return self[name]
KeyError: ('_typ',)


KeyError: '__next__'