In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns
from twitch import TwitchClient
import twitchly_db

### Data analysis

We were able to pick up data for ~18000 users. We know the channels that these users have followed. Each channel response comprises various channel attributes (shown in the response here: https://dev.twitch.tv/docs/v5/reference/users/#get-user-follows). <br/>

##### Subproblem
Next we want features in the channel lists that correlate highly with channel preference for the given user.
The data is structured so that channels come in order of most recently followed. One idea is that we can rely more heavily on newly followed channels to determine user preference, since preferences may change over time (and we'd like to be as current as possible). 

In [2]:
user_channels = pd.read_csv('list_of_channels_followed_by_user.txt', delimiter='\t', header=None)
user_channels.columns = ['user_id', 'channel_info']
user_channels.head()

Unnamed: 0,user_id,channel_info
0,268234051,"{'mature': False, 'status': '!CUBE IN CHAT FOR..."
1,268234051,"{'mature': False, 'status': 'Morning stream on..."
2,268234051,"{'mature': True, 'status': 'Practice with VicV..."
3,268234051,"{'mature': False, 'status': 'SUB GAMES.. Drunk..."
4,268234051,"{'mature': False, 'status': 'High Kill Solos',..."


In [None]:
import re
channel_matches = []
for i in user_channels['channel_info']:
    m = re.search(r'.*"id": "(\d+)",.*', i)
    if m:
        channel_matches.append(int(m.group(1)))

In [None]:
len(channel_matches)

In [7]:
user_channels['channel_id'] = channel_matches

ValueError: Length of values does not match length of index

In [None]:
import json
import re
import ast
responses = []

for channel in user_channels['channel_info']:
    reformat = channel.replace('\'', '\"').replace('True', 'true').replace('False', 'false').replace(r'(?<date>datetime[^)]*))', r'\"\g<date>\"')
    reformat = reformat.replace(' datetime', ' "datetime').replace("), \"", ")\", \"")
    try:
        response = json.loads(reformat)
    except ValueError:
        print(reformat)

    responses.append(response)
channels_from_file = pd.DataFrame(responses)

{"mature": false, "status": "Morning stream only today! | Pon Pon merch Live! Teamninja.com | @Ninja @teamninja on socials", "broadcaster_language": "en", "broadcaster_software": "unknown_rtmp", "display_name": "Ninja", "game": "Fortnite", "language": "en", "id": "19571641", "name": "ninja", "created_at": "datetime.datetime(2011, 1, 16, 4, 31, 20)", "updated_at": "datetime.datetime(2018, 10, 20, 22, 31, 11)", "partner": true, "logo": "https://static-cdn.jtvnw.net/jtv_user_pictures/cef31105-8a6e-4211-a74b-2f0bbd9791fb-profile_image-300x300.png", "video_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/8f5af87e-2062-46f8-9e74-ab20d0c2215e-channel_offline_image-1920x1080.png", "profile_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/3a3a6569-292f-489e-9046-3245a28be5c4-profile_banner-480.png", "profile_banner_background_color": None, "url": "https://www.twitch.tv/ninja", "views": 354014292, "followers": 11872426, "broadcaster_type": "partner", "description": "Professional 

{"mature": false, "status": "Park 91 Grind w/ @Cellz__", "broadcaster_language": "en", "broadcaster_software": "candybox", "display_name": "cellz___", "game": "NBA 2K19", "language": "en", "id": "89659680", "name": "cellz___", "created_at": "datetime.datetime(2015, 4, 27, 3, 19, 20)", "updated_at": "datetime.datetime(2018, 10, 20, 19, 36, 50)", "partner": false, "logo": "https://static-cdn.jtvnw.net/jtv_user_pictures/fcb09a7b-538a-44e5-8118-8d0aeb1c993a-profile_image-300x300.jpg", "video_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/3fb276f9-52b1-45fb-b5a4-a91635fd71df-channel_offline_image-1920x1080.jpg", "profile_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/9f46e186-c043-45e9-9c89-e8a1f409ae51-profile_banner-480.jpg", "profile_banner_background_color": None, "url": "https://www.twitch.tv/cellz___", "views": 474, "followers": 172, "broadcaster_type": "", "description": "Whats going on guys my name is cellz . The games I play is Gears Of War 4,,The Division, Dead

{"mature": false, "status": "Нощна смяна Old School Volvo F16 !SUB Събовиците избират музиката! ", "broadcaster_language": "bg", "broadcaster_software": "unknown_rtmp", "display_name": "MrJackp0t", "game": "Euro Truck Simulator 2", "language": "en", "id": "116753572", "name": "mrjackp0t", "created_at": "datetime.datetime(2016, 2, 23, 15, 20, 36)", "updated_at": "datetime.datetime(2018, 10, 20, 22, 7, 4)", "partner": true, "logo": "https://static-cdn.jtvnw.net/jtv_user_pictures/10747f94-3f2b-42f1-8148-902371ce2556-profile_image-300x300.png", "video_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/ab7a801f-15f4-4dc4-a6f5-df970c976808-channel_offline_image-1920x1080.png", "profile_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/544bf154-5f84-40b8-95c4-037b01302a63-profile_banner-480.jpg", "profile_banner_background_color": None, "url": "https://www.twitch.tv/mrjackp0t", "views": 109121, "followers": 6213, "broadcaster_type": "partner", "description": "", "private_video": 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.



{"mature": false, "status": "يالله حيهم ", "broadcaster_language": "ar", "broadcaster_software": "unknown_rtmp", "display_name": "Abo__reja", "game": "Fortnite", "language": "en", "id": "71963260", "name": "abo__reja", "created_at": "datetime.datetime(2014, 9, 26, 17, 38, 45)", "updated_at": "datetime.datetime(2018, 10, 20, 19, 41, 38)", "partner": false, "logo": "https://static-cdn.jtvnw.net/jtv_user_pictures/9341d23e-48c0-42dd-9195-d3ae0c2a4ed5-profile_image-300x300.png", "video_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/fc3141890ae4c3af-channel_offline_image-1920x1080.png", "profile_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/31d72be7-db7d-4387-9df8-68de723e4def-profile_banner-480.jpg", "profile_banner_background_color": None, "url": "https://www.twitch.tv/abo__reja", "views": 57836, "followers": 18462, "broadcaster_type": "affiliate", "description": "يا قوم أنا عاطل باطل وبس العب بلاستيشن إنجازي الوحيد عندي قناة في اليوتيوب عدد المشتركين 200الف خش على ال

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.



{"mature": false, "status": "K I Ş K I R T I L D I N #FortniteSezon6", "broadcaster_language": "tr", "broadcaster_software": "unknown_rtmp", "display_name": "Videoyun", "game": "Just Chatting", "language": "tr", "id": "24233423", "name": "videoyun", "created_at": "datetime.datetime(2011, 8, 20, 18, 50, 5)", "updated_at": "datetime.datetime(2018, 10, 20, 22, 3, 45)", "partner": true, "logo": "https://static-cdn.jtvnw.net/jtv_user_pictures/1eda049c091b0f04-profile_image-300x300.jpeg", "video_banner": None, "profile_banner": "https://static-cdn.jtvnw.net/jtv_user_pictures/ec3ce7d63984e51a-profile_banner-480.jpeg", "profile_banner_background_color": None, "url": "https://www.twitch.tv/videoyun", "views": 10576223, "followers": 209356, "broadcaster_type": "partner", "description": "Dünyanın en scamci yayıncısı", "private_video": false, "privacy_options_enabled": false}
{"mature": false, "status": "been drankin watamelon", "broadcaster_language": "en", "broadcaster_software": "unknown_rtmp"

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


### Adding to Twitchly DB

In [3]:
try:
    db = twitchly_db.Database()
except ValueError:
    db = db

In [4]:
responses = []

In [36]:
for user_id in user_channels['user_id']:
    response = dict(db.get_user_info(user_id))
    print(response, end="\r")
    responses.append(response)

{'broadcaster_language': 'en', 'broadcaster_software': '', 'broadcaster_type': '', 'created_at': '2018-06-06 17:29:58', 'description': '', 'display_name': 'yodagurt', 'followers': 0, 'follows': ['60056333', '152143077', '128489946', '68292748', '223088885', '215308867', '67143805', '50341056', '87204373', '15386355', '110690086', '127651530', '106125347', '29829912', '39298218', '32526505', '67650991', '19571641', '36769016'], 'id': '229191139', 'language': 'en', 'logo': 'https://static-cdn.jtvnw.net/user-default-pictures/49988c7b-57bc-4dee-bd4f-6df4ad215d3a-profile_image-300x300.jpg', 'mature': False, 'name': 'yodagurt', 'partner': False, 'privacy_options_enabled': False, 'private_video': False, 'type': 'user', 'updated_at': '2018-09-14 21:38:31', 'url': 'https://www.twitch.tv/yodagurt', 'views': 1}4373', '15386355', '110690086', '127651530', '106125347', '29829912', '39298218', '32526505', '67650991', '19571641', '36769016']}6', '187057312', '195936982', '228704477', '85698263', '195

KeyboardInterrupt: 

In [37]:
channels = pd.DataFrame(responses)
channels.head()

Unnamed: 0,bio,broadcaster_language,broadcaster_software,broadcaster_type,created_at,description,display_name,followers,follows,game,...,privacy_options_enabled,private_video,profile_banner,profile_banner_background_color,status,type,updated_at,url,video_banner,views
0,,,,,2018-10-20 09:31:48,,ele_tumba357,0,"[32140000, 29829912, 19571641, 75264857, 66691...",,...,False,False,,,,user,2018-10-20 09:31:48,https://www.twitch.tv/ele_tumba357,,0
1,,,,,2018-10-20 09:31:48,,ele_tumba357,0,"[32140000, 29829912, 19571641, 75264857, 66691...",,...,False,False,,,,user,2018-10-20 09:31:48,https://www.twitch.tv/ele_tumba357,,0
2,,,,,2018-10-20 09:31:48,,ele_tumba357,0,"[32140000, 29829912, 19571641, 75264857, 66691...",,...,False,False,,,,user,2018-10-20 09:31:48,https://www.twitch.tv/ele_tumba357,,0
3,,,,,2018-10-20 09:31:48,,ele_tumba357,0,"[32140000, 29829912, 19571641, 75264857, 66691...",,...,False,False,,,,user,2018-10-20 09:31:48,https://www.twitch.tv/ele_tumba357,,0
4,,,,,2018-10-20 09:31:48,,ele_tumba357,0,"[32140000, 29829912, 19571641, 75264857, 66691...",,...,False,False,,,,user,2018-10-20 09:31:48,https://www.twitch.tv/ele_tumba357,,0


In [38]:
channels.shape

(5435, 26)

In [39]:
len(channels['display_name'].unique())

373

In [40]:
len(responses)

5435

In [50]:
followed_channels = channels[channels['followers']!=0]
followed_channels.shape

(1653, 26)

In [52]:
followed_channels.describe()

Unnamed: 0,followers,views
count,1653.0,1653.0
mean,21.999395,151.350877
std,50.467602,209.407634
min,1.0,0.0
25%,3.0,8.0
50%,4.0,45.0
75%,11.0,150.0
max,181.0,600.0
