In [1]:
from __future__ import unicode_literals
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import re
import os
import sys
import datetime

In [2]:
#config
#reload(sys)
#sys.setdefaultencoding('utf-8')
matplotlib.style.use('ggplot')
%matplotlib inline

#constants
# Directory that holds the live viewer CSV exports read by this notebook.
# Set the RBTV_LIVE_DATA_DIRECTORY environment variable to point the notebook
# at another location without editing it; the original path stays the default.
LIVE_DATA_DIRECTORY = os.environ.get('RBTV_LIVE_DATA_DIRECTORY', '/home/marcel/Development/rbtvdata/live')

## Live Data
___

In [3]:
#live dataframe
# Builds `live`: one row per minute, indexed by local time, with the columns
# 'viewers', 'day' (YYYY-MM-DD string) and 'weekday' (Monday=0 ... Sunday=6).
columns = ['datetime', 'viewers']

#read live files
# All datapoints are collected first and the frame is built once afterwards:
# DataFrame.append copied the whole frame for every file and no longer exists
# in pandas 2.0.
datapoints = []
for filename in sorted(os.listdir(LIVE_DATA_DIRECTORY)):
    if not filename.endswith('.csv'):
        continue
    filepath = os.path.join(LIVE_DATA_DIRECTORY, filename)

    #read file; the with-block closes it even if a line fails to parse
    with open(filepath) as openfile:
        #process each datapoint
        for line in openfile:
            split = line.split(',')
            #only lines with exactly two comma-separated fields are used
            #(<unix timestamp>,<viewers>); everything else is skipped
            if len(split) == 2:
                timestamp, viewers = split
                #fromtimestamp() converts to the local timezone of this machine
                datetimeObj = datetime.datetime.fromtimestamp(int(timestamp))
                datapoints.append((datetimeObj, int(viewers)))

live = pd.DataFrame(datapoints, columns=columns)

#explicit dtypes keep the frame datetime-indexed and numeric even when no
#datapoints were read
live['datetime'] = pd.to_datetime(live['datetime'])
live['viewers'] = live['viewers'].astype('int64')

#index by time (this also removes the 'datetime' column) and sort
live = live.set_index('datetime').sort_index()

#resample to one row per minute; gaps are forward-filled for at most 5 minutes,
#longer gaps stay NaN
live = live.resample('1min').mean().ffill(limit=5)

#create additional column
live['day'] = live.index.strftime('%Y-%m-%d')
live['weekday'] = live.index.dayofweek

## Live Data: Saturday Evening Viewers per 30-Minute Window
___

In [4]:
# Prints, for every 30-minute window from 19:30 up to 23:30, the mean viewer
# count per Saturday since 2016-09-10.

#filter data by date
youtubeStart = datetime.datetime(2016, 9, 10)
youtubeEnd = datetime.datetime.now()
liveFiltered = live.loc[youtubeStart : youtubeEnd]

#filter data by weekday (dayofweek counts from Monday=0, so 5 is Saturday)
liveFiltered = liveFiltered[liveFiltered['weekday'] == 5]

#filter data by time (in 30 minutes interval)
rangeStart = datetime.time(19, 30, 0)
rangeEnd = datetime.time(20, 0, 0)
until = datetime.time(0, 0, 0)
step = datetime.timedelta(minutes=30)
#datetime.time supports no arithmetic, so the window bounds are shifted on an
#arbitrary fixed date and converted back to a time
anchorDate = datetime.date(2000, 1, 1)

#the loop stops as soon as the next window would end at `until`, so
#23:00 - 23:30 is the last window that is processed
while rangeEnd != until:
    #filter data by time
    #NOTE(review): between_time includes both bounds by default, so the minute
    #on the boundary of two adjacent windows is counted in both - confirm intended
    liveFilteredTime = liveFiltered.between_time(rangeStart, rangeEnd)

    #group viewers by date
    liveGrouped = liveFilteredTime.groupby('day')
    viewersPerDay = liveGrouped['viewers']

    #calculate mean for each day
    liveMean = viewersPerDay.mean()

    #filter days by minimum number of datapoints; count() ignores the NaN rows
    #that the 1-minute resampling leaves for gaps, whereas len(group) counted them
    liveMean = liveMean[viewersPerDay.count() > 25]
    liveMean = liveMean.to_frame('viewers')

    #output
    print('\n==================================================')
    print(str(rangeStart) + ' - ' + str(rangeEnd))
    print(liveMean)

    #add 30 minutes
    rangeStart = (datetime.datetime.combine(anchorDate, rangeStart) + step).time()
    rangeEnd = (datetime.datetime.combine(anchorDate, rangeEnd) + step).time()


19:30:00 - 20:00:00
                viewers
day                    
2016-09-10  2207.741935
2016-09-17  2818.354839
2016-09-24  2565.161290

20:00:00 - 20:30:00
                viewers
day                    
2016-09-10  2246.032258
2016-09-17  2870.935484
2016-09-24  2420.838710

20:30:00 - 21:00:00
                viewers
day                    
2016-09-10  2451.838710
2016-09-17  2469.483871
2016-09-24  2367.354839

21:00:00 - 21:30:00
                viewers
day                    
2016-09-10  2304.516129
2016-09-17  2396.225806
2016-09-24  2598.935484

21:30:00 - 22:00:00
                viewers
day                    
2016-09-10  2339.967742
2016-09-17  2376.580645
2016-09-24  2339.709677

22:00:00 - 22:30:00
                viewers
day                    
2016-09-10  2341.483871
2016-09-17  2323.838710

22:30:00 - 23:00:00
                viewers
day                    
2016-09-10  2307.387097
2016-09-17  2276.580645

23:00:00 - 23:30:00
                viewers
day             