In [1]:
import json
from datetime import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
# Render matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline
# Raise pandas' display limits to 100 columns / 100 rows.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [2]:
# Load the raw API responses from disk. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open for the
# lifetime of the kernel; json.load parses straight from the file object.
with open('video_stats.txt') as stats_file:
    json_data = json.load(stats_file)

# Peek at the first two responses to see the structure.
json_data[:2]

[{'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/3F9deMVKUOCzfXCPqwlQ46zRcbs"',
  'items': [{'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/aFJsSmDi9XPxBq8FoZcpx9uvWjo"',
    'id': 'lLWEXRAnQd0',
    'kind': 'youtube#video',
    'statistics': {'commentCount': '20867',
     'dislikeCount': '2206',
     'favoriteCount': '0',
     'likeCount': '149323',
     'viewCount': '7393172'}}],
  'kind': 'youtube#videoListResponse',
  'pageInfo': {'resultsPerPage': 1, 'totalResults': 1}},
 {'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/Sc7wrmO-ayFpGV4E1tgm6d2rmY4"',
  'items': [{'etag': '"DuHzAJ-eQIiCIp7p4ldoVcVAOeY/ZbuaG07r4lhthDQ-WCVFBVYrUdA"',
    'id': 'kJFB6rH3z2A',
    'kind': 'youtube#video',
    'statistics': {'commentCount': '2314',
     'dislikeCount': '330',
     'favoriteCount': '0',
     'likeCount': '17582',
     'viewCount': '1403034'}}],
  'kind': 'youtube#videoListResponse',
  'pageInfo': {'resultsPerPage': 1, 'totalResults': 1}}]

## The `items` field seems to have what we want, but it's a list. Is there always exactly one element in it?

In [3]:
# Method 1: tabulate how many entries each response carries in its 'items' list.
num_items = [len(response['items']) for response in json_data]
item_count_distribution = pd.Series(num_items).value_counts()
item_count_distribution

1    418
dtype: int64

In [4]:
# Method 2: a single boolean -- True when no response has an 'items' list
# whose length differs from 1.
not any(len(response['items']) != 1 for response in json_data)

True

## OK, so each `items` field is a 1-element list containing all the data we need

In [5]:
def extract_record(js):
    """Flatten one API response dict into a single flat record.

    Only the first element of ``js['items']`` is read. The returned dict
    holds that element's ``'id'`` and ``'etag'`` unchanged, followed by one
    key per entry of its ``'statistics'`` mapping with the value converted
    via ``int()``.

    Raises whatever the lookups or the conversion raise: ``KeyError`` for a
    missing key, ``IndexError`` for an empty ``'items'`` list, ``ValueError``
    for a statistic that is not an integer literal.
    """
    item = js['items'][0]
    record = {'id': item['id'], 'etag': item['etag']}
    # Statistic values are converted with int() before being stored.
    record.update((stat, int(raw)) for stat, raw in item['statistics'].items())
    return record

In [6]:
# Flatten every response into one record, then build the table in a single call.
records = list(map(extract_record, json_data))
df = pd.DataFrame.from_records(records)
print("Read %d video records" % df.shape[0])
df.head()

Read 418 video records


Unnamed: 0,commentCount,dislikeCount,etag,favoriteCount,id,likeCount,viewCount
0,20867,2206,"""DuHzAJ-eQIiCIp7p4ldoVcVAOeY/aFJsSmDi9XPxBq8Fo...",0,lLWEXRAnQd0,149323,7393172
1,2314,330,"""DuHzAJ-eQIiCIp7p4ldoVcVAOeY/ZbuaG07r4lhthDQ-W...",0,kJFB6rH3z2A,17582,1403034
2,456,92,"""DuHzAJ-eQIiCIp7p4ldoVcVAOeY/0dcbs-Whhl78lEcfN...",0,tWoInh2USOs,3852,446389
3,2956,636,"""DuHzAJ-eQIiCIp7p4ldoVcVAOeY/2inPtuSefvcAs7Hce...",0,UQ-RTZCOQn0,29745,2901816
4,1573,308,"""DuHzAJ-eQIiCIp7p4ldoVcVAOeY/rYRuYzUPOZzEjywrv...",0,TohG7F8M3Ls,13879,1150437


In [7]:
# Persist the flattened table; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='video_stats.csv', index=False)