In [1]:
import igraph
from langdetect import detect

In [2]:
import pandas as pd

In [3]:
def parse_gdf_channel(file_name):
    with open(file_name) as f:
        rows = [line.strip().split(',') for line in f.readlines()]
        # print(rows)
        metadata = [row for row in rows if len(row) > 3]
        graph_data = [row for row in rows if len(row) == 3]
        return pd.DataFrame(metadata[1:], columns=metadata[0]), pd.DataFrame(
            graph_data[1:], columns=graph_data[0])

def parse_gdf_video(file_name):
    with open(file_name, encoding='utf-8') as f:
        rows = [line.strip().split(',') for line in f.readlines()]
        # print(rows)
        metadata = [row[:14] for row in rows if len(row) > 3]
        graph_data = [row for row in rows if len(row) == 3]
        
        return pd.DataFrame(metadata[1:], columns=metadata[0]), pd.DataFrame(
            graph_data[1:], columns=graph_data[0])
    

def all_caps_to_proper(string):
    out_string = ''
    for token in string.split(' '):
        if token.isupper():
            out_string += ' ' + token.capitalize()
        else:
            out_string += ' ' + token
    return out_string.strip()
            

In [4]:
df_channel_metadata, df_channel_graph = parse_gdf_channel(
    'channelnet_seeds1_nodes18_2019_05_07-12_30_18.gdf')

FileNotFoundError: [Errno 2] No such file or directory: 'channelnet_seeds1_nodes18_2019_05_07-12_30_18.gdf'

In [20]:
def join_update_clean_dfs(df_graph, df_metadata, title, replace_node_ids=True):
    df_metadata.rename(columns={
        'nodedef>name VARCHAR': 'nodeid',
        'label VARCHAR': 'label',
        'subscriberCount INT': 'subscriberCount'
    },
        inplace=True)
    df_graph.rename(columns={
        'edgedef>node1 VARCHAR': 'nodeid_1',
        'node2 VARCHAR': 'nodeid_2',
        'directed BOOLEAN': 'directed'
    },
        inplace=True)
    if replace_node_ids is True:
        df_graph = pd.merge(df_graph,
                            df_metadata[['label', 'nodeid']],
                            left_on='nodeid_1',
                            right_on='nodeid',
                            how='right')[['label', 'nodeid_2', 'directed'
                                          ]].rename(columns={'label': 'node1'})

        df_graph = pd.merge(df_graph,
                            df_metadata[['label', 'nodeid']],
                            left_on='nodeid_2',
                            right_on='nodeid',
                            how='right')[['node1', 'label', 'directed'
                                          ]].rename(columns={'label': 'node2'})
    df_metadata.to_csv(title + '_metadata.csv')
    df_graph.to_csv(title + '_relations.csv')
    
    return df_metadata, df_graph


def load_video_data(filename):
    df_video_metadata, df_video_rel = parse_gdf_video(filename + '.gdf')        
    # clean up titles for language classification
    df_video_metadata['label_clean'] = df_video_metadata['label VARCHAR'].apply(all_caps_to_proper)
    # detect video title language 
    df_video_metadata['title_language'] = df_video_metadata['label_clean'].apply(detect)
    # clean column names and update node ids
    df_video_metadata, df_video_graph = join_update_clean_dfs(df_video_rel, df_video_metadata, filename)
    df_video_metadata['url'] = 'https://www.youtube.com/watch?v=' + df_video_metadata['nodeid']
    df_video_metadata['seed_channel'] = df_video_metadata['channelId VARCHAR'].apply(lambda x : True if x == df_video_metadata['channelId VARCHAR'][0] else False)
    return df_video_metadata, df_video_graph
    

def load_channel_data(filename):
    df_metadata, df_rel = parse_gdf_channel(filename + '.gdf')        
    # clean column names and update node ids
    df_metadata, df_graph = join_update_clean_dfs(df_rel, df_metadata, filename)
    df_metadata['url'] = 'https://www.youtube.com/channel/' + df_metadata['nodeid']
    df_metadata.to_csv(filename + '_metadata.csv')
    df_graph.to_csv(filename + '_relations.csv')
    return df_metadata, df_graph
    

    
df_video_metadata, df_video_graph = load_video_data('videonet_baby_shark_2019_05_14')

In [19]:
df_channel_metadata, df_channel_graph = load_channel_data('channelnet_disney_2019_05_07')

In [17]:
df_channel_graph

Unnamed: 0,node1,node2,directed
0,Walt Disney Studios,Disney Family,true
1,The Muppets,Disney Family,true
2,Walt Disney Animation Studios,Disney Family,true
3,Pixar,Disney Family,true
4,Disney Movies,Disney Family,true
5,Disney,Disney Family,true
6,Oh My Disney,Disney Family,true
7,Disney Style,Disney Family,true
8,Mickey Mouse,Disney Family,true
9,Babble,Disney Family,true


In [21]:
df_video_metadata

Unnamed: 0,nodeid,label,isSeed VARCHAR,seedRank INT,publishedAt INT,channelTitle VARCHAR,channelId VARCHAR,videoCategoryLabel VARCHAR,viewCount INT,likeCount INT,dislikeCount INT,dislikeLikeRatio FLOAT,favoriteCount INT,commentCount INT,label_clean,title_language,url,seed_channel
0,XqZsoesa55w,Baby Shark Dance | Sing and Dance! | Animal So...,yes,1,1466204430,Pinkfong! Kids Songs & Stories,UCcdwLMPsaU2ezNSJU1nFoBQ,Education,2762119599,7160786,2567215,0.35851022499485,0,,Baby Shark Dance | Sing and Dance! | Animal So...,en,https://www.youtube.com/watch?v=XqZsoesa55w,True
1,7GjOOyBoELw,Johny Johny Yes Papa - Great Songs for Childre...,no,,1484467232,LooLoo Kids - Nursery Rhymes and Children s Songs,UC4NALVCmcmL5ntpV0thoH6w,Music,270789467,604683,339558,0.56154712469178,0,21722,Johny Johny Yes Papa - Great Songs for Childre...,en,https://www.youtube.com/watch?v=7GjOOyBoELw,False
2,-xSdvHAgQlo,Making 3 Glitter Candy Ice Cream & Learn Color...,no,,1557755547,Fun Kids Yuppi Tv,UCNjXaCXTB-LWs6R7UxKSo4w,Entertainment,669460,,,,0,60,Making 3 Glitter Candy Ice Cream & Learn Color...,en,https://www.youtube.com/watch?v=-xSdvHAgQlo,False
3,spomkBAvOdM,🔴 Peppa Pig Live | Peppa Pig Official | Peppa ...,no,,1557821931,Peppa Pig - Official Channel,UCAOtE1V7Ots4DjM8JLlrYgg,Film & Animation,117172,1750,808,0.46171428571429,0,0,🔴 Peppa Pig Live | Peppa Pig Official | Peppa ...,en,https://www.youtube.com/watch?v=spomkBAvOdM,False
4,HPMEKQtWZbo,Nursery Rhymes & Kids Songs Live Stream - Rain...,no,,1557816480,All Babies Channel - 3D Nursery Rhymes For Babies,UCMry5jwn3fSxXqnqUUIcR0Q,Education,3663,277,135,0.48736462093863,0,,Nursery Rhymes & Kids Songs Live Stream - Rain...,en,https://www.youtube.com/watch?v=HPMEKQtWZbo,False
5,F4tHL8reNCs,Johny Johny Yes Papa 👶 THE BEST Song for Child...,no,,1475910395,LooLoo Kids - Nursery Rhymes and Children s Songs,UC4NALVCmcmL5ntpV0thoH6w,Music,1777009343,3739601,2193116,0.58645721829682,0,77346,Johny Johny Yes Papa 👶 The Best Song for Child...,en,https://www.youtube.com/watch?v=F4tHL8reNCs,False
6,R93ce4FZGbc,Baby Shark | Animal Songs | PINKFONG Songs for...,no,,1448492401,Pinkfong! Kids Songs & Stories,UCcdwLMPsaU2ezNSJU1nFoBQ,Education,206375349,482213,202010,0.41892275820021,0,,Baby Shark | Animal Songs | Pinkfong Songs for...,en,https://www.youtube.com/watch?v=R93ce4FZGbc,True
7,_b9F05WE28Y,👶 Baby Songs | Dave and Ava | Nursery Rhymes 👶,no,,1557832469,Dave and Ava - Nursery Rhymes and Baby Songs,UC6zhI71atP7YLoZyIyCIGNw,Education,511,49,26,0.53061224489796,0,0,👶 Baby Songs | Dave and Ava | Nursery Rhymes 👶,en,https://www.youtube.com/watch?v=_b9F05WE28Y,False
8,GbwZKuSJeec,Om Nom Stories: BEST OF ALL SEASONS | Cut the ...,no,,1556882862,HooplaKidz TV - Funny Cartoons For Children,UCMfZ_z0LUm805JOZLktl2QQ,Education,2831804,8573,3773,0.44010264784789,0,,Om Nom Stories: Best Of All Seasons | Cut the ...,en,https://www.youtube.com/watch?v=GbwZKuSJeec,False
9,_n4SXZLU7aA,ChuChu TV Nursery Rhymes & Kids Songs Live Str...,no,,1557813602,ChuChu TV Nursery Rhymes & Kids Songs,UCBnZ16ahKA2DZ_T5W0FPUXg,Education,12640,838,430,0.51312649164678,0,0,ChuChu Tv Nursery Rhymes & Kids Songs Live Str...,en,https://www.youtube.com/watch?v=_n4SXZLU7aA,False


In [22]:
df_video_graph

Unnamed: 0,node1,node2,directed
0,Baby Shark Dance | Sing and Dance! | Animal So...,Johny Johny Yes Papa - Great Songs for Childre...,true
1,Johny Johny Yes Papa 👶 THE BEST Song for Child...,Johny Johny Yes Papa - Great Songs for Childre...,true
2,Om Nom Stories: BEST OF ALL SEASONS | Cut the ...,Johny Johny Yes Papa - Great Songs for Childre...,true
3,Learn Colors and Vehicles Toys for Kids Firema...,Johny Johny Yes Papa - Great Songs for Childre...,true
4,Baby Shark and 50+ Songs | + Compilation | PIN...,Johny Johny Yes Papa - Great Songs for Childre...,true
5,🚍 The Wheels On The Bus 🚌 Fun Songs for Childr...,Johny Johny Yes Papa - Great Songs for Childre...,true
6,Hi Baby Shark Let’s Do the Baby Shark Dance! ...,Johny Johny Yes Papa - Great Songs for Childre...,true
7,Baby Shark - Educational Songs for Children | ...,Johny Johny Yes Papa - Great Songs for Childre...,true
8,Baby Shark and more | +Compilation | Dance Dan...,Johny Johny Yes Papa - Great Songs for Childre...,true
9,Twinkle Twinkle Little Star and Many More Vide...,Johny Johny Yes Papa - Great Songs for Childre...,true
