In [1]:
import igraph
from langdetect import DetectorFactory, detect

In [2]:
import pandas as pd

In [3]:
def parse_gdf_channel(file_name):
    """Split a channel-network GDF file into node-metadata and edge tables.

    Every line is stripped and split on commas. Rows with more than three
    fields are treated as node metadata and rows with exactly three fields
    as edges; rows with fewer fields (e.g. blank lines) are ignored. Within
    each group the first row supplies the column names. No type conversion
    is performed, so every value stays a string.

    The split is a plain ``str.split(',')``: a field that itself contains a
    comma produces extra fields for that row.

    Parameters
    ----------
    file_name : str or path-like
        Path of the GDF file to read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(metadata, graph)`` built from the two row groups described above.

    Raises
    ------
    IndexError
        If the file has no row with more than three fields, or no row with
        exactly three fields (there is then no header row to take).
    """
    # Decode explicitly as UTF-8, matching parse_gdf_video, so that non-ASCII
    # channel names do not depend on the platform's default encoding.
    with open(file_name, encoding='utf-8') as f:
        rows = [line.strip().split(',') for line in f]

    metadata = [row for row in rows if len(row) > 3]
    graph_data = [row for row in rows if len(row) == 3]
    return (pd.DataFrame(metadata[1:], columns=metadata[0]),
            pd.DataFrame(graph_data[1:], columns=graph_data[0]))

def parse_gdf_video(file_name):
    """Split a video-network GDF file into node-metadata and edge tables.

    Every line is stripped and split on commas. Rows with more than three
    fields are treated as node metadata and rows with exactly three fields
    as edges; rows with fewer fields are ignored. Within each group the
    first row supplies the column names. No type conversion is performed,
    so every value stays a string.

    Metadata rows that are longer than the header are truncated to the
    header's width (previously a hard-coded 14). Such rows arise when a
    field contains a comma, because the split is a plain ``str.split(',')``;
    truncation keeps the frame rectangular but the values after the extra
    comma are shifted one column to the right for that row.

    Parameters
    ----------
    file_name : str or path-like
        Path of the UTF-8 encoded GDF file to read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(metadata, graph)`` built from the two row groups described above.

    Raises
    ------
    IndexError
        If the file has no row with more than three fields, or no row with
        exactly three fields (there is then no header row to take).
    """
    with open(file_name, encoding='utf-8') as f:
        rows = [line.strip().split(',') for line in f]

    node_rows = [row for row in rows if len(row) > 3]
    graph_data = [row for row in rows if len(row) == 3]

    # Use the header's own width as the column count instead of assuming 14.
    header = node_rows[0]
    width = len(header)
    metadata = [row[:width] for row in node_rows[1:]]

    return (pd.DataFrame(metadata, columns=header),
            pd.DataFrame(graph_data[1:], columns=graph_data[0]))
    
    
# langdetect's detection is probabilistic: short or ambiguous titles can get a
# different language on each run. Fixing the seed before the first detect()
# call makes the 'title language' column reproducible.
DetectorFactory.seed = 0

# Parse the video-network GDF export into node metadata and edge tables.
df_video_metadata, df_video_rel = parse_gdf_video(
    'videonet_seeds1_nodes88_2019_05_08-07_42_41.gdf')
# Add the detected language of each video title as a new column.
df_video_metadata['title language'] = df_video_metadata['label VARCHAR'].apply(detect)
df_video_metadata

Unnamed: 0,nodedef>name VARCHAR,label VARCHAR,isSeed VARCHAR,seedRank INT,publishedAt INT,channelTitle VARCHAR,channelId VARCHAR,videoCategoryLabel VARCHAR,viewCount INT,likeCount INT,dislikeCount INT,dislikeLikeRatio FLOAT,favoriteCount INT,commentCount INT,title language
0,7NiYVoqBt-8,Dumbo Official Trailer,yes,1,1542246427,Walt Disney Studios,UCuaFvcY4MhZY3U43mMt1dYQ,Film & Animation,16333333,238052,11587,0.048674239241846,0,22247,en
1,0LhQ7quaZy4,10 Animated Movies That Were Changed In Other ...,no,,1520694000,Screen Rant,UC2iUwfYi_1FCGGqhOUNx-iA,Entertainment,4191315,28108,2662,0.094706133485129,0,2439,en
2,4ZoVVyrjn4Y,شاهد الفيديو لتعرف حقيقة القطط,no,,1513484626,قناة المعرفة | Almarefa channel,UC1Fnr_avpq-NtnG_R4q4xoQ,Pets & Animals,10138925,48543,11503,0.23696516490534,0,2599,ar
3,9KZKJpOPo_E,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,no,,1550861984,WB Kids,UC9trsD1jCTXXtN3xIOIU8gg,Film & Animation,10514669,77723,15996,0.20580780463955,0,,en
4,r_qdACxgZM4,KARPUZLA YAPABİLECEĞİNİZ 15 İNANILMAZ KOLAY FİKİR,no,,1553954408,5 DAKİKADA HALLET,UCMX67L1CGxutIdUynvpQnmA,Howto & Style,5327214,15784,6257,0.39641409021794,0,462,tr
5,vCfw4gcb2lI,Justice League cast ★ Before and After 2017,no,,1512136809,toptenfamous,UC_-BKZHFHcXKIQ_C5vPV4Ug,People & Blogs,10932787,28160,8025,0.28497869318182,0,1179,en
6,5abJ0rNIuqk,Super Kids Games Live – Cars for Kids Learn Co...,no,,1557287154,Super Kids Games,UCw1rV3Z6pKN3rSPklveRw2A,Education,30926,288,178,0.61805555555556,0,0,en
7,0zy-Kp_zywU,THE SECRET LIFE OF PETS 2: All NEW Trailers (2...,no,,1548950949,FilmSelect Trailer,UCT0hbLDa-unWsnZ6Rjzkfug,Entertainment,8100932,75301,2411,0.032018167089414,0,2940,en
8,7M-uqMmJdeo,تعلم الالوان بالإنجليزية للاطفال مع اغنية فينج...,no,,1509352572,cocoz toon,UCNZm7rvDZgCZrGtEBlVFidg,Film & Animation,246414403,732608,329588,0.44988315715908,0,,ar
9,x8bo5unaMpI,Dumbo 1941 - Best Memorable Moments [HD],no,,1555608370,T Pictures,UCdXj3nJ6vpyzUFpyimgSBHQ,People & Blogs,55994,138,24,0.17391304347826,0,6,fr


In [4]:
# Parse the channel-network GDF export into a node-metadata table and an
# edge table (see parse_gdf_channel for how rows are classified).
df_channel_metadata, df_channel_graph = parse_gdf_channel(
    'channelnet_seeds1_nodes18_2019_05_07-12_30_18.gdf')

In [8]:
def join_update_clean_dfs(df_graph, df_metadata, title):
    """Relabel edge endpoints by node label and export both tables as CSV.

    The GDF-style column names are shortened (``nodeid``, ``label``,
    ``subscriberCount`` on the metadata; ``nodeid_1``, ``nodeid_2``,
    ``directed`` on the edges), each edge endpoint id is replaced by the
    ``label`` of the matching metadata row, and the two resulting frames are
    written to ``<title>_metadata.csv`` and ``<title>_relations.csv`` with
    pandas' default ``to_csv`` settings.

    Both lookups use inner joins, so the relations table contains exactly
    the edges whose two endpoints appear in ``df_metadata``. (The earlier
    right joins additionally emitted an all-NaN ``node1``/``directed`` row
    for every node that had no incoming edge.)

    The input frames are not modified; renamed copies are returned.

    Parameters
    ----------
    df_graph : pandas.DataFrame
        Edge table with columns ``'edgedef>node1 VARCHAR'``,
        ``'node2 VARCHAR'`` and ``'directed BOOLEAN'`` (or their already
        shortened names).
    df_metadata : pandas.DataFrame
        Node table with at least ``'nodedef>name VARCHAR'`` and
        ``'label VARCHAR'`` (or their already shortened names).
    title : str
        Prefix of the two CSV files that are written.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_metadata, df_graph)``: the renamed metadata, and the edge
        table with columns ``node1``, ``node2``, ``directed`` where the
        node columns hold labels instead of ids.
    """
    # rename() without inplace returns new frames, leaving the caller's
    # objects untouched so the function can be re-run on the same inputs.
    df_metadata = df_metadata.rename(columns={
        'nodedef>name VARCHAR': 'nodeid',
        'label VARCHAR': 'label',
        'subscriberCount INT': 'subscriberCount'
    })
    df_graph = df_graph.rename(columns={
        'edgedef>node1 VARCHAR': 'nodeid_1',
        'node2 VARCHAR': 'nodeid_2',
        'directed BOOLEAN': 'directed'
    })

    node_labels = df_metadata[['label', 'nodeid']]

    # Resolve the source endpoint id to its label.
    df_graph = pd.merge(df_graph,
                        node_labels,
                        left_on='nodeid_1',
                        right_on='nodeid',
                        how='inner')[['label', 'nodeid_2', 'directed'
                                      ]].rename(columns={'label': 'node1'})

    # Resolve the target endpoint id to its label.
    df_graph = pd.merge(df_graph,
                        node_labels,
                        left_on='nodeid_2',
                        right_on='nodeid',
                        how='inner')[['node1', 'label', 'directed'
                                      ]].rename(columns={'label': 'node2'})

    df_metadata.to_csv(title + '_metadata.csv')
    df_graph.to_csv(title + '_relations.csv')

    return df_metadata, df_graph

# Relabel the channel edges by channel name; this call also writes
# disney_channel_metadata.csv and disney_channel_relations.csv.
df_metadata, df_graph = join_update_clean_dfs(df_channel_graph, df_channel_metadata, 'disney_channel')
df_graph

Unnamed: 0,node1,node2,directed
0,Walt Disney Studios,Disney Family,true
1,The Muppets,Disney Family,true
2,Walt Disney Animation Studios,Disney Family,true
3,Pixar,Disney Family,true
4,Disney Movies,Disney Family,true
5,Disney,Disney Family,true
6,Oh My Disney,Disney Family,true
7,Disney Style,Disney Family,true
8,Mickey Mouse,Disney Family,true
9,Babble,Disney Family,true


In [9]:
# Display the metadata frame returned by the most recent
# join_update_clean_dfs call.
df_metadata

Unnamed: 0,nodeid,label,isSeed VARCHAR,seedRank INT,subscriberCount,videoCount INT,viewCount(100s) INT,country VARCHAR,publishedAt VARCHAR,daysactive INT
0,UCuaFvcY4MhZY3U43mMt1dYQ,Walt Disney Studios,yes,1.0,2499412,947,10893293,US,2008-11-18T20:34:44.000Z,3822
1,UCAwm8rSWCoi94powYWnhz6Q,Disney Family,no,,228620,336,206188,US,2005-11-24T06:58:39.000Z,4912
2,UCB0ABGbdSggBXLF0UdN4d5A,The Muppets,no,,702469,277,3494887,not set,2009-11-20T01:57:11.000Z,3455
3,UC_976xMxPgzIa290Hqtk-9g,Walt Disney Animation Studios,no,,3511643,347,22436777,US,2008-03-25T16:32:01.000Z,4060
4,UC_IRYSp4auq7hKLvziWVH6w,Pixar,no,,2552333,521,12507275,US,2006-09-19T17:42:01.000Z,4613
5,UC4IEmIzWf_X4lEPtGm7kgzQ,Disney Movies,no,,509143,310,898677,not set,2007-03-31T02:38:25.000Z,4420
6,UC_5niPa-d35gg88HaS7RrIw,Disney,no,,3632930,776,22566493,not set,2012-02-11T12:07:58.000Z,2642
7,UCbFnGyIcAhDrj-90OzvI7kw,Oh My Disney,no,,344389,450,963857,US,2013-07-11T06:09:13.000Z,2126
8,UCZSVJrC2Hnu92n2Lhez3KgA,Disney Style,no,,547115,477,1534234,not set,2012-02-27T20:05:43.000Z,2626
9,UC5K8SEF_7GQBedXIjtXLCRg,Mickey Mouse,no,,3008416,249,20604685,not set,2008-03-12T17:52:01.000Z,4073


In [10]:
# Same processing for the video network; this call also writes
# disney_video_metadata.csv and disney_video_relations.csv.
# NOTE(review): this rebinds df_metadata and df_graph, so the channel results
# previously held under those names are no longer reachable from here on.
df_metadata, df_graph = join_update_clean_dfs(df_video_rel, df_video_metadata, 'disney_video')
df_graph

Unnamed: 0,node1,node2,directed
0,Dumbo Official Trailer,10 Animated Movies That Were Changed In Other ...,true
1,Dumbo Official Trailer,شاهد الفيديو لتعرف حقيقة القطط,true
2,Dumbo Official Trailer,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
3,THE SECRET LIFE OF PETS 2: All NEW Trailers (2...,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
4,Dumbo 1941 - Best Memorable Moments [HD],🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
5,THE LION KING Full Movie Trailer (2019),🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
6,Top 10 Disney Moments That Made Us Ugly Cry,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
7,Purl | Pixar SparkShorts,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
8,The Lion King Official Teaser Trailer,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true
9,new animation movies 2019 full movies english ...,🔴 WATCH NOW! BEST CLASSIC TOM & JERRY MOMENTS ...,true


In [None]:
# Display whatever df_metadata currently holds; when the cells are run in
# order this is the video metadata from the 'disney_video' call.
df_metadata

In [None]:
# When the cells are run in order, df_graph holds the *video* relations here
# (it was rebound by the join_update_clean_dfs call with title 'disney_video').
# Writing it to 'disney_channel_relations.csv' would overwrite the channel
# relations exported earlier, so export under the video file name instead.
df_graph.to_csv('disney_video_relations.csv')

In [None]:
# Display whatever df_graph currently holds; when the cells are run in order
# this is the label-resolved video relations table.
df_graph

In [None]:
# Exploratory: print the package-level help for igraph. igraph is imported at
# the top but not otherwise used in the visible cells.
# NOTE(review): consider removing this cell before sharing the notebook.
help(igraph)