### Imports

In [1]:
import pandas as pd
from instagrapi import Client
import time
from itertools import cycle
import ast
import json

import networkx as nx
from pyvis.network import Network
import community.community_louvain as community_louvain

import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

#### Assess instagrapi

In [2]:
# Cred constants
ACCOUNT_USERNAME = '****'
ACCOUNT_PASSWORD = '****'

In [3]:
# Connect to the instagrapi client
cl = Client()
cl.login(ACCOUNT_USERNAME, ACCOUNT_PASSWORD)

True

In [4]:
# Delay in requests to mimic the real user 
cl.delay_range = [1, 3] 

### Load and preapre the data

In [5]:
user_friends = pd.read_csv('data/user_friends.csv')
user_friends

Unnamed: 0.1,Unnamed: 0,id,friends_id,num_following
0,0,54120777372,['371806449'],1
1,1,191716116,"['56518990213', '528817151', '13460080', '2309...",1039
2,2,16596631,"['56518990213', '259220806', '6739053665', '58...",579
3,3,44577963743,"['56518990213', '46023299420', '43055553379', ...",185
4,4,58618143472,"['56518990213', '1333014392', '314834803', '59...",205
...,...,...,...,...
355,355,970260212,"['56518990213', '10480357', '695995017', '5123...",1327
356,356,2045670253,"['56518990213', '259220806', '25945306', '1842...",1032
357,357,30989427735,"['56518990213', '212971662', '54858589934', '1...",145
358,358,332849787,"['56518990213', '460563723', '8032582326', '22...",411


In [6]:
user_friends['friends_id'] = user_friends['friends_id'].apply(ast.literal_eval)

In [7]:
# Unpack the friend_id column so there're pairs of id-friend_id
pairs = []

for _, row in user_friends.iterrows():
    id_value = row['id']
    friends_list = row['friends_id']
    
    for friend in friends_list:
        pairs.append({'id': id_value, 'friend_id': friend})


In [8]:
pairs_df = pd.DataFrame(pairs)
pairs_df

Unnamed: 0,id,friend_id
0,54120777372,371806449
1,191716116,56518990213
2,191716116,528817151
3,191716116,13460080
4,191716116,23098233
...,...,...
259296,1028011269,1629085051
259297,1028011269,2220681721
259298,1028011269,28097051580
259299,1028011269,2860332499


In [34]:
pairs_df.to_csv('data/pairs_df.csv')

In [9]:
# Let's look at the accounts our followers follow the most
top_following = pd.DataFrame(pairs_df[['friend_id']].value_counts()).reset_index()
top_following.head(30)

Unnamed: 0,friend_id,count
0,56518990213,354
1,8577689525,98
2,53842752932,91
3,9777455,87
4,52236156625,86
5,6380930,86
6,16516077,86
7,528817151,82
8,3154258315,77
9,52742526132,74


In [10]:
# Cut to the 100 (+1) most popular pages
top_100_following = top_following[0:100]

In [11]:
# Get the username of most followed pages
top_100_following['top_friend_username'] = top_100_following['friend_id'].apply(lambda x: cl.username_from_user_id(x))
top_100_following 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_100_following['top_friend_username'] = top_100_following['friend_id'].apply(lambda x: cl.username_from_user_id(x))


Unnamed: 0,friend_id,count,top_friend_username
0,56518990213,354,frames__store
1,8577689525,98,magnoliafilmlab
2,53842752932,91,retro_camera_shop
3,9777455,87,zendaya
4,52236156625,86,tbilisi_pike
...,...,...,...
95,43851908194,36,tes_club_
96,6953843386,36,lechicradical
97,37130712232,36,sportsmoodco
98,220428576,36,irinashayk


In [19]:
top_100_following.to_csv('data/top_100_following.csv')

In [35]:
top_100_following.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Unnamed: 0           100 non-null    int64 
 1   friend_id            100 non-null    int64 
 2   count                100 non-null    int64 
 3   top_friend_username  100 non-null    object
dtypes: int64(3), object(1)
memory usage: 3.3+ KB


In [36]:
pairs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259301 entries, 0 to 259300
Data columns (total 2 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   id         259301 non-null  int64 
 1   friend_id  259301 non-null  object
dtypes: int64(1), object(1)
memory usage: 4.0+ MB


In [39]:
# Transforming id to string for filter and graph visualization later
top_100_following['friend_id'] = top_100_following['friend_id'].apply(lambda x: str(x))

In [40]:
# Sebset of id-friends_id where friends_id is one of the most popular pages
pairs_with_100_following = pairs_df[pairs_df['friend_id'].isin(top_100_following['friend_id'])]
pairs_with_100_following

Unnamed: 0,id,friend_id
1,191716116,56518990213
2,191716116,528817151
97,191716116,52236156625
632,191716116,32163385284
1040,16596631,56518990213
...,...,...
258914,1028011269,7761896532
258987,1028011269,39128469026
259004,1028011269,45695766729
259009,1028011269,32994954632


In [41]:
# Adding the page names
pairs_with_100_following = pd.merge(pairs_with_100_following, top_100_following[['friend_id','top_friend_username']], how = 'left', on = 'friend_id')
pairs_with_100_following

Unnamed: 0,id,friend_id,top_friend_username
0,191716116,56518990213,frames__store
1,191716116,528817151,nasa
2,191716116,52236156625,tbilisi_pike
3,191716116,32163385284,plantloverstbilisi
4,16596631,56518990213,frames__store
...,...,...,...
5171,1028011269,7761896532,matters.ge
5172,1028011269,39128469026,printshop_jpg
5173,1028011269,45695766729,retro_bazaari
5174,1028011269,32994954632,the_arthousegallery


### Network Visualization
Let's try to build a network to explore the relationship between top pages

In [44]:
# Since pairs_with_100_following has 5176 rows and it might be too big for visualization, let's take top 10 pages
pairs_top_10 = pairs_with_100_following[pairs_with_100_following['friend_id'].isin(top_100_following['friend_id'][0:9])]
pairs_top_10

Unnamed: 0,id,friend_id,top_friend_username
0,191716116,56518990213,frames__store
1,191716116,528817151,nasa
2,191716116,52236156625,tbilisi_pike
4,16596631,56518990213,frames__store
6,16596631,9777455,zendaya
...,...,...,...
5137,332849787,56518990213,frames__store
5144,332849787,8577689525,magnoliafilmlab
5149,1028011269,56518990213,frames__store
5150,1028011269,528817151,nasa


In [52]:
G = nx.from_pandas_edgelist(pairs_top_10, 
                            source = "id", 
                            target = "top_friend_username", 
                            create_using = nx.Graph())

In [53]:
net = Network(notebook = True,
            cdn_resources='in_line', 
            width="1000px", 
            height="700px", 
            bgcolor='#ffffff', 
            font_color='black',
            select_menu=True,
            filter_menu=True)

net.show_buttons()
net.toggle_physics(True)

node_degree = dict(G.degree)

#Setting up node size attribute
nx.set_node_attributes(G, node_degree, 'size')

In [54]:
# Check the attributes
for node, attributes in G.nodes(data=True):
    print("Node:", node)
    print("Attributes:", attributes)

Node: 191716116
Attributes: {'size': 3}
Node: frames__store
Attributes: {'size': 354}
Node: nasa
Attributes: {'size': 82}
Node: tbilisi_pike
Attributes: {'size': 86}
Node: 16596631
Attributes: {'size': 4}
Node: zendaya
Attributes: {'size': 87}
Node: magnoliafilmlab
Attributes: {'size': 98}
Node: 44577963743
Attributes: {'size': 2}
Node: 58618143472
Attributes: {'size': 1}
Node: 2060188988
Attributes: {'size': 2}
Node: 50697580948
Attributes: {'size': 1}
Node: 1571238388
Attributes: {'size': 4}
Node: kendalljenner
Attributes: {'size': 86}
Node: bellahadid
Attributes: {'size': 86}
Node: 325129721
Attributes: {'size': 4}
Node: tavisupleba
Attributes: {'size': 77}
Node: 8524860861
Attributes: {'size': 1}
Node: 24377708608
Attributes: {'size': 5}
Node: retro_camera_shop
Attributes: {'size': 91}
Node: 1471161400
Attributes: {'size': 3}
Node: 31141223158
Attributes: {'size': 3}
Node: 61160304079
Attributes: {'size': 2}
Node: 37330553
Attributes: {'size': 3}
Node: 40166693794
Attributes: {'siz

In [55]:
net.from_nx(G)
html = net.generate_html()
with open("visualizations/top_pages_network.html", mode='w', encoding='utf-8') as fp:
        fp.write(html)

Preliminary visualization showed that there are pages that are different in **theme** and they seem to be **grouping** together. Let's explore these communities and relationships between them

### Exploring communities

In [56]:
# Self join pairs_with_100_following on id to find shared followers between pages 
pairs_with_shared_followers = pairs_with_100_following[['top_friend_username', 'id']].merge(pairs_with_100_following[['top_friend_username', 'id']], on='id', suffixes=('_source', '_target'))
pairs_with_shared_followers

Unnamed: 0,top_friend_username_source,id,top_friend_username_target
0,frames__store,191716116,frames__store
1,frames__store,191716116,nasa
2,frames__store,191716116,tbilisi_pike
3,frames__store,191716116,plantloverstbilisi
4,nasa,191716116,frames__store
...,...,...,...
126143,b.azzzar,1028011269,matters.ge
126144,b.azzzar,1028011269,printshop_jpg
126145,b.azzzar,1028011269,retro_bazaari
126146,b.azzzar,1028011269,the_arthousegallery


In [57]:
unique_pairs = pairs_with_shared_followers[pairs_with_shared_followers['top_friend_username_source'] != pairs_with_shared_followers['top_friend_username_target']]
unique_pairs

Unnamed: 0,top_friend_username_source,id,top_friend_username_target
1,frames__store,191716116,nasa
2,frames__store,191716116,tbilisi_pike
3,frames__store,191716116,plantloverstbilisi
4,nasa,191716116,frames__store
6,nasa,191716116,tbilisi_pike
...,...,...,...
126142,b.azzzar,1028011269,cinemaholics.tbilisi
126143,b.azzzar,1028011269,matters.ge
126144,b.azzzar,1028011269,printshop_jpg
126145,b.azzzar,1028011269,retro_bazaari


In [58]:
# Sorting values to make sure there are no reverse pairs
relationship_df = pd.DataFrame(np.sort(unique_pairs[['top_friend_username_source', 'top_friend_username_target']].values, axis = 1), columns = unique_pairs[['top_friend_username_source', 'top_friend_username_target']].columns)
relationship_df

Unnamed: 0,top_friend_username_source,top_friend_username_target
0,frames__store,nasa
1,frames__store,tbilisi_pike
2,frames__store,plantloverstbilisi
3,frames__store,nasa
4,nasa,tbilisi_pike
...,...,...
120967,b.azzzar,cinemaholics.tbilisi
120968,b.azzzar,matters.ge
120969,b.azzzar,printshop_jpg
120970,b.azzzar,retro_bazaari


In [59]:
# Value represents shared follower
relationship_df['value'] = 1

In [60]:
relationship_df = relationship_df.groupby(['top_friend_username_source', 'top_friend_username_target']).sum().reset_index()
relationship_df

Unnamed: 0,top_friend_username_source,top_friend_username_target,value
0,2muchuse,9gag,16
1,2muchuse,anoukigold,32
2,2muchuse,atinati__,22
3,2muchuse,b.azzzar,32
4,2muchuse,badgalriri,26
...,...,...,...
4944,vinylcafe_tbilisi,zelenskiy_official,16
4945,vinylcafe_tbilisi,zendaya,26
4946,voguemagazine,zelenskiy_official,38
4947,voguemagazine,zendaya,60


In [62]:
relationship_df_sorted = relationship_df.sort_values(by ='value', ascending=False)
relationship_df_sorted

Unnamed: 0,top_friend_username_source,top_friend_username_target,value
2199,frames__store,magnoliafilmlab,196
2218,frames__store,retro_camera_shop,182
2247,frames__store,zendaya,174
870,bellahadid,frames__store,172
2190,frames__store,kendalljenner,172
...,...,...,...
1011,billieeilish,studiovision.tbilisi,2
3851,magnumphotos,tbiliniavi,2
4673,sportsmoodco,studiovision.tbilisi,2
649,balenciaga,studiovision.tbilisi,2


In [80]:
relationship_df_short = relationship_df_sorted[relationship_df_sorted['value']>=40]
relationship_df_short

Unnamed: 0,top_friend_username_source,top_friend_username_target,value
2199,frames__store,magnoliafilmlab,196
2218,frames__store,retro_camera_shop,182
2247,frames__store,zendaya,174
870,bellahadid,frames__store,172
2190,frames__store,kendalljenner,172
...,...,...,...
4027,matters.ge,tavisupleba,40
3256,johnnydepp,nasa,40
3451,kimkardashian,tchalamet,40
3517,kvara7,zendaya,40


In [81]:
# Create a graph from a pandas dataframe
H = nx.from_pandas_edgelist(relationship_df_short, 
                            source = "top_friend_username_source", 
                            target = "top_friend_username_target", 
                            edge_attr = "value", 
                            create_using = nx.Graph())


In [82]:
# Create community tags
communities = community_louvain.best_partition(H)
communities

{'frames__store': 0,
 'magnoliafilmlab': 1,
 'retro_camera_shop': 1,
 'zendaya': 4,
 'bellahadid': 3,
 'kendalljenner': 4,
 'tbilisi_pike': 1,
 'nasa': 4,
 'tavisupleba': 1,
 'fake.film.shop': 1,
 'formulaoriginals': 1,
 'the_goodfilms': 2,
 'celestial.09': 2,
 'natgeo': 1,
 'tchalamet': 4,
 'meraki.ge': 0,
 'indigo_magazine': 1,
 'bohista.tribal': 0,
 'kyliejenner': 4,
 'selenagomez': 4,
 'hammockmagazine': 1,
 'gigihadid': 4,
 'kvara7': 1,
 'people_tbilisi': 1,
 'printshop_jpg': 1,
 'voguemagazine': 4,
 'khaleniwear': 0,
 'badgalriri': 4,
 'glammerr_store': 0,
 'matters.ge': 0,
 'tavrieli_': 0,
 'tomholland2013': 4,
 'harrystyles': 4,
 'the_arthousegallery': 1,
 'jenniferaniston': 4,
 'cinemamonamourpage': 2,
 'buddyshoptbilisi': 0,
 'balenciaga': 3,
 'mathilda.gvarliani': 1,
 'booksfrompast': 1,
 'paulinewears': 0,
 '9gag': 1,
 'fid_____el': 0,
 '2muchuse': 0,
 'haileybieber': 4,
 'vintage.moduss': 0,
 'johnnydepp': 4,
 'zelenskiy_official': 1,
 'soso_aroundtheworld': 0,
 'sukhishvi

In [83]:
nx.set_node_attributes(H, communities, 'group')
net = Network(notebook = True,
            cdn_resources='in_line', 
            width="1000px", 
            height="700px", 
            bgcolor='#ffffff', 
            font_color='black',
            select_menu=True,
            filter_menu=True)

net.show_buttons()
net.toggle_physics(True)

node_degree = dict(H.degree)

#Setting up node size attribute
nx.set_node_attributes(H, node_degree, 'size')

net.from_nx(H)

In [84]:
html = net.generate_html()
with open("visualizations/communities.html", mode='w', encoding='utf-8') as fp:
        fp.write(html)