In [None]:
pip install py2neo

# Overview of the data
To create the social network graph, I sourced the [Twitter friends dataset](https://www.kaggle.com/datasets/hwassner/TwitterFriends) from Kaggle. The data contains 40,000 rows of user information; for the sake of visualization in Neo4j, I randomly sampled 300 rows of the original data into a separate DataFrame, which is shown below.

In [1]:
import ast
import os

import pandas as pd
from py2neo import Graph, Node, Relationship

In [2]:
# To change the working directory to the current directory.
%cd /home/jovyan/work

# Connect to the Neo4j database.
graph = Graph("bolt://neo4j:7687", auth=("neo4j", "password"))

/home/jovyan/work


In [3]:
# Read the sampled Twitter data from disk into a pandas DataFrame.
csv_path = 'data/data.csv'
data = pd.read_csv(csv_path)

# Show the first few rows as a quick sanity check.
data.head()

Unnamed: 0,screenName,tags,avatar,followersCount,friendsCount,lang,lastSeen,tweetId,id,friends
0,"""""""camvaleska""""""","""[ """"#respecttylerjoseph"""" ]""","""""""http://pbs.twimg.com/profile_images/7275889...",3012,977,"""""""en""""""",1472264461130,"""""""769302434405572608""""""",82823,"[20241, 26779, 28737, 71597, 68647, 65249, 548..."
1,"""""""unareid14""""""","""[ """"#nationaldogday"""" ]""","""""""http://pbs.twimg.com/profile_images/7600919...",828,471,"""""""en""""""",1472264323580,"""""""769303240445943808""""""",23526,"[37000, 87755]"
2,"""""""nikki_nkr""""""","""[ """"#nationaldogday"""" ]""","""""""http://pbs.twimg.com/profile_images/7530701...",346,271,"""""""en""""""",1472258558820,"""""""769298506112638977""""""",26590,"[47359, 94038, 28887, 58357, 62781, 95756, 71397]"
3,"""""""itsathirahnasir""""""","""[ """"#happyjungkookday"""" ]""","""""""http://pbs.twimg.com/profile_images/7706216...",186,114,"""""""en""""""",1472669105070,"""""""771055063825387520""""""",97667,"[37460, 84837, 50155]"
4,"""""""_ughcrying""""""","""[ """"#respecttylerjoseph"""" ]""","""""""http://pbs.twimg.com/profile_images/7664517...",256,220,"""""""en""""""",1472266228272,"""""""769305146392064000""""""",48114,[]


## Creating nodes and relationships
The function below creates users and tags as nodes, and `HAS_TAG` and `FRIENDS_WITH` as relationships between those nodes, using Cypher queries issued through py2neo.

In [4]:

def _parse_tags(raw_tags):
    """Split a raw ``tags`` cell such as ``[ "#a", "#b" ]`` into clean tag names."""
    names = []
    for piece in raw_tags.strip('[]').split(','):
        name = piece.strip().strip('"')
        # Test for emptiness *after* stripping, so a cell like "[ ]" does not
        # produce a Tag node with an empty name.
        if name:
            names.append(name)
    return names


def create_user_nodes_tags_and_friendships(data):
    """Load users, their tags and their friendships from ``data`` into Neo4j.

    The load runs in two passes. The first pass creates one ``User`` node per
    row, plus its ``Tag`` nodes and ``HAS_TAG`` relationships. The second pass
    creates the ``FRIENDS_WITH`` relationships. Friendships are deferred so
    that a friend whose row appears *later* in ``data`` already exists when it
    is looked up; resolving friends while users are still being inserted
    silently drops those edges.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``id``, ``screenName``, ``avatar``,
        ``followersCount``, ``friendsCount``, ``lang``, ``lastSeen``,
        ``tweetId``, ``tags`` (string such as ``[ "#a", "#b" ]``) and
        ``friends`` (string holding a Python list literal of user ids).

    Notes
    -----
    Writes through the module-level ``graph`` connection. Friend ids that
    match neither a row of ``data`` nor an existing ``User`` node in the
    database are skipped.
    """
    created_users = []  # (user_node, raw friends string), in row order
    nodes_by_id = {}    # first node created for each user id in this run

    # Pass 1: users, tags and HAS_TAG relationships.
    for _, row in data.iterrows():
        user_node = Node("User",
                         id=row['id'],
                         screenName=row['screenName'],
                         avatar=row['avatar'],
                         followersCount=row['followersCount'],
                         friendsCount=row['friendsCount'],
                         lang=row['lang'],
                         lastSeen=row['lastSeen'],
                         tweetId=row['tweetId'])
        graph.create(user_node)
        created_users.append((user_node, row['friends']))
        nodes_by_id.setdefault(row['id'], user_node)

        for tag_name in _parse_tags(row['tags']):
            tag_node = Node("Tag", name=tag_name)
            # Merge on the tag name so users with the same tag share one Tag node.
            graph.merge(tag_node, "Tag", "name")
            graph.create(Relationship(user_node, "HAS_TAG", tag_node))

    # Pass 2: friendships, now that every user in ``data`` exists.
    for user_node, raw_friends in created_users:
        # The 'friends' column holds the string form of a list of user ids.
        for friend_id in ast.literal_eval(raw_friends):
            friend_node = nodes_by_id.get(friend_id)
            if friend_node is None:
                # Not part of this load: fall back to a User already in the database.
                friend_node = graph.nodes.match("User", id=friend_id).first()
            if friend_node is not None:
                graph.create(Relationship(user_node, "FRIENDS_WITH", friend_node))


# Populate the Neo4j graph from the DataFrame loaded earlier in the notebook.
# NOTE(review): User nodes are inserted with graph.create, so re-running this
# cell presumably adds duplicate users — confirm, and clear the database
# before re-running if so.
create_user_nodes_tags_and_friendships(data)
