In [24]:
#THIS NOTEBOOK WILL READ DATA FROM CSV FILE, AND CREATE NODES AND BASIC RELATIONSHIPS IN GIVEN NEO4J DB.

from neo4j import GraphDatabase, basic_auth
from neo4j.exceptions import Neo4jError
import neo4j.time
import csv
import json
import time
import os
from flask_restful_swagger_2 import Api, swagger, Schema


#USER CONFIG
# Connection settings are read from the environment when available so that real
# credentials do not have to be stored in the notebook. The literals are only
# the fallbacks used when the corresponding variable is unset.
DATABASE_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(review): this fallback password is committed in plain text -- rotate it
# and supply the real one through NEO4J_PASSWORD.
DATABASE_PASSWORD = os.environ.get("NEO4J_PASSWORD", "spade-discounts-switch")
DATABASE_URL = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
# CSV file the node properties are loaded from (relative to the working directory)
DATA_FILE_PATH = 'dev-to-articles.csv'
#USER CONFIG END
def cprint(content,module='DEBUG',*args):
    '''
    Print a colourised log line followed by the current local time
    Args:
        content: message text (must be a str, it is concatenated as-is)
        module: tag shown in brackets in front of the message
        *args: optional extra values, printed as a tuple after the message
    Return:
        None
    '''
    # ANSI escapes: highlighted "[module]" badge, then reset
    badge = '\033[1;32;43m ['+module+'] \033[0m '
    # Extra values are rendered in a second colour only when some were given
    extras = '\033[1;35m' + str(args) + ' \033[0m' if args else ''
    stamp = time.strftime(" |%Y-%m-%d %H:%M:%S|", time.localtime())
    print(badge + content + extras + stamp)
        
# Single driver instance shared by every session opened in this notebook
driver = GraphDatabase.driver(
    DATABASE_URL,
    auth=basic_auth(DATABASE_USERNAME, str(DATABASE_PASSWORD)),
)

# Swagger schema describing an article: every property is a string except
# reading_time and public_reactions_count, which are integers.
class ArticleModel(Schema):
    type = 'object'
    properties = {
        'id': {'type': 'string'},
        'title': {'type': 'string'},
        'url': {'type': 'string'},
        'main_image_url': {'type': 'string'},
        'reading_time': {'type': 'integer'},
        'tag_names': {'type': 'string'},
        'published_at': {'type': 'string'},
        'public_reactions_count': {'type': 'integer'},
        'source_site': {'type': 'string'},
    }

# Swagger schema describing an author: id, username and name, all strings.
class AuthorModel(Schema):
    type = 'object'
    properties = {
        'id': {'type': 'string'},
        'username': {'type': 'string'},
        'name': {'type': 'string'},
    }

# Swagger schema describing a tag: its name and its search keywords, both strings.
class TagModel(Schema):
    type = 'object'
    properties = {
        'name': {'type': 'string'},
        'keywords_for_search': {'type': 'string'},
    }
def serialize_article(article):
    '''
    Copy the exported article fields into a plain dict
    Args:
        article: mapping-like object indexable by field name
    Return:
        dict with the keys id, title, url, main_image_url, reading_time,
        tag_names, published_at and source_site
    '''
    # NOTE(review): ArticleModel also declares public_reactions_count, which is
    # not emitted here -- confirm whether that is intentional.
    exported_fields = (
        'id',
        'title',
        'url',
        'main_image_url',
        'reading_time',
        'tag_names',
        'published_at',
        'source_site',
    )
    return {field: article[field] for field in exported_fields}
def serialize_author(author):
    '''
    Copy the exported author fields into a plain dict
    Args:
        author: mapping-like object indexable by field name
    Return:
        dict with the keys id, username and name
    '''
    exported_fields = ('id', 'username', 'name')
    return {field: author[field] for field in exported_fields}
def serialize_tag(tag):
    '''
    Copy the exported tag fields into a plain dict
    Args:
        tag: mapping-like object indexable by field name
    Return:
        dict with the keys name and keywords_for_search
    '''
    exported_fields = ('name', 'keywords_for_search')
    return {field: tag[field] for field in exported_fields}
   
def db_create_nodes(session,labels,properties):
    '''
    Create nodes with custom labels and properties
    Args:
        session: db session,driver.session()
        labels: ["Human", "MovieStar"]
        properties => [{name: "Tom Cruise", placeOfBirth: "Syracuse, New York, United States"},
                        {name: "Reese Witherspoon", placeOfBirth: "New Orleans, Louisiana, United States"}]
    Return:
        List of records returned by apoc.create.nodes (the added nodes)
    '''
    def _cypher(tx, labels, properties):
        # Materialise the result inside the transaction function so the
        # records are still usable after the transaction has finished
        return list(tx.run(
            '''
            CALL apoc.create.nodes( $labels, $properties);
            ''', {'labels': labels , 'properties': properties}
        ))
    result = session.write_transaction(_cypher,labels,properties)
    # A space separates the count from the text (was printed as "1334record added")
    cprint(str(len(result))+' record added','DB')
    return result

def db_get_nodes(session,label,limit):
    '''
    Return Nodes with a given label
    Args:
        session: db session,driver.session()
        label: a single label ("Human") or a list of labels (["Human"]);
               a node must carry every given label to be returned
        limit: max number of nodes
    Return:
        List of records, one per matched node (column n)
    '''
    # Accept both a plain string and a list of labels
    labels = [label] if isinstance(label, str) else list(label)

    def _cypher(tx, labels, limit):
        # Cypher parameters cannot stand in for a label, so "MATCH (n:$label)"
        # is rejected by the server. Filtering through labels(n) keeps the
        # query fully parameterised (no string interpolation), at the price of
        # not using the label index.
        return list(tx.run(
            '''
            MATCH (n)
            WHERE all(required IN $labels WHERE required IN labels(n))
            RETURN n LIMIT $limit
            ''', {'labels': labels , 'limit': limit}
        ))
    result = session.read_transaction(_cypher,labels,limit)
    cprint('Get '+str(len(result))+' records','DB')
    return result

def db_create_relation(session,label,props,id1,id2):
    '''
    Create a relation between two nodes identified by their internal ids
    Args:
        session: db session,driver.session()
        label: relationship type, e.g. "HAS_TAG"
        props: dict of properties stored on the relationship
        id1: internal id of the start node
        id2: internal id of the end node
    Return:
        List of records (column rel); empty when either node was not found
    '''
    def _cypher(tx,label,id1,id2,props):
        # apoc.create.relationship is used because the relationship type is
        # passed as a parameter ($label)
        return list(tx.run(
        '''
        MATCH (p) WHERE id(p) = $id1
        MATCH (m) WHERE id(m) = $id2
        CALL apoc.create.relationship(p, $label, $props, m)
        YIELD rel
        RETURN rel;
        ''',{'label':label,'props':props,'id1':id1,'id2':id2}
        ))
    # Positional order follows the public signature (props before the ids);
    # the wrapper below forwards each value to the matching _cypher parameter.
    result = session.write_transaction(
        lambda tx: _cypher(tx,label,id1,id2,props)
    )
    # A space separates the count from the text (was printed as "Nrecord added")
    cprint(str(len(result))+' record added','DB')
    return result



        

In [32]:
# This block builds 3 de-duplicated lists of node properties from the given csv file:
# Article :
article_props = []
# Author:
author_props = []
# Tag:
tag_props = []

def _add_unique(prop, seen, bucket):
    '''Append prop to bucket unless an identical property dict was already added.'''
    # Dicts are not hashable, so their (key, value) pairs serve as the set key;
    # this replaces a linear "dict in list" scan per row.
    fingerprint = tuple(prop.items())
    if fingerprint not in seen:
        seen.add(fingerprint)
        bucket.append(prop)

if not os.path.exists(DATA_FILE_PATH):
    cprint(DATA_FILE_PATH +' does not exist')
elif not os.path.getsize(DATA_FILE_PATH):
    cprint(DATA_FILE_PATH +' is empty')
else:
    seen_articles, seen_authors, seen_tags = set(), set(), set()
    row_count = 0
    # newline='' lets the csv module deal with line breaks inside quoted fields.
    # The with-statement closes the file, so no explicit close() is needed.
    with open(DATA_FILE_PATH, mode='r', encoding="utf-8", newline='') as data_file_r:
        csv_reader = csv.DictReader(data_file_r)
        cprint(f'Processing CSV header {", ".join(csv_reader.fieldnames or [])}','CSV')
        for row in csv_reader:
            row_count += 1
            # Each kind of node is de-duplicated independently. A duplicate
            # article or author must not abandon the row, otherwise the tags
            # (and authors) of that row would never be collected.
            _add_unique({
                'id': row['id'],
                'title': row['title'],
                'url': row['url'],
                'main_image_url': row['main_image_url'],
                'reading_time': row['reading_time'],
                'tag_names': row['tag_names'],
                'published_at': row['published_at'],
                'source_site':'dev.to',
            }, seen_articles, article_props)

            _add_unique({
                'id': row['author_id'],
                'username': row['author_username'],
                'name': row['author_name'],
            }, seen_authors, author_props)

            # tag_names is a '+'-separated list, e.g. "tutorial+beginners"
            for tag in (row['tag_names'] or '').split('+'):
                if not tag:
                    # An empty tag_names field would otherwise yield a tag named ''
                    continue
                _add_unique({
                    'name':tag,
                    'keywords_for_search':''
                }, seen_tags, tag_props)
    # row_count is the number of data rows read from the file
    cprint(f'File processed successfully with {row_count} ids.','CSV')
        



[1;32;43m [CSV] [0m Processing CSV header id, title, url, main_image_url, reading_time, author_name, author_username, author_id, published_at, tag_names, keywords_for_search, comments_count, public_reactions_count, highlight |2021-04-02 21:14:07|
[1;32;43m [CSV] [0m File processed successfully with 1334 ids. |2021-04-02 21:14:08|


In [41]:
# Only the Author nodes are created by this cell. The Article and Tag calls are
# kept disabled below; db_create_nodes always creates new nodes, so enable them
# only when those nodes do not exist yet.
with driver.session() as session:
    # res = db_create_nodes(session,['Article'],article_props)
    # res = db_create_nodes(session,['Tag'],tag_props)
    res = db_create_nodes(session,['Author'],author_props)
    # No explicit session.close(): the with-statement closes the session on exit.

[1;32;43m [DB] [0m 1334record added |2021-04-02 21:33:53|


In [43]:
# Peek at two of the collected tag property dicts (items 1 and 2)
tag_props[1:3]

[{'name': 'beginners', 'keywords_for_search': ''},
 {'name': 'devops', 'keywords_for_search': ''}]

In [44]:
# Peek at the first three collected article property dicts
article_props[0:3]

[{'id': '494489',
  'title': 'Full Docker Course [FREE] 🎉 🐳',
  'url': 'https://dev.to//techworld_with_nana/full-docker-course-free-4hl3',
  'main_image_url': 'https://dev-to-uploads.s3.amazonaws.com/i/a0gvmzph343m9wvjys6h.png',
  'reading_time': '2',
  'tag_names': 'tutorial+beginners+devops+docker',
  'published_at': '2020-10-22T07:40:13.566Z',
  'source_site': 'dev.to'},
 {'id': '478718',
  'title': 'ReactJS Roadmap 🗺 For Developers.💻',
  'url': 'https://dev.to//theme_selection/reactjs-roadmap-for-developers-2824',
  'main_image_url': 'https://res.cloudinary.com/practicaldev/image/fetch/s--1sbeprFD--/c_imagga_scale,f_auto,fl_progressive,h_420,q_auto,w_1000/https://dev-to-uploads.s3.amazonaws.com/i/4t5s6u97jg9czgrdb6bk.png',
  'reading_time': '7',
  'tag_names': 'beginners+javascript+react+webdev',
  'published_at': '2020-10-06T13:53:31.598Z',
  'source_site': 'dev.to'},
 {'id': '458580',
  'title': '10 useful HTML5 features, you may not be using',
  'url': 'https://dev.to//atapas/10

In [None]:
#DBF
def db_create_tag_relation_to_article(session,tag_name,article_url):
    '''
    Link an Article node to a Tag node with a HAS_TAG relationship
    Args:
        session: db session,driver.session()
        tag_name: value of the name property of the Tag node
        article_url: value of the url property of the Article node
    Return:
        List of records (column rel); empty when the Article or the Tag
        node was not found
    '''
    def _cypher(tx,tag_name,article_url):
        # MERGE instead of CREATE: re-running the cell reuses an existing
        # HAS_TAG relationship instead of adding a duplicate one each time
        return list(tx.run(
        '''
        MATCH (a:Article {url: $article_url})
        MATCH (b:Tag {name: $tag_name})
        MERGE (a)-[rel:HAS_TAG]->(b)
        RETURN rel
        ''',{'article_url':article_url,'tag_name':tag_name}
        ))
    result = session.write_transaction(_cypher,tag_name,article_url)
    return result
# Link every article to each of its tags and report how many links succeeded
with driver.session() as session:
    counter=0
    for article in article_props:
        # tag_names is a '+'-separated list, e.g. "tutorial+beginners"
        for tag in article['tag_names'].split('+'):
            if not tag:
                # Nothing to link when the article has an empty tag name
                continue
            res = db_create_tag_relation_to_article(session,tag,article['url'])
            if res:
                counter+=1
            else:
                # An empty result means the Article or the Tag node was not matched
                cprint('Failed to create relation with '+str(article['url'])+" -> "+str(tag))
    cprint(str(counter)+' record added','DB')
    # No explicit session.close(): the with-statement closes the session on exit.

In [77]:
# Show the value currently bound to res
# NOTE(review): presumably the result of the last relationship write; an empty list means no record was returned -- confirm
res

[]

NameError: name 'time' is not defined