In [43]:
import os
import re
from pprint import pprint

import networkx as nx
import pandas as pd
import py2neo as pn
from neo4j import GraphDatabase

## User -[:ORDERED]-> Product graph

Basic questions to answer:
* Users with most purchases, most ordered products/brands
* Demographic segments and their product preferences
* Centrality

In [4]:
# Open a Bolt connection through the official neo4j driver.
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USER,
# NEO4J_PASSWORD) so real credentials need not be committed with the notebook;
# the fallbacks are the original local-development values.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:11004"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "test"),
    ),
    encrypted=False,
)
# One session is kept open and reused by the query cells that call session.run().
session = driver.session()

In [5]:
# Top ten users by number of ORDERED matches, issued through the driver session.
top_buyers_cypher = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.id as user, count(p.id) as num_products
                        order by num_products desc
                        limit 10"""
result = session.run(top_buyers_cypher)
# Printing the handle only shows the Result object's repr; the rows themselves
# are read by iterating over `result`.
print(result)

<neo4j.work.result.Result object at 0x115107c88>


In [9]:
# Walk the rows returned by the query held in `result`, printing one Record per line.
for record in result:
    print(record)

<Record user='2d25359f9b' num_products=605>
<Record user='c1c8327987' num_products=529>
<Record user='7ea01030cb' num_products=169>
<Record user='1a75247e23' num_products=158>
<Record user='bd2cea0899' num_products=150>
<Record user='6f6e75e6d4' num_products=146>
<Record user='ed292ef15b' num_products=120>
<Record user='a309b4f150' num_products=108>
<Record user='3b27615997' num_products=108>
<Record user='aa6f8d33c2' num_products=94>


In [11]:
# py2neo connection used by the DataFrame-producing query cells (graph.run(...).data()).
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USER,
# NEO4J_PASSWORD) so real credentials need not be committed with the notebook;
# the fallbacks are the original local-development values.
graph = pn.Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:11004"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "test"),
    ),
)

### Users with most purchases, most ordered products/brands

Users who order the most products

In [47]:
# Rank users by how many ORDERED matches they have and keep the top ten.
top_users_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.id as user, count(p.id) as num_products
                        order by num_products desc
                        limit 10"""
pd.DataFrame(graph.run(top_users_query).data())

Unnamed: 0,user,num_products
0,2d25359f9b,605
1,c1c8327987,529
2,7ea01030cb,169
3,1a75247e23,158
4,bd2cea0899,150
5,6f6e75e6d4,146
6,ed292ef15b,120
7,a309b4f150,108
8,3b27615997,108
9,aa6f8d33c2,94



Products with the most orders

In [49]:
# Rank products by how many ORDERED matches point at them and keep the top ten.
top_products_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return p.id as product, count(u.id) as num_ordered
                        order by num_ordered desc
                        limit 10"""
pd.DataFrame(graph.run(top_products_query).data())

Unnamed: 0,product,num_ordered
0,068f4481b3,23655
1,3c79df1d80,14463
2,a9109972d1,12176
3,8dc4a01dec,9891
4,fbce41fd82,8386
5,904fbf8b97,7913
6,ca7647a231,6456
7,38d636d2a6,6446
8,7e4cb4952a,5567
9,adfedb6893,5486



Brands with the most orders

In [51]:
# Group ORDERED matches by the product's brand property and keep the ten
# brands with the highest counts.
top_brands_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return p.brand as brand, count(u.id) as brand_num_ordered
                        order by brand_num_ordered desc
                        limit 10"""
pd.DataFrame(graph.run(top_brands_query).data())

Unnamed: 0,brand,brand_num_ordered
0,9b0d3a5fc6,52723
1,99d41501ff,51369
2,5ab8ea8556,28839
3,43999af013,21482
4,3daeabd2ce,19881
5,4f65703579,18324
6,906d0caa60,15994
7,0b0f75e8d5,15075
8,4efb032b5a,14986
9,204dbedf50,13221



### Demographic segments and their preferences

Gender and products ordered

In [70]:
# Per gender value: total ORDERED matches and number of distinct product ids.
# The query has no ORDER BY, so rows come back in whatever order the database yields.
orders_by_gender_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.gender as gender, count(p.id) as total_products_ordered, 
                        count(distinct p.id) as unique_products_ordered"""
pd.DataFrame(graph.run(orders_by_gender_query).data())

Unnamed: 0,gender,total_products_ordered,unique_products_ordered
0,M,121353,4796
1,F,338480,7331
2,U,68444,4614



Age and products ordered

In [69]:
# Per age bucket: total ORDERED matches and number of distinct product ids.
# The query has no ORDER BY, so rows come back in whatever order the database yields.
orders_by_age_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.age as age, count(p.id) as total_products_ordered, 
                        count(distinct p.id) as unique_products_ordered"""
pd.DataFrame(graph.run(orders_by_age_query).data())

Unnamed: 0,age,total_products_ordered,unique_products_ordered
0,36-45,94795,4792
1,26-35,211477,5730
2,U,67529,4580
3,16-25,116540,4622
4,46-55,21542,2630
5,>=56,16360,2151
6,<=15,34,34



Purchase power and products ordered

In [71]:
# Per purchase_power value: total ORDERED matches and number of distinct product ids.
# The query has no ORDER BY, so rows come back in whatever order the database yields.
orders_by_purchase_power_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.purchase_power as purchase_power, count(p.id) as total_products_ordered, 
                        count(distinct p.id) as unique_products_ordered"""
pd.DataFrame(graph.run(orders_by_purchase_power_query).data())

Unnamed: 0,purchase_power,total_products_ordered,unique_products_ordered
0,2,277740,6667
1,-1,116498,5589
2,3,111003,4868
3,1,10614,1500
4,4,12247,1974
5,5,175,124



Membership and products ordered

In [72]:
# Per membership value: total ORDERED matches and number of distinct product ids.
# The query has no ORDER BY, so rows come back in whatever order the database yields.
orders_by_membership_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.membership as membership, count(p.id) as total_products_ordered, 
                        count(distinct p.id) as unique_products_ordered"""
pd.DataFrame(graph.run(orders_by_membership_query).data())

Unnamed: 0,membership,total_products_ordered,unique_products_ordered
0,1,95184,3744
1,0,433093,8426



Gender and unique brands

In [60]:
# Number of distinct brands ordered per gender value, largest first.
brands_by_gender_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.gender as gender, count(distinct p.brand) as brands_ordered
                        order by brands_ordered desc"""
pd.DataFrame(graph.run(brands_by_gender_query).data())

Unnamed: 0,gender,brands_ordered
0,F,960
1,M,759
2,U,736



Combining demographic factors: gender and age

In [73]:
# Per (gender, age) combination: total ORDERED matches and number of distinct product ids.
# The query has no ORDER BY, so rows come back in whatever order the database yields.
orders_by_gender_age_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.gender as gender, u.age as age, count(p.id) as total_products_ordered, 
                        count(distinct p.id) as unique_products_ordered"""
pd.DataFrame(graph.run(orders_by_gender_age_query).data())

Unnamed: 0,gender,age,total_products_ordered,unique_products_ordered
0,M,36-45,26595,2486
1,F,26-35,154393,5049
2,U,U,67529,4580
3,M,26-35,56924,3177
4,F,36-45,68014,4191
5,F,16-25,88448,4087
6,M,16-25,27751,2367
7,F,46-55,15791,2255
8,F,>=56,11811,1805
9,M,>=56,4459,1020



Combining demographic factors (gender, age, purchase power), ranked by the number of unique products ordered.

In [74]:
# Rank (gender, age, purchase_power) segments by the number of distinct product
# ids they ordered; the query asks for at most 20 segments.
segments_unique_products_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.gender as gender, u.age as age, u.purchase_power as purchase_power,
                        count(distinct p.id) as unique_products_ordered
                        order by unique_products_ordered desc limit 20"""
pd.DataFrame(graph.run(segments_unique_products_query).data())

Unnamed: 0,gender,age,purchase_power,unique_products_ordered
0,U,U,-1,4580
1,F,26-35,2,4083
2,F,36-45,2,3243
3,F,26-35,3,2799
4,F,16-25,2,2747
5,M,26-35,2,2664
6,F,16-25,3,2500
7,F,36-45,3,2150
8,M,36-45,2,2040
9,F,16-25,-1,1996


In [67]:
# Rank (gender, age, purchase_power) segments by distinct product ids ordered,
# at most 20 segments. This is the same ranking as the preceding cell; only the
# count column's alias differs (products_ordered).
segments_products_query = """match (u:User)-[:ORDERED]->(p:Product)
                        return u.gender as gender, u.age as age, u.purchase_power as purchase_power,
                        count(distinct p.id) as products_ordered
                        order by products_ordered desc limit 20"""
pd.DataFrame(graph.run(segments_products_query).data())

Unnamed: 0,gender,age,purchase_power,products_ordered
0,U,U,-1,4580
1,F,26-35,2,4083
2,F,36-45,2,3243
3,F,26-35,3,2799
4,F,16-25,2,2747
5,M,26-35,2,2664
6,F,16-25,3,2500
7,F,36-45,3,2150
8,M,36-45,2,2040
9,F,16-25,-1,1996
