### Imports

In [1]:
import py2neo
from pprint import pprint
import redis
import json
import credentials as cred

# Print the installed py2neo version so the environment this notebook ran against is recorded
print(py2neo.__version__)

4.2.0


Note: the `json` module is imported because the answers to questions 2 and 9 have more than one part, so they are serialized as JSON before being stored.

### Connect to database

In [2]:
# Connection settings are read from the local `credentials` module so that no
# secret is written in the notebook itself.
username = cred.neo4j_user
password = cred.neo4j_pass
host = cred.neo4j_host
port = cred.neo4j_port

# Pass the credentials through `auth` instead of embedding them in the URI:
# a password containing characters such as '@', ':' or '/' would otherwise
# corrupt the URI and make the connection fail.
secure_graph = py2neo.Graph(f"http://{host}:{port}", auth=(username, password))

# Sanity check that the connection works: count every node in the graph.
secure_graph.run("MATCH () RETURN count(*)").data()

[{'count(*)': 9647597}]

### Getting the existing Labels on the database

In [3]:
# List every distinct label combination carried by the nodes of the graph.
labels_query = """
         MATCH (n)                   
         RETURN distinct Labels(n)                    
"""
labels_cursor = secure_graph.run(labels_query)
result = labels_cursor.data()

pprint(result)

[{'Labels(n)': ['Reviews']},
 {'Labels(n)': ['Username']},
 {'Labels(n)': ['Breweries']},
 {'Labels(n)': ['Country']},
 {'Labels(n)': ['Beers']},
 {'Labels(n)': ['Style']}]


### Getting the existing relationships in the database

In [4]:
# Relationship types attached to Username nodes, together with the labels of
# the node found on the other end of each relationship.
username_links_query = """
        MATCH (u:Username)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(username_links_query).data()
pprint(result)

[{'TYPE(r)': 'MADE', 'labels(i)': ['Reviews']}]


In [5]:
# Relationship types attached to Beers nodes, together with the labels of
# the node found on the other end of each relationship.
beers_links_query = """
        MATCH (b:Beers)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(beers_links_query).data()
pprint(result)

[{'TYPE(r)': 'BREWED_AT', 'labels(i)': ['Breweries']},
 {'TYPE(r)': 'OF_TYPE', 'labels(i)': ['Style']},
 {'TYPE(r)': 'ABOUT', 'labels(i)': ['Reviews']}]


In [6]:
# Relationship types attached to Breweries nodes, together with the labels of
# the node found on the other end of each relationship.
breweries_links_query = """
        MATCH (b:Breweries)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(breweries_links_query).data()
pprint(result)

[{'TYPE(r)': 'BREWED_AT', 'labels(i)': ['Beers']},
 {'TYPE(r)': 'FROM', 'labels(i)': ['Country']}]


In [7]:
# Relationship types attached to Country nodes, together with the labels of
# the node found on the other end of each relationship.
country_links_query = """
        MATCH (c:Country)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(country_links_query).data()
pprint(result)

[{'TYPE(r)': 'FROM', 'labels(i)': ['Breweries']}]


In [8]:
# Relationship types attached to Reviews nodes, together with the labels of
# the node found on the other end of each relationship.
reviews_links_query = """
        MATCH (re:Reviews)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(reviews_links_query).data()
pprint(result)

[{'TYPE(r)': 'ABOUT', 'labels(i)': ['Beers']},
 {'TYPE(r)': 'MADE', 'labels(i)': ['Username']}]


In [9]:
# Relationship types attached to Style nodes, together with the labels of
# the node found on the other end of each relationship.
style_links_query = """
        MATCH (s:Style)-[r]-(i)
        RETURN distinct TYPE(r) , labels(i)
"""
result = secure_graph.run(style_links_query).data()
pprint(result)

[{'TYPE(r)': 'OF_TYPE', 'labels(i)': ['Beers']}]


In [10]:
# Open the Redis connection in which the answers are stored; every setting
# comes from the local `credentials` module.
r = redis.Redis(
    host=cred.redis_host,          # URL where the database is hosted
    port=cred.redis_port,          # server port on which Redis is running
    password=cred.redis_pass,      # password used to authenticate on the server
    decode_responses=True,
)

# Query the server once and report its version and the number of keys in DB0.
server_info = r.info()
redis_version = server_info["redis_version"]
db0_key_count = server_info["db0"]["keys"]

print(f"Server version {redis_version}")
print(f"Database DB0 stores {db0_key_count} keys")

Server version 4.0.8
Database DB0 stores 4791952 keys


**0** - How many beers does the database contain?

In [11]:
# Each beer is assumed to be identified by a unique id, so counting the
# distinct ids gives the number of beers in the database.
unique_beers_query = """
         MATCH(beers:Beers) 
         RETURN count(distinct beers.id) as nr_unique_beer        
"""
result_0 = secure_graph.run(unique_beers_query).data()
pprint(result_0)

[{'nr_unique_beer': 358873}]


In [12]:
# Store the value computed by the query above instead of a hand-copied
# literal, so the stored answer cannot drift from the query result.
r.set("GROUP2:0", str(result_0[0]["nr_unique_beer"]))

True

**1** - How many countries are accounted in this database?

In [13]:
# Count the distinct country codes rather than the Country rows, so a code
# that appeared on more than one node would still be counted once (this also
# matches how the beers are counted for question 0).
result_1 = secure_graph.run("""
         MATCH (c:Country)
         RETURN count(DISTINCT c.country_digit) AS nr_of_countries
""").data()
pprint(result_1)

[{'nr_of_countries': 200}]


In [14]:
# Store the value computed by the query above instead of a hand-copied literal.
r.set("GROUP2:1", str(result_1[0]["nr_of_countries"]))

True

**2** - Which entity holds the most reviews:  
    1. Beer?  
    2. Brewery?
    3. Country? 
    
Reviews are linked directly only to beers. For breweries and countries we therefore count the reviews reached indirectly through their beers; the entity of each type with the most (direct or indirect) reviews is shown below:

In [15]:
# Beers: the id is returned next to the name because the id is what uniquely
# identifies a beer.
most_reviewed_beer_query = """
         MATCH (r:Reviews)-[to:ABOUT]-(be:Beers)
         RETURN be.name as beer_name, be.id as beer_id, count(to) as nr_reviews
         ORDER BY count(to) DESC
         LIMIT 2
"""
result_2_beers = secure_graph.run(most_reviewed_beer_query).data()
print(f"Most reviewed beer: {result_2_beers}\n")

# Breweries: reviews are reached through the beers brewed at each brewery.
# NOTE(review): the grouping key is br.name, so breweries that share a name
# are aggregated together - confirm that brewery names are unique.
most_reviewed_brewery_query = """
         MATCH (r:Reviews)-[to:ABOUT]-(be:Beers)-[ba:BREWED_AT]-(br:Breweries)
         RETURN br.name as brewery_name, count(r) as nr_reviews       
         ORDER BY count(r) DESC
         LIMIT 2
"""
result_2_brewery = secure_graph.run(most_reviewed_brewery_query).data()
print(f"Most reviewed brewery: {result_2_brewery}\n")

# Countries: one more hop, from the brewery to the country it is from.
most_reviewed_country_query = """
         MATCH(r:Reviews)-[to:ABOUT]-(be:Beers)-[ba:BREWED_AT]-(br:Breweries)-[f:FROM]-(c:Country)
         RETURN c.country_digit as country_digit, count(r) as nr_reviews   
         ORDER BY count(r) DESC
         LIMIT 2
"""
result_2_country = secure_graph.run(most_reviewed_country_query).data()
print(f"Most reviewed country: {result_2_country}")

Most reviewed beer: [{'beer_name': 'Breakfast Stout', 'beer_id': 11757, 'nr_reviews': 17160}, {'beer_name': '90 Minute IPA', 'beer_id': 2093, 'nr_reviews': 15947}]

Most reviewed brewery: [{'brewery_name': 'Sierra Nevada Brewing Co.', 'nr_reviews': 175161}, {'brewery_name': 'Stone Brewing', 'nr_reviews': 173286}]

Most reviewed country: [{'country_digit': 'US', 'nr_reviews': 7524410}, {'country_digit': 'BE', 'nr_reviews': 424003}]


In [31]:
# Build the answer from the first (most reviewed) row of each query result
# instead of hand-copied literals, so it always matches the queries above.
top_beer = result_2_beers[0]
question2 = {
    "Most Reviewed Beer": f"beer_id - {top_beer['beer_id']}, beer_name - {top_beer['beer_name']}",
    "Most Reviewed Brewery": result_2_brewery[0]["brewery_name"],
    "Most Reviewed Country": result_2_country[0]["country_digit"],
}

# The answer has three parts, so it is stored as a JSON string.
r.set("GROUP2:2", json.dumps(question2))

True

**3** - Find the user/users that have the most shared reviews (reviews on the same beers) with the user CTJman?

In [17]:
# For every other user, count the reviews they made on beers that CTJman also
# reviewed. `u2 <> u1` keeps CTJman out of the ranking: without it he would
# match himself whenever he reviewed the same beer more than once.
result_3 = secure_graph.run("""
       MATCH (u1:Username{user_name:"CTJman"})-[:MADE]-(r1:Reviews)-[:ABOUT]-(b:Beers)-[:ABOUT]-(r2:Reviews)-[:MADE]-(u2:Username)
       WHERE u2 <> u1
       RETURN u2.user_name as user_name, count(r2) as shared_reviews
       ORDER BY count(r2) DESC
       LIMIT 2
""").data()
pprint(result_3)

[{'shared_reviews': 1428, 'user_name': 'acurtis'},
 {'shared_reviews': 1257, 'user_name': 'Texasfan549'}]


In [18]:
# Store the top-ranked user from the query above instead of a hand-copied literal.
r.set("GROUP2:3", result_3[0]["user_name"])

True

**4** - Which Portuguese brand has the most beers?

In [19]:
# Count the beers brewed at each brewery that is from Portugal (country code
# "PT") and keep the two breweries with the most beers.
portuguese_brands_query = """
        MATCH (be:Beers)-[ba:BREWED_AT]-(br:Breweries)-[f:FROM]-(c:Country{country_digit:"PT"})
        RETURN br.name as brand, count(be) as nr_of_beers
        ORDER BY count(be) DESC
        LIMIT 2
"""
result_4 = secure_graph.run(portuguese_brands_query).data()
pprint(result_4)

[{'brand': 'Dois Corvos Cervejeira', 'nr_of_beers': 40},
 {'brand': 'Unicer União Cervejeira S.A.', 'nr_of_beers': 22}]


In [20]:
# Store the top-ranked brand from the query above instead of a hand-copied literal.
r.set("GROUP2:4", result_4[0]["brand"])

True

**5** - Out of those beers, which has the most reviews?

In [21]:
# "Those beers" are the beers of the brand found in question 4, so the brewery
# name is taken from that result and passed as a Cypher parameter instead of
# being copied by hand into the query text.
result_5 = secure_graph.run("""
        MATCH (r:Reviews)-[a:ABOUT]-(b:Beers)-[ba:BREWED_AT]-(br:Breweries{name:$brewery_name})
        RETURN b.name as beer, count(r) as nr_of_reviews
        ORDER BY count(r) DESC
        LIMIT 2
""", parameters={"brewery_name": result_4[0]["brand"]}).data()
pprint(result_5)

[{'beer': 'Finisterra', 'nr_of_reviews': 10},
 {'beer': 'Matiné', 'nr_of_reviews': 6}]


In [22]:
# Store the top-ranked beer from the query above instead of a hand-copied literal.
r.set("GROUP2:5", result_5[0]["beer"])

True

**6** - On average how many different beer styles does each brewery produce?

In [23]:
# The subquery counts the distinct styles per brewery; the outer query then
# averages those counts.
# NOTE(review): the subquery groups by br.name, so breweries that share a name
# are counted as one - confirm that brewery names are unique.
avg_styles_query = """    
CALL{
    MATCH (s:Style)-[t:OF_TYPE]-(be:Beers)-[ba:BREWED_AT]-(br:Breweries)
    RETURN br.name as brewery, count(distinct s) as how_many_styles
    }
RETURN avg(how_many_styles) as avg_brewery_styles
"""
result_6 = secure_graph.run(avg_styles_query).data()
pprint(result_6)

[{'avg_brewery_styles': 10.669977315921768}]


In [32]:
# Store the average computed above, formatted with six decimal places,
# instead of a hand-copied literal.
r.set("GROUP2:6", f"{result_6[0]['avg_brewery_styles']:.6f}")

True

**7** - Which brewery produces the strongest beers according to ABV?

We consider the brewery with the strongest beers to be the one whose beers have the highest average ABV.

In [25]:
# Average the ABV of each brewery's beers, ignoring beers whose ABV is
# 'Unknown'. The ranking uses the unrounded average: ordering by the rounded
# value would rank breweries that round to the same integer arbitrarily.
# The rounded value is still what gets returned for display.
result_7 = secure_graph.run("""
        MATCH (be:Beers)-[ba:BREWED_AT]-(br:Breweries)
        WHERE be.abv <> 'Unknown'
        WITH br.name AS brewery, avg(toFloat(be.abv)) AS avg_abv
        RETURN brewery, round(avg_abv) AS abv_value
        ORDER BY avg_abv DESC
        LIMIT 3
""").data()
pprint(result_7)

[{'abv_value': 26.0, 'brewery': '1648 Brewing Company Ltd'},
 {'abv_value': 23.0, 'brewery': 'Schorschbräu'},
 {'abv_value': 21.0, 'brewery': "Brouwerij 't Koelschip"}]


In [26]:
# Store the top-ranked brewery from the query above instead of a hand-copied literal.
r.set("GROUP2:7", result_7[0]["brewery"])

True

**8** - If I typically enjoy a beer due to its aroma and appearance, which beer style should I try?

Taking smell as a proxy for aroma and look as a proxy for appearance, we consider the most suitable beer style to be the one with the highest combined average smell and look scores. We also check the number of reviews indirectly associated with each beer style, to confirm that the choice is supported by a meaningful number of reviews.

In [27]:
# Rank beer styles by their combined average smell and look scores, using the
# review count as a tie-breaker. Reviews whose smell or look is 'Unknown' are
# excluded (the smell filter was previously misspelled 'Unkown', so it never
# excluded anything).
result_8 = secure_graph.run("""
        MATCH (r:Reviews)-[a:ABOUT]-(be:Beers)-[:OF_TYPE]-(s:Style)
        WHERE r.smell <> 'Unknown' AND r.look <> 'Unknown'
        RETURN s.name as beer_style, avg(toFloat(r.smell)) as avg_smell_score, avg(toFloat(r.look)) as avg_look_score, count(r) as nr_reviews
        ORDER BY avg_smell_score+avg_look_score DESC, nr_reviews DESC
        LIMIT 3
""").data()
pprint(result_8)

[{'avg_look_score': 4.383595613210904,
  'avg_smell_score': 4.41361476476119,
  'beer_style': 'New England IPA',
  'nr_reviews': 110696},
 {'avg_look_score': 4.286392481437848,
  'avg_smell_score': 4.2628841976746354,
  'beer_style': 'American Imperial Stout',
  'nr_reviews': 352195},
 {'avg_look_score': 4.1749641745318,
  'avg_smell_score': 4.230876612146084,
  'beer_style': 'Belgian Gueuze',
  'nr_reviews': 20237}]


In [28]:
# Store the top-ranked style from the query above instead of a hand-copied literal.
r.set("GROUP2:8", result_8[0]["beer_style"])

True

**9** - If you had to pick 3 beers to recommend using only this data, which would you pick?

To recommend 3 beers we would want to consider the scores given by the customers and the number of reviews of each beer. For example, we wouldn't recommend a beer with an average score of 5 but with only 5 reviews.

In [29]:
# Minimum number of reviews a beer needs before it can be recommended.
MIN_REVIEWS = 50

# Rank beers by average score (review count as tie-breaker), keeping only
# beers with at least MIN_REVIEWS reviews attached.
#  - The score sentinel was misspelled 'Unkown'; it is now 'Unknown', so
#    unscored reviews are no longer included in nr_reviews.
#  - The former EXISTS subquery re-matched the already-bound r, a and be and
#    only added the size() check, so that check is applied directly.
result_9 = secure_graph.run("""
        MATCH (r:Reviews)-[a:ABOUT]-(be:Beers)
        WHERE r.score <> 'Unknown'
          AND size(()-[:ABOUT]-(be)) >= $min_reviews
        RETURN be.id as beer_id, be.name as beer_name, count(r) as nr_reviews, avg(toFloat(r.score)) as avg_score
        ORDER BY avg_score DESC, nr_reviews DESC
        LIMIT 3
""", parameters={"min_reviews": MIN_REVIEWS}).data()
pprint(result_9)

[{'avg_score': 4.874492753623187,
  'beer_id': 197417,
  'beer_name': '2 Candles',
  'nr_reviews': 69},
 {'avg_score': 4.837735042735044,
  'beer_id': 78820,
  'beer_name': 'Kentucky Brunch Brand Stout',
  'nr_reviews': 702},
 {'avg_score': 4.800911722141826,
  'beer_id': 62397,
  'beer_name': 'Rare Bourbon County Brand Stout (2010)',
  'nr_reviews': 1382}]


In [30]:
# Pair each medal with the corresponding row of the ranking computed above
# (best first) instead of hand-copying ids and names from the output.
medals = ("Gold Medal", "Silver Medal", "Bronze Medal")
question9 = {
    medal: f"beer_id - {beer['beer_id']}, beer_name - {beer['beer_name']}"
    for medal, beer in zip(medals, result_9)
}

# The answer has three parts, so it is stored as a JSON string.
r.set("GROUP2:9", json.dumps(question9))

True