In [1]:
import copy
import json
import os
import warnings

import pandas as pd
from sqlalchemy import create_engine, text as sql

In [2]:
# SQLAlchemy engine for the `craft_beer` PostgreSQL database on localhost,
# connecting as user `postgres`. The password is read from the PG_PASS
# environment variable; os.environ[...] raises KeyError when it is not set.
# NOTE(review): the password is interpolated into the URL unescaped -- confirm
# it never contains URL-reserved characters such as '@', ':' or '/'.
engine = create_engine(f"postgresql://postgres:{os.environ['PG_PASS']}@localhost/craft_beer")

In [3]:
# Selects id, name and review_text_json for every beer whose review_text_json
# is not NULL.
#
# ORDER BY makes the row order reproducible: the export cell names each output
# file after the row's position in the result set, so without an explicit
# ordering the position -> beer mapping could differ between runs.
#
# NOTE(review): the GROUP BY has no aggregate alongside it and looks redundant
# if beers.id is unique -- confirm before removing.
selected_beers_query = """
SELECT 
beers.id as beer_id,
beers.name as beer_name,
beers.review_text_json
FROM beers
WHERE beers.review_text_json IS NOT NULL
GROUP BY beers.id, beer_name
ORDER BY beers.id
"""

In [4]:
# Execute the query through the engine and load the rows into a DataFrame
# with the columns beer_id, beer_name and review_text_json.
selected_beers_results = pd.read_sql(selected_beers_query, con=engine)

In [7]:
class BeerFlavor:
    """One group of flavor terms that is scored against a beer's review terms.

    Keyword arguments:
        name:      label of the group.
        parent:    label of the top-level node of the result; defaults to ``name``.
        flavors:   list of flavor terms to look for; defaults to ``[name]``.
        threshold: a review term matches a flavor when its similarity score is
                   strictly greater than this value; defaults to ``.85``.

    The instance is stateful: ``final_flavor_obj`` and ``flavor_keys`` are
    created once in ``__init__`` and keep growing across ``check_flavors``
    calls, and ``check_flavors`` returns ``final_flavor_obj`` itself (not a
    copy). Use a fresh instance for every beer that must be scored
    independently.
    """

    # Review terms that are never scored as a flavor match, whatever flavor
    # they are compared with.
    SKIP_WORDS = frozenset([
        'better', 'honest', "straight", "worthi", "worth", "worthless", "glass", "butterscotch",
    ])

    def __init__(self, **kwargs):
        self.name = kwargs.get("name")
        self.parent = kwargs.get("parent", kwargs.get("name"))
        self.flavors = kwargs.get("flavors", [kwargs.get("name")])
        self.threshold = kwargs.get("threshold", .85)
        # Result skeleton: {"name": parent, "children": [group, ...]}.
        self.final_flavor_obj = {
            "name": self.parent,
            "children": []
        }
        # Flavor names that have already been reported by this instance.
        self.flavor_keys = []

    def check_flavors(self, beer_json):
        """Score every flavor of this group against the terms in ``beer_json``.

        Args:
            beer_json: iterated for its keys (the review terms) and indexed as
                ``beer_json[key][0]`` to obtain the amount added to the matching
                flavor's total. Presumably a dict mapping each term to a
                sequence whose first element is a count or weight -- confirm
                against the data source.

        Returns:
            ``self.final_flavor_obj``. When at least one flavor matched, one
            group ``{"terms", "name", "children"}`` has been appended to its
            ``"children"`` list.
        """
        flavor_object = {}
        checked_terms = []
        for key in beer_json:
            if key in checked_terms:
                continue
            for flavor in self.flavors:
                if self._compare_terms(flavor.lower(), key) > self.threshold:
                    # A term is credited to the first flavor it matches only,
                    # so the remaining flavors need not be tried.
                    checked_terms.append(key)
                    flavor_object[flavor] = flavor_object.get(flavor, 0) + beer_json[key][0]
                    break
        return self._normalize_flavor_object(flavor_object, checked_terms)

    def _compare_terms(self, main_word, word_to_compare):
        """Return the share of ``main_word`` matched position by position.

        Characters are compared at equal indexes, over the length of the
        shorter word, and the number of equal positions is divided by
        ``len(main_word)``. When ``word_to_compare`` is the shorter word, its
        first two characters (or its only character) must equal those of
        ``main_word``, otherwise the score is 0. Terms listed in
        ``SKIP_WORDS`` and an empty ``main_word`` also score 0.
        """
        # An empty flavor would otherwise divide by zero below.
        if not main_word or word_to_compare in self.SKIP_WORDS:
            return 0
        main_len = len(main_word)
        compare_len = len(word_to_compare)
        if main_len > compare_len:
            required_prefix = min(2, compare_len)
            if required_prefix == 0 or main_word[:required_prefix] != word_to_compare[:required_prefix]:
                return 0.0
        # zip() stops at the shorter word, which is exactly the compared span.
        equal_positions = sum(
            int(letter == other) for letter, other in zip(main_word, word_to_compare)
        )
        return equal_positions / main_len

    def _normalize_flavor_object(self, flavor_object, flavor_terms):
        """Record the matched flavors as one group and return the result object.

        Args:
            flavor_object: mapping of flavor name to its accumulated value.
            flavor_terms:  the review terms that produced those matches.

        A group is appended only when ``flavor_object`` contains a flavor this
        instance has not reported before. Exactly one group is appended per
        call; it lists every flavor in ``flavor_object``.
        """
        new_keys = [key for key in flavor_object if key not in self.flavor_keys]
        if new_keys:
            self.flavor_keys.extend(new_keys)
            self.final_flavor_obj["children"].append({
                "terms": flavor_terms,
                "name": self.name,
                "children": [
                    {"name": key, "value": value} for key, value in flavor_object.items()
                ]
            })

        return self.final_flavor_obj

In [8]:
import json

class FlavorWheel():
    """Builds a nested "flavor wheel" dict for a beer from its review terms.

    Keyword arguments:
        threshold: match threshold handed to every BeerFlavor entry; defaults
                   to ``.85``.

    ``flavors`` holds one BeerFlavor per group of terms. ``check_flavors``
    scores a private copy of each entry, so the same wheel can be used for any
    number of beers.
    """

    def __init__(self, **kwargs):
        self.threshold = kwargs.get("threshold", .85)
        # NOTE(review): "Parnsip" and "Geranoil" look like misspellings of
        # "Parsnip" and "Geraniol". They are left unchanged because these
        # strings are both the match terms and the labels written to the output.
        self.flavors = [
            BeerFlavor(threshold=self.threshold,name="Bitter"), 
            BeerFlavor(threshold=self.threshold,name="Sweet", flavors=["Oversweet", "Syrupy", "Primings", "Vanilla","Jam-like","Honey"]),
            BeerFlavor(threshold=self.threshold,name="Acidic", flavors=["Sour","Acetic"]),
            BeerFlavor(threshold=self.threshold,name="Moldy", flavors=["Musty","Earthy"], parent="Stale"),
            BeerFlavor(threshold=self.threshold,name="Stale", flavors=["Leathery","Papery","Catty"]),
            BeerFlavor(threshold=self.threshold,name="Sulfery", flavors=["Striking match","Match","Meaty"], parent="Sulfitic"),
            BeerFlavor(threshold=self.threshold,name="Sulfery", flavors=["Hydrogen sulfide","Sulfide","Mercaptan","Garlic","Lightstruck","Autolysed","Burnt Rubber","Rubber","Shrimp-like","Shrimp"], parent="Sulfidic"),
            BeerFlavor(threshold=self.threshold,name="Sulfery", flavors=["Yeasty"]),
            BeerFlavor(threshold=self.threshold,name="Sulfery", flavors=["Onion","Tomato","Sweetcorn","Corn","Parnsip","Celery","Dimethyl Sulfide","Cabbage","Cooked"], parent="Cooked Vegetables"),
            BeerFlavor(threshold=self.threshold,name="Mouthfeel", flavors=["Metallic","Mouthcoating","Alkaline"]),
            BeerFlavor(threshold=self.threshold,name="Mouthfeel", flavors=["Puckering", "Tart", "Drying"], parent="Astringent"),
            BeerFlavor(threshold=self.threshold,name="Mouthfeel", flavors=["Powdery"]),
            BeerFlavor(threshold=self.threshold,name="Mouthfeel", flavors=["Flat","Gassy"], parent="Carbonation"),
            BeerFlavor(threshold=self.threshold,name="Mouthfeel", flavors=["Piquant"], parent="Warming"),
            BeerFlavor(threshold=self.threshold,name="Fullness", flavors=["Watery","Characterless","Satiating","Thick"], parent="Body"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Spicy","Vinous"], parent="Alcoholic"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Plastics","Can-liner","Lacquer"], parent="Solvent-like"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Isoamyl Acetate", "Ethyl Hexanoate", "Ethyl acetate", "Ethyl", "Acetate"], parent="Estery"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Citrus", "Apple","Banana","Black Currant","Melony","Pear","Raspberry", "Strawberry"], parent="Fruity"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Acetaldehyde"]),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["2-Phenylethanol", "Geranoil"], parent="Floral"),
            BeerFlavor(threshold=self.threshold,name="AFFF", flavors=["Dry-hop", "Kettle-hop", "Hop oil"], parent="Hoppy"),
            BeerFlavor(threshold=self.threshold,name="Vegetal", flavors=["Piney","Woody"], parent="Resinous"),
            BeerFlavor(threshold=self.threshold,name="Vegetal", flavors=["Walnut","Coconut","Beany", "Almond"], parent="Nutty"),
            BeerFlavor(threshold=self.threshold,name="Vegetal", flavors=["Grass","Straw-like","Straw", "Freshly cut grass"], parent="Grassy"),
            BeerFlavor(threshold=self.threshold,name="Cereal", flavors=["Husky","Corn grits", "Mealy"], parent="Grainy"),
            BeerFlavor(threshold=self.threshold,name="Cereal", flavors=["Malty","Worty"]),
            BeerFlavor(threshold=self.threshold,name="Maillard", flavors=["Molasses","Licorice"], parent="Caramel"),
            BeerFlavor(threshold=self.threshold,name="Maillard", flavors=["Bread", "Roast barley", "Crust", "Bread Crust","Smoky", "Barley"], parent="Burnt"),
            BeerFlavor(threshold=self.threshold,name="Phenolic", flavors=["Tarry","Bakelite", "Carbolic","Chlorophenol", "Iodoform"]),
            BeerFlavor(threshold=self.threshold,name="Fatty", flavors=["Caprylic","Cheesy","Isovaleric","Butyric"], parent="Fatty Acids"),
            BeerFlavor(threshold=self.threshold,name="Fatty", flavors=["Diacetyl","Rancid"]),
            BeerFlavor(threshold=self.threshold,name="Fatty", flavors=["Vegetable oil","Mineral oil", "oil"], parent="Oily"),
        ]

    def check_flavors(self, beer):
        """Return the flavor wheel for one beer.

        Args:
            beer: mapping with the keys ``'beer_name'`` (used in the wheel's
                title) and ``'review_text_json'`` (passed unchanged to each
                flavor entry's ``check_flavors``).

        Returns:
            ``{"name": "<beer_name> Flavor Wheel", "children": [...]}`` with one
            child per flavor entry that reported at least one group.
        """
        beer_flavor_wheel = {
            "name": f"{beer['beer_name']} Flavor Wheel",
            "children" : []
        }
        for template in self.flavors:
            # Score a private copy: the entries keep state between calls and
            # the merge step below edits the returned object in place, so
            # working on the shared entry would leak one beer's result into
            # the next call.
            flavor = copy.deepcopy(template)
            flavor_check = flavor.check_flavors(beer['review_text_json'])
            if len(flavor_check['children']) == 0:
                continue
            try:
                self._merge_group(beer_flavor_wheel["children"], flavor_check)
            except (KeyError, IndexError, TypeError) as error:
                # Best effort: a group that cannot be merged is left out of
                # the wheel, but the omission is reported instead of hidden.
                warnings.warn(
                    f"Skipping flavor group {getattr(template, 'name', template)!r}: {error!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )
        return beer_flavor_wheel

    def _merge_group(self, wheel_children, flavor_check):
        """Place one flavor entry's result into the wheel's child list.

        Only the first group of ``flavor_check`` is used unless the result is
        merged into an existing node.
        """
        first_group = flavor_check['children'][0]
        if flavor_check['name'] == first_group['name']:
            first_leaf = first_group['children'][0]
            if first_group['name'] == first_leaf['name']:
                # Top node, group and first flavor all share one label:
                # collapse them into that single leaf and keep the terms.
                first_leaf['terms'] = first_group['terms']
                wheel_children.append(first_leaf)
            else:
                # Top node and group share a label: drop the redundant level.
                wheel_children.append(first_group)
        elif flavor_check['name'] in [child['name'] for child in wheel_children]:
            # A node with this label already exists: add the groups to it.
            matched_flavor = [
                child for child in wheel_children if child['name'] == flavor_check['name']
            ][0]
            matched_flavor['children'] += flavor_check["children"]
        else:
            # NOTE(review): 'children' becomes a single dict here, not a list
            # -- confirm that consumers of the output expect this shape.
            flavor_check['children'] = first_group
            wheel_children.append(flavor_check)

In [27]:
# Write one flavor-wheel JSON file per selected beer to ./json/<row position>.json.
# open() does not create missing directories, so make sure the target exists.
os.makedirs('./json', exist_ok=True)

for i, beer in enumerate(selected_beers_results.to_dict('records')):
    # A new wheel for every beer keeps each result independent of the beers
    # processed before it; .7 replaces the wheel's default threshold of .85.
    flavor_wheel = FlavorWheel(threshold=.7)

    with open(f'./json/{i}.json', 'w') as outfile:
        json.dump(flavor_wheel.check_flavors(beer), outfile)