### Loading and trying the model with new data

In [1]:
import warnings
# Silence every warning category from this point on so cell output stays readable.
# NOTE(review): a blanket "ignore" also hides deprecation/compatibility warnings
# that may be emitted by the libraries imported later; consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
import sys
import os

# Make the "../transformer" directory importable. The relative path is resolved
# against the current working directory, so the kernel must be started from a
# folder that sits next to "transformer".
sys.path.append(os.path.abspath("../transformer"))

# Must stay after the sys.path change above: the attribute_cleaner package is
# expected to be found through the directory that was just appended.
from attribute_cleaner.general_string_cleaner import text_cleaner

In [4]:
# Import the spaCy library
import spacy

# Load the trained spaCy NER model from the specified path
nlp = spacy.load('output_try_3/model-best')


In [5]:
def return_each_label(text, model=None):
    """Group the entities found in ``text`` by their label.

    The text is lower-cased before it is handed to the pipeline, so the
    pipeline only ever sees lower-case input.

    Args:
        text (str): Raw or pre-cleaned text to analyse.
        model (callable, optional): Pipeline to run. It is called as
            ``model(text)`` and must return an object with an ``ents``
            iterable whose items expose ``label_`` and ``text``. Defaults to
            the module-level ``nlp`` defined earlier in the notebook.

    Returns:
        dict: Maps each entity label to the list of matched entity texts, in
        the order the pipeline yields them. Repeated mentions are kept, so a
        list may contain duplicates.
    """
    # Resolve the default at call time so the function always uses whichever
    # pipeline is currently bound to `nlp`.
    pipeline = nlp if model is None else model
    doc = pipeline(text.lower())

    return_dict = {}
    for ent in doc.ents:
        # setdefault creates the list the first time a label is seen and
        # returns the existing list on every later occurrence.
        return_dict.setdefault(ent.label_, []).append(ent.text)
    return return_dict

In [6]:
text = """
Jasmine kissed cranberry - 100 % arabica that is sourced from Chikmagalur, grown at an altitude of approximately 4,100ft to 4,500ft. It gets its unique name through a process which include a strain of yeast used to create a carbon dioxide-rich environment during fermentation in stainless-steel fermenters. A remarkable level of complexity in the beans is created by this process, laying the groundwork for a wonderful cup of coffee. Steady drying on raised beds is the next important step, which lets the flavors gradually develop and intensify. After another 30 days of continuous stirring, the coffee undergo additional drying developing scent of jasmine, complex notes of raspberries, cranberries, and sparkling malic acidity with a lingering floral aftertaste 
"""

In [7]:
return_each_label(text_cleaner(text))

{'TASTING NOTES': ['jasmine', 'jasmine', 'raspberries', 'cranberries'],
 'COFFEE TYPE': ['arabica'],
 'LOCATION': ['chikmagalur'],
 'ELEVATION': ['4 , 100ft'],
 'ACIDITY': ['malic'],
 'COFFEE_PROPERTIES': ['floral']}

In [8]:
text ="""
Single origin Indian Coffee From Moganad Estate, Tamil Nadu which is a 100% Arabiaca coffee with 
an exquisite blend of balanced sweetness and brightness. A Medium Dark Roast coffee with 
flavor notes of Cocoa, Caramel and Nut which can also be enjoyed in a French press, moka pot, aeropress & espresso .
 It is washed processed, grown at an altitude of around 4430 ft

"""

In [9]:
return_each_label(text_cleaner(text))

{'ESTATE': ['moganad'],
 'LOCATION': ['tamil nadu'],
 'COFFEE TYPE': ['arabiaca'],
 'ROAST LEVEL': ['medium dark'],
 'TASTING NOTES': ['cocoa', 'caramel', 'nut'],
 'PROCESSING': ['washed'],
 'ELEVATION': ['4430 ft']}

In [10]:
text="""
tasting notes : toffee , hazelnut , milk chocolate roast level : espresso location-chikmagalur , kn 
altitude- 1400m varietal- selection 13 
process-washed weight-250gm/1kg;
"""
return_each_label(text_cleaner(text))

{'TASTING NOTES': ['toffee', 'hazelnut', 'milk chocolate'],
 'ROAST LEVEL': ['espresso'],
 'LOCATION': ['chikmagalur'],
 'ELEVATION': ['1400m'],
 'VARIETAL': ['selection 13'],
 'PROCESSING': ['washed']}

In [11]:
text = """ 
tasting notes : toffee , hazelnut , milk chocolate roast level : espressolocation- chikmagalur , knaltitude- 1400mvarietal- selection 13process- washed weight- 250gm/1kg
"""
return_each_label(text_cleaner(text))

{'TASTING NOTES': ['toffee', 'hazelnut', 'milk chocolate'],
 'LOCATION': ['chikmagalur'],
 'VARIETAL': ['selection 13process'],
 'PROCESSING': ['washed']}

In [12]:
text = """ 

our search for the immaculate espresso began when we were embraced by the warm welcome of ramdev and his parents shantha and nagesh as we drove through the gates of bettadamalali estate in early 2023 . a pristine farm nestled in the baba budan giri hills of chikmagalur , this place is more than a commercial operation – it is a garden of nature’s best delights . with scientific approaches to farming complemented by highly structured operations , bettadamalali produce some of the best washed arabicas we have encountered in recent times . the farm has been a long-term partner to our sister enterprise indcaffe – who have over the years exported several hundreds of tons of washed arabica for them to specialty houses in europe . we decided to grab our own little share of the treasure this year as we cycled through multiple lots to find this gem that would help us curate a delicious espresso . expect a sweet , rich-bodied liquor in your cup with a viscous creamy texture that will hug your palate closely before letting go . sweet honey and dark cocoa notes preceded by a savory aroma of roasted nuts together create a mesmerizing espresso that is testament to the great work of art that has been accomplished in the fields of bettadamalali estate . enjoy it black or hit it with milk ! honey cocoa roasted nuts our search for the immaculate espresso began when we were embraced by the warm welcome of ramdev and his parents shantha and nagesh as we drove through the gates of bettadamalali estate in early 2023 . a pristine farm nestled in the baba budan giri hills of chikmagalur , this place is more than a commercial operation – it is a garden of nature’s best delights . with scientific approaches to farming complemented by highly structured operations , bettadamalali produce some of the best washed arabicas we have encountered in recent times . 
the farm has been a long-term partner to our sister enterprise indcaffe – who have over the years exported several hundreds of tons of washed arabica for them to specialty houses in europe . we decided to grab our own little share of the treasure this year as we cycled through multiple lots to find this gem that would help us curate a delicious espresso . expect a sweet , rich-bodied liquor in your cup with a viscous creamy texture that will hug your palate closely before letting go . sweet honey and dark cocoa notes preceded by a savory aroma of roasted nuts together create a mesmerizing espresso that is testament to the great work of art that has been accomplished in the fields of bettadamalali estate . enjoy it black or hit it with milk

""" 
# NOTE(review): unlike the earlier cells, this call does not pass the text
# through text_cleaner; the explicit .lower() is also redundant because
# return_each_label lower-cases its input itself.
return_each_label(text.lower())

{'FARMER': ['ramdev', 'shantha', 'nagesh', 'ramdev', 'shantha', 'nagesh'],
 'ESTATE': ['bettadamalali',
  'bettadamalali',
  'bettadamalali',
  'bettadamalali',
  'bettadamalali',
  'bettadamalali'],
 'LOCATION': ['baba budan giri hills of chikmagalur ,',
  'baba budan giri hills of chikmagalur ,'],
 'PROCESSING': ['washed', 'washed', 'washed', 'washed'],
 'COFFEE TYPE': ['arabicas', 'arabica', 'arabicas', 'arabica'],
 'BODY': ['rich-bodied', 'rich-bodied'],
 'TASTING NOTES': ['honey',
  'dark cocoa',
  'roasted nuts',
  'honey cocoa',
  'honey',
  'dark cocoa',
  'roasted nuts']}

##### Even though a transformer-based model seems to be overkill, the 3rd iteration is certainly a step up from the last 2 iterations of the same model!