# Import

In [1]:
#!pip install owlready2

In [2]:
from owlready2 import *
import pandas as pd
import numpy as np
import os

from tqdm import tqdm



# Load dataset

# Replace the frame's column labels with identifier-style names (20 labels).
# NOTE(review): `starbucks_info` is not assigned anywhere above this statement
# in the visible notebook, so on a fresh kernel this line would raise
# NameError — presumably it records preprocessing that was run on the raw
# data before the CSV loaded further below was published; confirm.
starbucks_info.columns = [
    'Temperature', 'Category', 'Product_Name', 'Milk', 'Whip', 'Calories',
       'Total_Fat', 'Saturated_Fat', 'Trans_Fat', 'Cholesterol', 'Sodium',
       'Total_Carbohydrates', 'Dietary_Fiber', 'Sugar', 'Protein', 'Vitamin_A',
       'Vitamin_C', 'Calcium', 'Iron', 'Caffeine']

def fullname(product, milk):
    """Build the display name of a beverage.

    Args:
        product: Product name; converted with ``str``.
        milk: Milk type, or a missing value (NaN/None) when the product has
            no milk variant.

    Returns:
        ``"<product> with <milk> Milk"`` when ``milk`` is present, otherwise
        just ``"<product>"``.
    """
    label = str(product)

    # No milk recorded: the product name alone is the beverage name.
    if pd.isna(milk):
        return label

    return label + ' with ' + str(milk) + ' Milk'
    
# Add the human-readable beverage name (product plus optional milk) per row.
starbucks_info['Beverage'] = starbucks_info.apply(
    lambda row: fullname(row.Product_Name, row.Milk),
    axis=1,
)

# Persist the full menu table.
starbucks_info.to_csv('Starbucks_Recent_Menus_Tall.csv', encoding='UTF-8')

# One row per distinct (Temperature, Category, Product_Name) combination.
category = starbucks_info[['Temperature', 'Category', 'Product_Name']].drop_duplicates()

category.to_csv('product_category.csv', encoding='UTF-8')

In [3]:
# Raw-GitHub URL of the menu CSV (Starbucks_Recent_Menus_Tall.csv).
menu_dir = 'https://raw.githubusercontent.com/regenesis90/coursework/main/ontology_engineering/Dataset/Starbucks_Recent_Menus_Tall.csv'

In [4]:
# Load the menu table directly from the URL stored in menu_dir.
starbucks_info = pd.read_csv(menu_dir)

In [5]:
# Preview the first rows of the menu table.
starbucks_info.head()

Unnamed: 0,ID,Beverage,Product,Milk,Whip,Calories,Total_Fat,Saturated_Fat,Trans_Fat,Cholesterol,Sodium,Total_Carbohydrates,Dietary_Fiber,Sugar,Protein,Vitamin_A,Vitamin_C,Calcium,Iron,Caffeine
0,Brewed_Coffee_Dark_Roast,Brewed Coffee - Dark Roast,Brewed Coffee - Dark Roast,,,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0,0,0,0,193
1,Brewed_Coffee_Decaf_Pike_Place_Roast,Brewed Coffee - Decaf Pike Place Roast,Brewed Coffee - Decaf Pike Place Roast,,,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0,0,0,0,20
2,Brewed_Coffee_Medium_Roast,Brewed Coffee - Medium Roast,Brewed Coffee - Medium Roast,,,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0,0,0,0,235
3,Brewed_Coffee_True_North_Blend_Blonde_Roast,Brewed Coffee - True North Blend Blonde Roast,Brewed Coffee - True North Blend Blonde Roast,,,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0,0,0,0,270
4,Caffe_Misto_with_Nonfat_Milk,Caffe Misto with Nonfat Milk,Caffe Misto,Nonfat,,60,0.2,0.1,0.0,5,70,8,0,8,6.0,10,0,20,0,115


# Cast every nutrient column to a floating-point dtype so the values match the
# `float` range declared on the ontology's data properties.
#
# float64 is used instead of float16: half precision keeps only about three
# significant decimal digits (0.1 is stored as 0.0999755859375), and those
# rounded values would be copied verbatim into the ontology when each cell is
# later converted with float(...).
nutrient_columns = [
    'Calories',
    'Total_Fat',
    'Saturated_Fat',
    'Trans_Fat',
    'Cholesterol',
    'Sodium',
    'Total_Carbohydrates',
    'Dietary_Fiber',
    'Sugar',
    'Protein',
    'Vitamin_A',
    'Vitamin_C',
    'Calcium',
    'Iron',
    'Caffeine',
]

starbucks_info = starbucks_info.astype(
    {column: 'float64' for column in nutrient_columns}
)

In [6]:
# Raw-GitHub URL of the product/category lookup CSV (product_category.csv).
category_dir = 'https://raw.githubusercontent.com/regenesis90/coursework/main/ontology_engineering/Dataset/product_category.csv'

In [7]:
# Load the product/category table directly from the URL stored in category_dir.
category_info = pd.read_csv(category_dir)

In [8]:
# Preview the first three rows of the category table.
category_info.head(3)

Unnamed: 0,Temperature,Category,ID,Product_Name
0,Hot,Coffee,Brewed_Coffee_Dark_Roast,Brewed Coffee - Dark Roast
1,Hot,Coffee,Brewed_Coffee_Decaf_Pike_Place_Roast,Brewed Coffee - Decaf Pike Place Roast
2,Hot,Coffee,Brewed_Coffee_Medium_Roast,Brewed Coffee - Medium Roast


In [9]:
# Show the dtype of every column in the menu table.
starbucks_info.dtypes

ID                      object
Beverage                object
Product                 object
Milk                    object
Whip                    object
Calories                 int64
Total_Fat              float64
Saturated_Fat          float64
Trans_Fat              float64
Cholesterol              int64
Sodium                   int64
Total_Carbohydrates      int64
Dietary_Fiber            int64
Sugar                    int64
Protein                float64
Vitamin_A                int64
Vitamin_C                int64
Calcium                  int64
Iron                     int64
Caffeine                 int64
dtype: object

# Develop Ontology

## Create New Ontology
* 새로운 온톨로지 생성(빈 온톨로지)

In [10]:
# IRI identifying the ontology. `.load()` is never called on the result, so
# nothing is read from this address here; `onto` is the ontology object that
# the `with onto:` blocks below add classes and properties to.
ontology_name = 'http://test.org/starbucks.owl'
onto = get_ontology(ontology_name)

## Class & Properties 생성

### Class 선언

In [20]:
# Declare the top-level class `Starbucks` (direct subclass of Thing) in `onto`.
# NOTE(review): no other visible cell references this class — confirm it is
# still needed.
with onto:
    class Starbucks(Thing):
        pass

In [21]:
# Class declarations: the three entity classes of the ontology, each a direct
# subclass of Thing and each with an empty body.
with onto:
    # A drink on the menu; domain of the descriptive and nutrient properties.
    class Beverage(Thing):
        pass
    
    # Range of the `category` object property.
    class Category(Thing):
        pass
    
    # Range of the `has_AddOns` object property; also in the nutrient domains.
    class AddOns(Thing):
        pass

### Property 선언

In [12]:
# Beverage properties
# Declares every data/object property of the ontology inside `onto`.
# Descriptive properties have a single-class domain; nutrient properties use
# the union domain Or([Beverage, AddOns]) with range float.
with onto:
    #### Milk, whip, category, add-ons ####
    
    # Display name of an add-on (string).
    class has_AddOns_name(DataProperty):
        domain = [AddOns]
        range = [str]
    
    # Display name of a beverage (string).
    class has_Beverage_name(DataProperty):
        domain = [Beverage]
        range = [str]
    
    # Milk type of a beverage (string); domain is Beverage only.
    class has_Milk(DataProperty):
        domain = [Beverage]
        range = [str]
        
    # Whip option of a beverage (string); domain is Beverage only.
    class has_Whip(DataProperty):
        domain = [Beverage]
        range = [str]
        
    # Links a Beverage to a Category individual.
    # NOTE: this class statement also rebinds the module-level name `category`,
    # which an earlier statement in this notebook used for a DataFrame.
    class category(ObjectProperty):
        domain = [Beverage]
        range = [Category]
        
    # Links a Beverage to an AddOns individual.
    class has_AddOns(ObjectProperty):
        domain = [Beverage]
        range = [AddOns]
        
    #### Nutrients ####
    # Each nutrient property below applies to Beverage or AddOns individuals
    # and holds float values.
    class has_Calories(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]

    class has_Total_Fat(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Saturated_Fat(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Trans_Fat(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Cholesterol(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Sodium(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Total_Carbohydrates(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Dietary_Fiber(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Sugar(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Protein(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Vitamin_A(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Vitamin_C(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Calcium(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Iron(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]
        
    class has_Caffeine(DataProperty):
        domain = [Or([Beverage, AddOns])]
        range = [float]

## 개별 데이터 투입

### 함수 정의

In [14]:
def create_Beverage(ID:str, name:str, Category:str, Milk:str, Whip:str, Calories:float, Total_Fat:float, Saturated_Fat:float, Trans_Fat:float, Cholesterol:float, Sodium:float, Total_Carbohydrates:float, Dietary_Fiber:float, Sugar:float, Protein:float, Vitamin_A:float, Vitamin_C:float, Calcium:float, Iron:float, Caffeine:float):
    """Instantiate ``Beverage(ID)`` and append the given values to its properties.

    Args:
        ID: Name passed to the ``Beverage`` constructor.
        name: Value appended to ``has_Beverage_name``.
        Category: Accepted but not used by this function.
        Milk: Appended to ``has_Milk`` unless ``pd.isna(Milk)`` is true.
        Whip: Appended to ``has_Whip`` unless ``pd.isna(Whip)`` is true.
        Calories ... Caffeine: Nutrient amounts, each appended to the
            ``has_<Nutrient>`` property of the same name.

    Returns:
        The ``Beverage`` individual that was populated.
    """
    new_Beverage = Beverage(ID)
    new_Beverage.has_Beverage_name.append(name)

    # Optional descriptors: recorded only when a value is actually present.
    for prop_name, text in (('has_Milk', Milk), ('has_Whip', Whip)):
        if not pd.isna(text):
            getattr(new_Beverage, prop_name).append(text)

    # Nutrients, in the same order as the signature.
    nutrient_values = (
        ('has_Calories', Calories),
        ('has_Total_Fat', Total_Fat),
        ('has_Saturated_Fat', Saturated_Fat),
        ('has_Trans_Fat', Trans_Fat),
        ('has_Cholesterol', Cholesterol),
        ('has_Sodium', Sodium),
        ('has_Total_Carbohydrates', Total_Carbohydrates),
        ('has_Dietary_Fiber', Dietary_Fiber),
        ('has_Sugar', Sugar),
        ('has_Protein', Protein),
        ('has_Vitamin_A', Vitamin_A),
        ('has_Vitamin_C', Vitamin_C),
        ('has_Calcium', Calcium),
        ('has_Iron', Iron),
        ('has_Caffeine', Caffeine),
    )
    for prop_name, amount in nutrient_values:
        getattr(new_Beverage, prop_name).append(amount)

    return new_Beverage

In [15]:
def create_AddOns(ID:str, name:str, Calories, Total_Fat, Saturated_Fat, Trans_Fat, Cholesterol, Sodium, Total_Carbohydrates, Dietary_Fiber, Sugar, Protein, Vitamin_A, Vitamin_C, Calcium, Iron, Caffeine):
    """Instantiate ``AddOns(ID)`` and append the given values to its properties.

    Unlike a beverage, an add-on carries no milk/whip information: the
    signature has no ``Milk`` or ``Whip`` parameter, and ``has_Milk`` /
    ``has_Whip`` are declared with domain ``Beverage`` only. The previous body
    read those two undefined names and therefore raised ``NameError`` on
    every call.

    Args:
        ID: Name passed to the ``AddOns`` constructor.
        name: Value appended to ``has_AddOns_name``.
        Calories ... Caffeine: Nutrient amounts, each appended to the
            ``has_<Nutrient>`` property of the same name.

    Returns:
        The ``AddOns`` individual that was populated.
    """
    new_AddOns = AddOns(ID)
    new_AddOns.has_AddOns_name.append(name)

    # Nutrients, in the same order as the signature.
    nutrient_values = (
        ('has_Calories', Calories),
        ('has_Total_Fat', Total_Fat),
        ('has_Saturated_Fat', Saturated_Fat),
        ('has_Trans_Fat', Trans_Fat),
        ('has_Cholesterol', Cholesterol),
        ('has_Sodium', Sodium),
        ('has_Total_Carbohydrates', Total_Carbohydrates),
        ('has_Dietary_Fiber', Dietary_Fiber),
        ('has_Sugar', Sugar),
        ('has_Protein', Protein),
        ('has_Vitamin_A', Vitamin_A),
        ('has_Vitamin_C', Vitamin_C),
        ('has_Calcium', Calcium),
        ('has_Iron', Iron),
        ('has_Caffeine', Caffeine),
    )
    for prop_name, amount in nutrient_values:
        getattr(new_AddOns, prop_name).append(amount)

    return new_AddOns

### 정보 투입

In [17]:
# Insert Beverage information: one Beverage individual per row of starbucks_info.

def _optional_text(value):
    """Return ``str(value)``, or None when the cell is missing (NaN/None).

    Calling ``str`` on a missing cell yields the literal string 'nan', which
    ``pd.isna`` no longer recognises as missing; create_Beverage would then
    store "nan" as the milk/whip value of every beverage that has none.
    """
    return None if pd.isna(value) else str(value)


# Nutrient columns, in the positional order create_Beverage expects them.
nutrient_columns = [
    'Calories', 'Total_Fat', 'Saturated_Fat', 'Trans_Fat', 'Cholesterol',
    'Sodium', 'Total_Carbohydrates', 'Dietary_Fiber', 'Sugar', 'Protein',
    'Vitamin_A', 'Vitamin_C', 'Calcium', 'Iron', 'Caffeine',
]

for row in tqdm(starbucks_info.itertuples(index=False), total=len(starbucks_info)):

    # Plain Python floats, whatever numeric dtype the frame currently holds.
    nutrient_values = [float(getattr(row, column)) for column in nutrient_columns]

    # NOTE(review): the 'Product' column is what gets passed as the `Category`
    # argument; create_Beverage does not use that argument at present.
    new_Beverage = create_Beverage(
        str(row.ID),
        str(row.Beverage),
        str(row.Product),
        _optional_text(row.Milk),
        _optional_text(row.Whip),
        *nutrient_values,
    )

100%|██████████████████████████████████████████████████████████████████████████████| 345/345 [00:00<00:00, 1285.96it/s]


In [None]:
# Insert AddOns information
# FIXME(review): despite the heading, this cell is a verbatim copy of the
# Beverage loop: it iterates over starbucks_info and calls create_Beverage,
# never create_AddOns. No add-on dataset is loaded anywhere in the visible
# notebook, so the intended data source has to be confirmed before this cell
# can be made to populate AddOns individuals. Its execution count is empty
# (In [None]), i.e. it had not been run when the notebook was saved.
for i in tqdm(range(len(starbucks_info))):
    
    # Identifier and descriptive text fields of row i, forced to str.
    cur_ID = str(starbucks_info['ID'].iloc[i])
    cur_name = str(starbucks_info['Beverage'].iloc[i])
    cur_Category = str(starbucks_info['Product'].iloc[i])
    cur_Milk = str(starbucks_info['Milk'].iloc[i])
    cur_Whip = str(starbucks_info['Whip'].iloc[i])
    
    # Nutrient values of row i, forced to Python float.
    cur_Calories = float(starbucks_info['Calories'].iloc[i])
    cur_Total_Fat = float(starbucks_info['Total_Fat'].iloc[i])
    cur_Saturated_Fat = float(starbucks_info['Saturated_Fat'].iloc[i])
    cur_Trans_Fat = float(starbucks_info['Trans_Fat'].iloc[i])
    cur_Cholesterol = float(starbucks_info['Cholesterol'].iloc[i])
    
    cur_Sodium = float(starbucks_info['Sodium'].iloc[i])
    cur_Total_Carbohydrates = float(starbucks_info['Total_Carbohydrates'].iloc[i])
    cur_Dietary_Fiber = float(starbucks_info['Dietary_Fiber'].iloc[i])
    cur_Sugar = float(starbucks_info['Sugar'].iloc[i])
    cur_Protein = float(starbucks_info['Protein'].iloc[i])
    
    cur_Vitamin_A = float(starbucks_info['Vitamin_A'].iloc[i])
    cur_Vitamin_C = float(starbucks_info['Vitamin_C'].iloc[i])
    cur_Calcium = float(starbucks_info['Calcium'].iloc[i])
    cur_Iron = float(starbucks_info['Iron'].iloc[i])
    cur_Caffeine = float(starbucks_info['Caffeine'].iloc[i])
    
    #print(i, cur_name, cur_Milk, cur_Whip, cur_Calories, cur_Total_Fat)
    
    # Same call as in the Beverage loop (see FIXME at the top of this cell).
    new_Beverage = create_Beverage(cur_ID, cur_name, cur_Category, cur_Milk, cur_Whip, 
                                   cur_Calories, cur_Total_Fat, cur_Saturated_Fat, cur_Trans_Fat, cur_Cholesterol, 
                                   cur_Sodium, cur_Total_Carbohydrates, cur_Dietary_Fiber, cur_Sugar, cur_Protein, cur_Vitamin_A, cur_Vitamin_C, cur_Calcium, cur_Iron, cur_Caffeine)

## 구축 온톨로지 저장

In [18]:
# Write the ontology to 'ontology_starbucks.rdf' (relative path) in RDF/XML format.
onto.save(file = 'ontology_starbucks.rdf', format = 'rdfxml')