In [3]:
import pandas as pd
import re

In [4]:
# Input file holding one coffee lot per row (replace with your actual CSV filename)
CSV_PATH = 'coffee_dataset.csv'

# Read the whole CSV into a DataFrame; later cells iterate over its rows
df = pd.read_csv(CSV_PATH)

In [5]:
# Function to clean and format strings for URIs
def format_uri(text):
    """Reduce a raw CSV value to a token usable as the local part of a ':name' URI.

    Args:
        text: Any cell value; it is stringified before cleaning.

    Returns:
        'Unknown' for missing values (NaN/None), for the placeholders
        'Unknown'/'unknown', and for values with no usable characters left
        after cleaning. Otherwise the value with every non-word character
        removed, e.g. 'Costa Rica' -> 'CostaRica'.
    """
    if pd.isna(text) or text == 'Unknown' or text == 'unknown':
        return 'Unknown'
    # \W drops punctuation and *every* kind of whitespace (spaces, tabs,
    # newlines, non-breaking spaces); only letters, digits and '_' survive.
    cleaned = re.sub(r'\W', '', str(text))
    # An empty token would make callers emit a bare ':' instead of a name.
    return cleaned or 'Unknown'

# Lookup tables from raw CSV values to ontology URIs, one table per CSV category
_ONTOLOGY_URIS = {
    'Country_of_Origin': {
        'Ethiopia': ':Ethiopia',
        'Brazil': ':Brazil',
        'Guatemala': ':Guatemala',
        'Peru': ':Peru',
        'Mexico': ':Mexico',
        'China': ':China',
        'Costa Rica': ':CostaRica',
        'Uganda': ':Uganda',
        'Taiwan': ':Taiwan',
        'Tanzania': ':Tanzania',
        'Kenya': ':Kenya',
        'Colombia': ':Colombia',
        'Panama': ':Panama',
        'El Salvador': ':ElSalvador',
        'Indonesia': ':Indonesia',
        'Honduras': ':Honduras',
        'United States (Hawaii)': ':UnitedStatesHawaii',
        'United States (Puerto Rico)': ':UnitedStatesPuertoRico',
        'Nicaragua': ':Nicaragua',
    },
    # Color keys are lowercase; the lookup lowercases the value first
    'Color': {
        'green': ':Green',
        'blue-green': ':BlueGreen',
        'yellow': ':Yellow',
        'yellow-green': ':YellowGreen',
        'brown': ':Brown',
        'brown-green': ':BrownGreen',
    },
    'Processing_Method': {
        'Washed / Wet': ':WashedWet',
        'Natural / Dry': ':NaturalDry',
        'Semi-washed / Semi-pulped': ':SemiWashed',
        'Pulped natural / honey': ':PulpedNaturalHoney',
        'Other': ':OtherProcessing',
    },
    'QualityTier': {
        'Below Specialty Grade': ':BelowSpecialtyGrade',
        'Specialty Grade': ':SpecialtyGrade',
        'Premium Grade': ':PremiumGrade',
        'Elite Grade': ':EliteGrade',
    },
    'BrewingRecommendation': {
        'Pour Over Brewing': ':PourOverBrewing',
        'French Press Brewing': ':FrenchPressBrewing',
        'Aeropress Brewing': ':AeropressBrewing',
        'Cold Brew Brewing': ':ColdBrewBrewing',
    },
}


def map_to_ontology(value, category):
    """Translate a raw CSV value into a ':'-prefixed ontology URI.

    Args:
        value: Cell value from the CSV.
        category: Name of the lookup table to consult ('Country_of_Origin',
            'Color', 'Processing_Method', 'QualityTier' or
            'BrewingRecommendation'). Any other name skips the tables.

    Returns:
        ':Unknown' for missing values and the 'Unknown'/'unknown'
        placeholders; the table entry when the stripped value is listed for
        the category (case-insensitively for 'Color' only); otherwise ':'
        followed by format_uri() of the stripped value.
    """
    if pd.isna(value) or value in ('Unknown', 'unknown'):
        return ':Unknown'

    text = str(value).strip()
    table = _ONTOLOGY_URIS.get(category, {})
    key = text.lower() if category == 'Color' else text
    if key in table:
        return table[key]

    # Not listed (or unrecognised category): derive the URI from the value itself
    return f':{format_uri(text)}'

# Function to parse taste categories from string
def parse_taste_categories(taste_str):
    """Split a stringified list of taste categories into clean names.

    Args:
        taste_str: Cell value such as "['Sweet', 'Sour']"; it is stringified
            before parsing.

    Returns:
        The comma-separated entries with single quotes and square brackets
        removed and surrounding whitespace trimmed; blank entries are
        dropped. Missing values (NaN/None) give an empty list.
    """
    if pd.isna(taste_str):
        return []

    # Delete the list-repr punctuation (quotes and brackets) in a single pass
    bare = str(taste_str).translate(str.maketrans('', '', "'[]"))
    trimmed = (piece.strip() for piece in bare.split(","))
    return [piece for piece in trimmed if piece]

In [6]:
# Generate TTL content

# Prefix declarations that open the Turtle document
_TTL_PREFIXES = """@prefix : <http://coffee-quality.org/ontology/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

"""

# Quality scores as (predicate, CSV column); written as bare numeric literals
_SCORE_PREDICATES = [
    (':aromaScore', 'Aroma'),
    (':flavorScore', 'Flavor'),
    (':aftertasteScore', 'Aftertaste'),
    (':acidityScore', 'Acidity'),
    (':bodyScore', 'Body'),
    (':balanceScore', 'Balance'),
    (':uniformityScore', 'Uniformity'),
    (':cleanCupScore', 'Clean_Cup'),
    (':sweetnessScore', 'Sweetness'),
    (':cupperPoints', 'Cupper_Points'),
    (':totalCupPoints', 'Total_Cup_Points'),
]

# Boolean taste properties as (predicate, CSV column)
_FLAG_PREDICATES = [
    (':isAromatic', 'Category_Aromatic'),
    (':isComplex', 'Category_Complex'),
    (':isFullBodied', 'Category_Full-bodied'),
    (':isLingering', 'Category_Lingering'),
    (':isSmooth', 'Category_Smooth'),
    (':isSour', 'Category_Sour'),
    (':isSweet', 'Category_Sweet'),
]


def _ttl_string(value):
    """Return value as a double-quoted Turtle string literal, escaped."""
    escaped = (
        str(value)
        .replace('\\', '\\\\')  # backslash first so later escapes are not doubled
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
        .replace('\t', '\\t')
    )
    return f'"{escaped}"'


def _ttl_flag(value):
    """Render a flag cell; Python's 'True'/'False' become Turtle's 'true'/'false'.

    Any other value (e.g. 0/1) is passed through as its string form.
    """
    text = str(value)
    return text.lower() if text in ('True', 'False') else text


def _lot_to_ttl(lot_id, row):
    """Serialize one CSV row as a Turtle subject block.

    Args:
        lot_id: Subject name for the lot, e.g. ':CoffeeLot_001'.
        row: The row of the coffee DataFrame (supports row['column']).

    Returns:
        The block's text: statements separated by ' ;' and always closed by
        ' .' plus a blank line. Literal cells that are missing (NaN) produce
        no triple rather than a 'nan' token.
    """
    statements = [f"{lot_id} a :CoffeeLot"]

    def emit(predicate, obj):
        statements.append(f"    {predicate} {obj}")

    if not pd.isna(row['Species']):
        emit(':hasSpecies', _ttl_string(row['Species']))
    emit(':fromCountry', map_to_ontology(row['Country_of_Origin'], 'Country_of_Origin'))
    if not pd.isna(row['Harvest_Year']):
        emit(':harvestYear', _ttl_string(row['Harvest_Year']))
    emit(':hasColor', map_to_ontology(row['Color'], 'Color'))
    emit(':hasProcessingMethod', map_to_ontology(row['Processing_Method'], 'Processing_Method'))

    # Quality scores
    for predicate, column in _SCORE_PREDICATES:
        if not pd.isna(row[column]):
            emit(predicate, row[column])

    # Taste categories as one comma-separated string; reuses the shared parser
    # instead of duplicating its quote/bracket stripping here
    if not pd.isna(row['Taste_Categories']):
        emit(':tasteCategories', _ttl_string(", ".join(parse_taste_categories(row['Taste_Categories']))))

    # Boolean taste properties
    for predicate, column in _FLAG_PREDICATES:
        if not pd.isna(row[column]):
            emit(predicate, _ttl_flag(row[column]))

    # Quality tier
    emit(':hasQualityTier', map_to_ontology(row['QualityTier'], 'QualityTier'))

    # Brewing recommendations (';'-separated; a missing cell means none)
    recommendations = row['BrewingRecommendations']
    if not pd.isna(recommendations):
        for rec in str(recommendations).split(';'):
            rec = rec.strip()
            if rec:
                emit(':hasBrewingRecommendation', map_to_ontology(rec, 'BrewingRecommendation'))

    # Joining guarantees exactly one terminating ' .' per lot, even when the
    # lot has no brewing recommendation to carry it.
    return " ;\n".join(statements) + " .\n\n"


# Process each row in the CSV. Lot numbers are the row's index label + 1,
# zero-padded to at least three digits (larger numbers simply get wider).
ttl_blocks = [
    _lot_to_ttl(f":CoffeeLot_{index + 1:03d}", row)
    for index, row in df.iterrows()
]
ttl_content = _TTL_PREFIXES + "".join(ttl_blocks)

# Write to TTL file
with open('coffee_data.ttl', 'w', encoding='utf-8') as f:
    f.write(ttl_content)

print(f"Successfully converted {len(df)} coffee lots to TTL format!")
print("Output file: coffee_data.ttl")

Successfully converted 1517 coffee lots to TTL format!
Output file: coffee_data.ttl
