# 🎙️ EchoAccent - With Cuisine Info

**Predict accent AND discover regional cuisine!**

In [None]:
# Mount Google Drive inside the Colab VM so the project folder is reachable.
from google.colab import drive
drive.mount('/content/drive')

import os

# Later cells load the model checkpoints from this folder; it is also made
# the working directory. os.chdir raises if the folder does not exist.
PROJECT_DIR = '/content/drive/MyDrive/IndicAccent_Project'
os.chdir(PROJECT_DIR)
print(f'✅ Working directory: {os.getcwd()}')

In [None]:
!pip install -q gradio transformers librosa torch torchaudio soundfile
!apt-get install -y ffmpeg > /dev/null 2>&1
print('‚úÖ Dependencies installed!')

In [None]:
import torch
import torch.nn as nn
import numpy as np
import librosa
import soundfile as sf
import warnings
from transformers import HubertModel, Wav2Vec2FeatureExtractor

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Device: {device}')

# Class index -> accent name; the prediction functions use the index to pair
# each softmax probability with its accent label.
label_map = dict(enumerate((
    "Telugu",
    "Tamil",
    "Malayalam",
    "Kannada",
    "Hindi",
    "Gujarati",
)))

# Cuisine information for each accent, keyed by the accent names used in
# label_map. Every entry has the same four fields:
#   "region"        - region(s) the accent is associated with
#   "famous_dishes" - list of display strings, each prefixed with an emoji
#   "specialty"     - one-sentence description of the cuisine
#   "emoji"         - single emoji shown in the cuisine heading
# The emoji were previously stored as mojibake (UTF-8 bytes decoded with the
# wrong charset); they are restored to the intended characters here.
cuisine_info = {
    "Telugu": {
        "region": "Andhra Pradesh & Telangana",
        "famous_dishes": [
            "🍛 Hyderabadi Biryani",
            "🌶️ Gongura Pachadi (Sorrel Chutney)",
            "🥘 Pulihora (Tamarind Rice)",
            "🍲 Pesarattu (Green Gram Dosa)",
            "🥗 Gutti Vankaya (Stuffed Eggplant)"
        ],
        "specialty": "Known for spicy and tangy flavors, extensive use of tamarind and red chilies",
        "emoji": "🌶️"
    },
    "Tamil": {
        "region": "Tamil Nadu",
        "famous_dishes": [
            "🍛 Chettinad Chicken",
            "🥞 Dosa & Idli",
            "🍲 Sambar",
            "🥘 Pongal",
            "🍚 Lemon Rice"
        ],
        "specialty": "Rice-based dishes, aromatic spices, coconut and curry leaves",
        "emoji": "🥥"
    },
    "Malayalam": {
        "region": "Kerala",
        "famous_dishes": [
            "🐟 Kerala Fish Curry",
            "🥞 Appam with Stew",
            "🍌 Sadya (Banana Leaf Feast)",
            "🥘 Puttu & Kadala",
            "🦐 Prawn Moilee"
        ],
        "specialty": "Coconut-based curries, seafood, banana chips, use of coconut oil",
        "emoji": "🥥"
    },
    "Kannada": {
        "region": "Karnataka",
        "famous_dishes": [
            "🍛 Bisi Bele Bath",
            "🥞 Mysore Masala Dosa",
            "🥘 Ragi Mudde",
            "🍲 Jolada Rotti with Ennegai",
            "🍬 Mysore Pak"
        ],
        "specialty": "Diverse cuisine from coastal to North Karnataka, use of jaggery and coconut",
        "emoji": "🍛"
    },
    "Hindi": {
        "region": "North India (Delhi, UP, MP, Rajasthan)",
        "famous_dishes": [
            "🍛 Butter Chicken",
            "🫓 Naan & Roti",
            "🥘 Dal Makhani",
            "🍲 Chole Bhature",
            "🥗 Paneer Tikka"
        ],
        "specialty": "Rich gravies, tandoor cooking, dairy products, wheat-based breads",
        "emoji": "🫓"
    },
    "Gujarati": {
        "region": "Gujarat",
        "famous_dishes": [
            "🍛 Dhokla",
            "🥘 Undhiyu",
            "🫓 Thepla",
            "🍲 Gujarati Kadhi",
            "🍬 Mohanthal"
        ],
        "specialty": "Sweet and savory combinations, vegetarian cuisine, use of jaggery and sugar",
        "emoji": "🍬"
    }
}

print('✅ Cuisine database loaded!')

In [None]:
class MFCCModel(nn.Module):
    """Feed-forward accent classifier over a fixed-length MFCC feature vector.

    Architecture: input_dim -> 256 -> 128 -> num_classes, with ReLU and
    Dropout(0.3) after each hidden layer. ``forward`` returns raw logits.
    """

    def __init__(self, input_dim=80, num_classes=6):
        super().__init__()
        layers = [
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        ]
        # state_dict keys derive from the attribute name and layer positions
        # ("net.0.*", "net.3.*", "net.6.*"); keep both stable so checkpoints load.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.net(x)
        return logits

class HuBERTClassifier(nn.Module):
    """Feed-forward accent classifier over a pooled HuBERT embedding.

    Architecture: input_dim -> 256 -> 128 -> num_classes, with ReLU after each
    hidden layer and Dropout of 0.4 then 0.3. ``forward`` returns raw logits.
    """

    def __init__(self, input_dim=768, num_classes=6):
        super().__init__()
        layers = [
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes),
        ]
        # state_dict keys derive from the attribute name and layer positions
        # ("classifier.0.*", "classifier.3.*", "classifier.6.*"); keep both stable.
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.classifier(x)
        return logits

# Status message for the cell (emoji restored from mojibake).
print('✅ Models defined')

In [None]:
# The MFCC classifier is mandatory: any failure to load it propagates.
# NOTE(review): depending on the installed torch version / weights_only
# setting, torch.load may unpickle arbitrary objects - only load checkpoints
# you trust.
mfcc_model = MFCCModel().to(device)
mfcc_model.load_state_dict(torch.load(f'{PROJECT_DIR}/mfcc_best_model.pt', map_location=device))
mfcc_model.eval()
print('✅ MFCC model loaded')

# HuBERT is optional: when its checkpoint or the pretrained backbone cannot be
# loaded, the notebook keeps working with hubert_available = False.
try:
    hubert_classifier = HuBERTClassifier().to(device)
    hubert_classifier.load_state_dict(torch.load(f'{PROJECT_DIR}/hubert_best_model.pt', map_location=device))
    hubert_classifier.eval()
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/hubert-base-ls960")
    hubert = HubertModel.from_pretrained("facebook/hubert-base-ls960").to(device)
    hubert.eval()
    hubert_available = True
    print('✅ HuBERT model loaded')
except Exception as exc:
    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit and
    # hide the real cause (missing file, shape mismatch, download error, ...),
    # so catch Exception only and report what actually went wrong.
    hubert_available = False
    print(f'⚠️ HuBERT not found ({type(exc).__name__}: {exc})')

In [None]:
# Sample rate (Hz) that every clip is converted to before feature extraction.
TARGET_SR = 16000

def load_audio_safe(audio_path):
    """Load an audio file as a mono waveform sampled at ``TARGET_SR``.

    ``soundfile`` is tried first: multi-channel audio is averaged down to one
    channel and the signal is resampled when its rate differs from
    ``TARGET_SR``. If any step of that path raises, the file is loaded with
    ``librosa.load`` instead.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        NumPy array with the mono samples at ``TARGET_SR``.

    Raises:
        Exception: Whatever ``librosa.load`` raises when the fallback fails too.
    """
    try:
        arr, sr = sf.read(audio_path)
        if len(arr.shape) > 1:
            # Average across axis 1 to collapse the channels into one.
            arr = arr.mean(axis=1)
        if sr != TARGET_SR:
            arr = librosa.resample(arr, orig_sr=sr, target_sr=TARGET_SR)
        return arr
    except Exception:
        # Deliberate best-effort fallback. `except Exception` (rather than a
        # bare `except:`) lets KeyboardInterrupt and SystemExit propagate.
        arr, _ = librosa.load(audio_path, sr=TARGET_SR, mono=True)
        return arr

def _load_normalized_audio(audio_path):
    """Load audio with ``load_audio_safe`` and scale it to (almost) unit peak.

    Raises:
        ValueError: If the file contains no samples; without this check the
            peak computation fails with NumPy's opaque "zero-size array" error.
    """
    arr = load_audio_safe(audio_path)
    if arr.size == 0:
        raise ValueError("Audio contains no samples")
    # The small epsilon avoids division by zero for an all-silent clip.
    return arr / (np.max(np.abs(arr)) + 1e-9)

def extract_mfcc_features(audio_path):
    """Return MFCC statistics for the clip at ``audio_path``.

    40 MFCCs are computed and summarized by their mean and standard deviation
    along axis 1, then concatenated into one vector of 80 values.

    Args:
        audio_path: Path of the audio file.

    Returns:
        1-D NumPy array: 40 means followed by 40 standard deviations.

    Raises:
        ValueError: If the audio contains no samples.
    """
    arr = _load_normalized_audio(audio_path)
    mfcc = librosa.feature.mfcc(y=arr, sr=TARGET_SR, n_mfcc=40)
    return np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)])

def extract_hubert_features(audio_path):
    """Return a pooled HuBERT embedding for the clip at ``audio_path``.

    The waveform is run through the module-level ``feature_extractor`` and
    ``hubert`` model; the last hidden state is averaged over dim 1 and the
    first (only) batch item is returned.

    Args:
        audio_path: Path of the audio file.

    Returns:
        1-D NumPy array holding the mean-pooled last hidden state.

    Raises:
        ValueError: If the audio contains no samples.
    """
    arr = _load_normalized_audio(audio_path)
    inputs = feature_extractor(arr, sampling_rate=TARGET_SR, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = hubert(inputs.input_values.to(device))
        embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()[0]
    return embedding

def format_cuisine_info(accent):
    """Build the Markdown snippet describing the cuisine linked to ``accent``.

    Args:
        accent: Accent name; must be a key of ``cuisine_info``.

    Returns:
        Markdown text with a heading (emoji, accent, region), the specialty
        sentence and one bullet line per famous dish.

    Raises:
        KeyError: If ``accent`` has no entry in ``cuisine_info``.
    """
    info = cuisine_info[accent]

    parts = [
        f"\n{info['emoji']} **{accent} Cuisine** ({info['region']})\n\n",
        f"**Specialty:** {info['specialty']}\n\n",
        "**Famous Dishes:**\n",
    ]
    # The bullet was stored as mojibake; it is the "•" character.
    parts.extend(f"  • {dish}\n" for dish in info['famous_dishes'])

    return "".join(parts)

print('✅ Functions ready')

In [None]:
def predict_mfcc_with_cuisine(audio_path):
    """Classify the accent of a clip with the MFCC model and describe its cuisine.

    Args:
        audio_path: Path of the audio file, or ``None`` when nothing was provided.

    Returns:
        A ``(scores, cuisine_markdown)`` pair. ``scores`` maps each accent name
        to its softmax probability. On missing input or any failure, ``scores``
        is ``{"Error": message}`` and the Markdown text is empty.
    """
    if audio_path is None:
        return {"Error": "Please upload or record audio first"}, ""

    try:
        features = extract_mfcc_features(audio_path)
        # Add a leading batch dimension of size 1 before the forward pass.
        batch = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)

        with torch.no_grad():
            probabilities = torch.softmax(mfcc_model(batch), dim=1)[0].cpu().numpy()

        scores = {}
        for class_idx in range(6):
            scores[label_map[class_idx]] = float(probabilities[class_idx])

        # The most probable accent decides which cuisine is shown.
        best_accent = max(scores, key=scores.get)
        return scores, format_cuisine_info(best_accent)

    except Exception as exc:
        return {"Error": f"Failed: {exc!s}"}, ""

def predict_hubert_with_cuisine(audio_path):
    """Classify the accent of a clip with the HuBERT model and describe its cuisine.

    Args:
        audio_path: Path of the audio file, or ``None`` when nothing was provided.

    Returns:
        A ``(scores, cuisine_markdown)`` pair. ``scores`` maps each accent name
        to its softmax probability. When input is missing, HuBERT is not
        available, or anything fails, ``scores`` is ``{"Error": message}`` and
        the Markdown text is empty.
    """
    if audio_path is None:
        return {"Error": "Please upload or record audio first"}, ""

    if not hubert_available:
        return {"Error": "HuBERT model not available"}, ""

    try:
        embedding = extract_hubert_features(audio_path)
        # Add a leading batch dimension of size 1 before the forward pass.
        batch = torch.tensor(embedding, dtype=torch.float32).unsqueeze(0).to(device)

        with torch.no_grad():
            probabilities = torch.softmax(hubert_classifier(batch), dim=1)[0].cpu().numpy()

        scores = {}
        for class_idx in range(6):
            scores[label_map[class_idx]] = float(probabilities[class_idx])

        # The most probable accent decides which cuisine is shown.
        best_accent = max(scores, key=scores.get)
        return scores, format_cuisine_info(best_accent)

    except Exception as exc:
        return {"Error": f"Failed: {exc!s}"}, ""

def predict_both_with_cuisine(audio_path):
    """Run both models on one clip and summarize how their predictions compare.

    Args:
        audio_path: Path of the audio file, or ``None`` when nothing was provided.

    Returns:
        ``(mfcc_result, hubert_result, comparison, cuisine_display)``:
        the two score dicts as returned by the single-model functions, a
        Markdown comparison summary, and the cuisine Markdown to show. When
        both models succeed but disagree, both cuisines are shown; when only
        one succeeds, its cuisine is shown. For ``None`` input the result is
        ``({}, {}, "", "")``.
    """
    if audio_path is None:
        return {}, {}, "", ""

    mfcc_result, mfcc_cuisine = predict_mfcc_with_cuisine(audio_path)
    hubert_result, hubert_cuisine = predict_hubert_with_cuisine(audio_path)

    # The single-model functions signal failure with an "Error" key.
    mfcc_ok = "Error" not in mfcc_result
    hubert_ok = "Error" not in hubert_result

    # Comparison text (emoji restored from mojibake)
    parts = ["📊 **Model Comparison**\n\n"]

    if mfcc_ok:
        mfcc_top = max(mfcc_result, key=mfcc_result.get)
        parts.append(f"🎯 MFCC predicts: **{mfcc_top}**\n")

    if hubert_ok:
        hubert_top = max(hubert_result, key=hubert_result.get)
        parts.append(f"🤖 HuBERT predicts: **{hubert_top}**\n\n")

    if mfcc_ok and hubert_ok:
        if mfcc_top == hubert_top:
            parts.append(f"✅ Both models agree on **{mfcc_top}**!\n")
            cuisine_display = mfcc_cuisine
        else:
            parts.append("⚠️ Models disagree. Showing both cuisines below.")
            cuisine_display = "**MFCC Prediction:**" + mfcc_cuisine + "\n\n**HuBERT Prediction:**" + hubert_cuisine
    else:
        # At most one model succeeded: show whichever cuisine text is non-empty.
        cuisine_display = mfcc_cuisine or hubert_cuisine

    return mfcc_result, hubert_result, "".join(parts), cuisine_display

print('✅ Prediction functions ready')

In [None]:
import gradio as gr

# Three-tab Gradio app: one tab per model plus a side-by-side comparison tab.
# All emoji in labels and Markdown were stored as mojibake and are restored here.
with gr.Blocks(title="EchoAccent with Cuisine", theme=gr.themes.Soft()) as demo:

    gr.Markdown(
        """
        # 🎙️ EchoAccent - Accent Classifier with Cuisine Discovery

        **Predict accent AND discover regional cuisine!**

        Upload audio or record your voice to:
        - 🎯 Identify your Indian accent
        - 🍛 Discover famous dishes from that region
        - 🌶️ Learn about regional food specialties
        """
    )

    with gr.Tabs():

        # --- Tab 1: MFCC model only ---
        with gr.Tab("🎯 MFCC Model"):
            with gr.Row():
                with gr.Column():
                    # type="filepath" hands the prediction function a file path.
                    audio_mfcc = gr.Audio(
                        sources=["upload", "microphone"],
                        type="filepath",
                        label="🎤 Record or 📁 Upload Audio"
                    )
                    btn_mfcc = gr.Button("🎯 Predict Accent & Cuisine", variant="primary", size="lg")

                with gr.Column():
                    out_mfcc = gr.Label(num_top_classes=6, label="Accent Prediction")

            cuisine_mfcc = gr.Markdown(label="Regional Cuisine")

            btn_mfcc.click(predict_mfcc_with_cuisine, audio_mfcc, [out_mfcc, cuisine_mfcc])

        # --- Tab 2: HuBERT model only ---
        with gr.Tab("🤖 HuBERT Model"):
            with gr.Row():
                with gr.Column():
                    audio_hubert = gr.Audio(
                        sources=["upload", "microphone"],
                        type="filepath",
                        label="🎤 Record or 📁 Upload Audio"
                    )
                    btn_hubert = gr.Button("🤖 Predict Accent & Cuisine", variant="primary", size="lg")

                with gr.Column():
                    out_hubert = gr.Label(num_top_classes=6, label="Accent Prediction")

            cuisine_hubert = gr.Markdown(label="Regional Cuisine")

            btn_hubert.click(predict_hubert_with_cuisine, audio_hubert, [out_hubert, cuisine_hubert])

        # --- Tab 3: both models on the same clip ---
        with gr.Tab("🔍 Compare Both"):
            audio_both = gr.Audio(
                sources=["upload", "microphone"],
                type="filepath",
                label="🎤 Record or 📁 Upload Audio"
            )
            btn_both = gr.Button("🔍 Compare Models & Show Cuisine", variant="primary", size="lg")

            with gr.Row():
                out_mfcc_cmp = gr.Label(num_top_classes=6, label="MFCC")
                out_hubert_cmp = gr.Label(num_top_classes=6, label="HuBERT")

            comparison_text = gr.Markdown(label="Comparison")
            cuisine_both = gr.Markdown(label="Regional Cuisine")

            # Output order matches the 4-tuple returned by predict_both_with_cuisine.
            btn_both.click(
                predict_both_with_cuisine,
                audio_both,
                [out_mfcc_cmp, out_hubert_cmp, comparison_text, cuisine_both]
            )

    gr.Markdown(
        """
        ---
        ### 💡 Tips:
        - Speak in your **native language** for best results
        - Record **5-10 seconds** of clear speech
        - Use a **quiet environment**

        ### 🍛 About the Cuisines:
        Each accent is associated with a rich culinary tradition. Discover famous dishes and specialties from:
        - 🌶️ Telugu (Andhra/Telangana) - Spicy & Tangy
        - 🥥 Tamil (Tamil Nadu) - Rice & Coconut
        - 🥥 Malayalam (Kerala) - Seafood & Coconut
        - 🍛 Kannada (Karnataka) - Diverse & Flavorful
        - 🫓 Hindi (North India) - Rich & Creamy
        - 🍬 Gujarati (Gujarat) - Sweet & Savory
        """
    )

print('✅ Interface created!')

In [None]:
# Start the Gradio server; share=True asks Gradio for a public share link.
demo.launch(share=True, debug=False)

# Status messages (emoji restored from mojibake).
print('\n✅ Demo launched with cuisine info!')
print('\n🍛 Now you can discover regional cuisines along with accent predictions!')