In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Make sure Gradio is available, installing it on first use if necessary.
try:
    import gradio as gr
except ImportError:
    import subprocess
    import sys

    # Install through the running interpreter's own pip so the package lands in
    # the environment this code executes in; a bare `pip` found on PATH may
    # belong to a different Python. An argument list avoids going through a shell.
    # check=False keeps the install best-effort: if it fails, the import below
    # raises ImportError.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'gradio'], check=False)
    import gradio as gr

# Load and process data
def load_and_process_data(file_paths):
    """Read semicolon-delimited event CSVs and build the training data.

    Every row of every file contributes one entry to each returned
    structure. Consumption is divided by ``guest_count * event_duration``;
    rows where either value equals 0 contribute 0 for every drink instead.

    Args:
        file_paths: Iterable of paths/URLs/file-like objects accepted by
            ``pandas.read_csv``.

    Returns:
        A 3-tuple ``(drinks_per_guest_and_hour, features, y)``:

        * ``drinks_per_guest_and_hour`` -- dict mapping each drink name to a
          list of per-guest-per-hour values; the ``longdrinks`` column is
          multiplied by 0.2 first (presumably litres per longdrink -- confirm).
        * ``features`` -- one ``[quartal, event_duration, guest_count,
          average_age, gender_ratio_percent, event_type]`` list per row.
        * ``y`` -- one list per row with the per-guest-per-hour value of each
          drink in dict order; here ``longdrinks`` is NOT scaled by 0.2.
    """
    drink_names = ['beer', 'white_wine', 'red_wine', 'sparkling_wine', 'soft_drinks', 'water', 'longdrinks']
    drinks_per_guest_and_hour = {name: [] for name in drink_names}
    features = []
    y = []

    for source in file_paths:
        table = pd.read_csv(source, delimiter=';')
        for _, record in table.iterrows():
            guests = record['guest_count']
            hours = record['event_duration']
            # A row without guests or without duration cannot be normalised.
            usable = not (guests == 0 or hours == 0)

            targets = []
            for name in drink_names:
                if not usable:
                    drinks_per_guest_and_hour[name].append(0)
                    targets.append(0)
                    continue
                amount = record[name]
                # Only the per-drink dict stores longdrinks scaled by 0.2;
                # the regression target keeps the raw column value.
                scaled = amount * 0.2 if name == 'longdrinks' else amount
                drinks_per_guest_and_hour[name].append(scaled / (guests * hours))
                targets.append(amount / (guests * hours))

            features.append([
                record['quartal'],
                hours,
                guests,
                record['average_age'],
                record['gender_ratio_percent'],
                record['event_type'],
            ])
            y.append(targets)

    return drinks_per_guest_and_hour, features, y

# Train the model
def train_model(features, y):
    """Fit a random-forest regressor and score it on a held-out split.

    The data is split 80/20 with a fixed seed (``random_state=42``), the
    forest is trained on the larger part and evaluated on the smaller one.

    Args:
        features: Sequence of feature rows.
        y: Sequence of target rows, aligned with ``features``.

    Returns:
        ``(model, mse)`` -- the fitted ``RandomForestRegressor`` and the mean
        squared error of its predictions on the held-out 20 %.
    """
    split = train_test_split(features, y, test_size=0.2, random_state=42)
    train_features, holdout_features, train_targets, holdout_targets = split

    forest = RandomForestRegressor(random_state=42)
    forest.fit(train_features, train_targets)

    holdout_predictions = forest.predict(holdout_features)
    holdout_mse = mean_squared_error(holdout_targets, holdout_predictions)
    return forest, holdout_mse

# Prediction function with formatted output
# Drink keys grouped by category; the redistribution steps below work per category.
_ALCOHOLIC_DRINKS = ('beer', 'white_wine', 'red_wine', 'sparkling_wine', 'longdrinks')
_NON_ALCOHOLIC_DRINKS = ('water', 'soft_drinks')


def _render_section(heading, values_by_drink):
    """Return one ``<h3>`` heading plus a ``<pre>`` list of ``drink: value`` lines."""
    lines = '\n'.join(f'{drink}: {value}' for drink, value in values_by_drink.items())
    return f"<h3>{heading}</h3>\n<pre>{lines}</pre>\n\n"


def predict_drinks(quartal, event_duration, guest_count, average_age, gender_ratio_percent, event_type, beer, white_wine, red_wine, sparkling_wine, soft_drinks, water, longdrinks):
    """Predict drink consumption for an event and render it as HTML.

    Reads the module-level ``model``, ``drinks_per_guest_and_hour`` (only its
    key order is used) and ``mse``.

    Steps:
        1. Predict per-guest-per-hour consumption for every drink and multiply
           the longdrinks prediction by 0.2 (litres per longdrink).
        2. Keep only the drinks whose checkbox flag is truthy.
        3. If water or soft drinks is selected but the other is not, add the
           unselected one's predicted volume to the selected one.
        4. Distribute the predicted volume of unselected alcoholic drinks over
           the selected alcoholic drinks, proportionally to their own share.
        5. Scale by ``guest_count * event_duration`` and convert totals into
           container counts.

    Args:
        quartal, event_duration, guest_count, average_age,
        gender_ratio_percent, event_type: Feature values passed to the model
            in this order.
        beer, white_wine, red_wine, sparkling_wine, soft_drinks, water,
        longdrinks: Truthy if the drink is offered at the event.

    Returns:
        An HTML string with per-guest-per-hour litres, total litres, container
        counts and the model's mean squared error.
    """
    drink_names = list(drinks_per_guest_and_hour)
    # One name -> column lookup instead of repeated linear list.index() scans.
    column_of = {name: position for position, name in enumerate(drink_names)}

    predictions = model.predict([[quartal, event_duration, guest_count, average_age, gender_ratio_percent, event_type]])
    per_guest_hour = predictions[0]
    # The model predicts longdrinks in pieces; convert to litres.
    per_guest_hour[column_of['longdrinks']] *= 0.2

    checkbox_states = [beer, white_wine, red_wine, sparkling_wine, soft_drinks, water, longdrinks]
    selected_drinks = [name for name, ticked in zip(drink_names, checkbox_states) if ticked]
    predictions_selected = [per_guest_hour[column_of[name]] for name in selected_drinks]

    # Non-alcoholic demand of an unselected drink moves to the selected one(s).
    selected_non_alcoholic = [name for name in selected_drinks if name in _NON_ALCOHOLIC_DRINKS]
    unselected_non_alcoholic = [name for name in _NON_ALCOHOLIC_DRINKS if name not in selected_drinks]
    if selected_non_alcoholic and unselected_non_alcoholic:
        unserved_volume = sum(per_guest_hour[column_of[name]] for name in unselected_non_alcoholic)
        extra_per_selected = unserved_volume / len(selected_non_alcoholic)
        for position, name in enumerate(selected_drinks):
            if name in _NON_ALCOHOLIC_DRINKS:
                predictions_selected[position] += extra_per_selected

    # Alcoholic demand of unselected drinks is spread over the selected
    # alcoholic drinks in proportion to each one's share of the selected total.
    alcoholic_positions = [position for position, name in enumerate(selected_drinks) if name in _ALCOHOLIC_DRINKS]
    total_selected_alcoholic = sum(predictions_selected[position] for position in alcoholic_positions)
    total_non_selected_alcoholic = sum(
        per_guest_hour[column_of[name]] for name in _ALCOHOLIC_DRINKS if name not in selected_drinks
    )
    if total_selected_alcoholic != 0:
        for position in alcoholic_positions:
            # Share in percent, back to a fraction, times the volume to hand out.
            # The expression is kept in this exact form so results stay bit-identical.
            correction_factor = predictions_selected[position] / (total_selected_alcoholic / 100) / 100 * total_non_selected_alcoholic
            predictions_selected[position] = predictions_selected[position] + correction_factor

    total_amounts_selected = [amount * guest_count * event_duration for amount in predictions_selected]

    # Map English drink names to German
    drink_names_de = {
        'beer': 'Bier',
        'white_wine': 'Weißwein',
        'red_wine': 'Rotwein',
        'sparkling_wine': 'Sekt',
        'soft_drinks': 'Softgetränke',
        'water': 'Wasser',
        'longdrinks': 'Longdrinks'
    }
    selected_names_de = [drink_names_de[name] for name in selected_drinks]

    # Displayed values are rounded to two decimal places.
    litres_per_guest_hour = dict(zip(selected_names_de, (round(value, 2) for value in predictions_selected)))
    total_litres = dict(zip(selected_names_de, (round(value, 2) for value in total_amounts_selected)))

    # Container label appended to each computed container count.
    container_sizes = {
        'Bier': '(24 x 0,33L)',
        'Weißwein': '(0,75L)',
        'Rotwein': '(0,75L)',
        'Sekt': '(0,75L)',
        'Softgetränke': '(24 x 0,33L)',
        'Wasser': '(24 x 0,33L)',
        'Longdrinks': 'Stück'
    }

    container_amounts = {}
    for name_de, litres in total_litres.items():
        if name_de == 'Longdrinks':
            units = litres / 0.2  # Assuming 0.2 liters per Longdrink
        elif name_de in ('Bier', 'Softgetränke', 'Wasser'):
            units = litres / (0.33 * 24)  # 0.33 liters per bottle, 24 bottles per crate
        else:
            units = litres / 0.75  # 0.75 liters per bottle for wine and sparkling wine
        container_amounts[name_de] = f"{round(units, 2)} {container_sizes[name_de]}"

    # The <style> literal contains braces, so it is concatenated rather than formatted.
    return (
        "<style>pre, h3 {font-family: Arial, sans-serif;}</style>"
        + _render_section("Menge pro Gast und Stunde Liter:", litres_per_guest_hour)
        + _render_section("Gesamtmengen Liter:", total_litres)
        + _render_section("Gebindegrößen:", container_amounts)
        + "<h3>Mean Squared Error (MSE):</h3> {}".format(round(mse, 4))
    )

# Main execution
# CSV sources for the training data; each is read by load_and_process_data
# with pandas.read_csv using ';' as delimiter.
file_paths = [
    'https://raw.githubusercontent.com/makis62547/Getr-nkeprognose/main/Glaskugel%20331.csv',
    'https://raw.githubusercontent.com/makis62547/Getr-nkeprognose/main/Glaskugeldaten%20MB.csv'
]
# Load the data and train once when this cell/module runs. The resulting
# `drinks_per_guest_and_hour`, `model` and `mse` are module-level names that
# predict_drinks reads as globals.
drinks_per_guest_and_hour, features, y = load_and_process_data(file_paths)
model, mse = train_model(features, y)

# Event type mapping
# Maps the label offered in the "Eventtyp" dropdown to the numeric event_type
# value that gradio_interface passes on to the model as a feature.
# NOTE(review): presumably these codes match the `event_type` column of the
# training CSVs -- verify against the data.
event_type_mapping = {
    'Abend VA mit Musik etc.': 1,
    'Veranstaltung mit Program, Vorträge, Präsentationen etc.': 2,
    'Hochzeit': 3,
    'Firmenevent für Kunden': 4,
    'Firmenevent für Mitarbeiter': 5,
    'Familienfeier, Geburtstage, Gartenfest etc.': 6
}

# Gradio Interface
def gradio_interface(quartal, event_duration, guest_count, average_age, gender_ratio_percent, event_type, beer, white_wine, red_wine, sparkling_wine, soft_drinks, water, longdrinks):
    """Adapter between the Gradio form and ``predict_drinks``.

    Translates the selected event-type label into its numeric code via
    ``event_type_mapping`` (an unknown label raises ``KeyError``) and forwards
    all other arguments unchanged.

    Returns:
        The HTML string produced by ``predict_drinks``.
    """
    numeric_event_type = event_type_mapping[event_type]
    drink_flags = (beer, white_wine, red_wine, sparkling_wine, soft_drinks, water, longdrinks)
    return predict_drinks(
        quartal,
        event_duration,
        guest_count,
        average_age,
        gender_ratio_percent,
        numeric_event_type,
        *drink_flags,
    )

# Form components, listed in the same order as the positional parameters of
# gradio_interface: six event features followed by one checkbox per drink
# (beer, white wine, red wine, sparkling wine, soft drinks, water, longdrinks).
inputs = [
    gr.components.Number(label="Quartal"),
    gr.components.Number(label="Dauer der Veranstaltung (Stunden)"),
    gr.components.Number(label="Anzahl der Gäste"),
    gr.components.Number(label="Durchschnittliches Alter der Gäste"),
    gr.components.Number(label="Geschlechterverhältnis (Männer %)"),
    # Choices are the keys of event_type_mapping; gradio_interface converts
    # the chosen label to its numeric code.
    gr.components.Dropdown(choices=list(event_type_mapping.keys()), label="Eventtyp"),
    gr.components.Checkbox(label="Bier"),
    gr.components.Checkbox(label="Weißwein"),
    gr.components.Checkbox(label="Rotwein"),
    gr.components.Checkbox(label="Sekt"),
    gr.components.Checkbox(label="Softdrinks"),
    gr.components.Checkbox(label="Wasser"),
    gr.components.Checkbox(label="Longdrinks")
]

# predict_drinks returns an HTML string, so the result is shown in an HTML component.
outputs = gr.components.HTML(label="Vorhergesagte Getränke pro Gast und Stunde")

# Build the interface and start serving it immediately (side effect at import/cell run).
gr.Interface(fn=gradio_interface,
             inputs=inputs,
             outputs=outputs,
             title=" Glaskugel - Getränkeprognose für Veranstaltungen").launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://2b81ee1b93f018be76.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


