This is how I tackled a Kaggle competition using Elasticsearch
=====
The code below is explained in my blog article: http://melvyn.pythonanywhere.com/posts/1/  It will let you score more than 76%.

In [29]:
from pyelasticsearch.client import ElasticSearch
import random, ast, csv, requests, json, operator

# Transform each recipe's ingredient list into a single string
def list_to_string_ingredients(recipes):
    """Flatten every recipe's ingredient list into one string.

    Each recipe dict is modified in place: the value stored under
    'ingredients' is replaced by its items joined with ', '.

    Returns the same ``recipes`` object that was passed in.
    """
    separator = ', '
    for entry in recipes:
        joined = separator.join(entry['ingredients'])
        entry['ingredients'] = joined
    return recipes

# Index a list of recipes in a given ES index
def index_recipes_in_ES(index, recipes):
    """Index every recipe of ``recipes`` into the Elasticsearch index ``index``.

    Each recipe is sent with doc_type 'recipe' and uses its own 'id' value as
    the document id. Whenever the response's 'created' flag is falsy, the
    whole response is printed so the case can be inspected.

    Args:
        index: name of the target index.
        recipes: iterable of recipe dicts, each holding an 'id' key.
    """
    es = ElasticSearch()
    for recipe in recipes:
        res = es.index(index=index, doc_type='recipe', id=recipe['id'], doc=recipe)
        # NOTE(review): presumably 'created' is False when an existing document
        # with the same id was overwritten -- confirm against the ES version used.
        if not res['created']:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print(res)

# Build your more like this query
def build_mlt(nb, doc_id, index="test-recipe", doc_type="recipe"):
    """Build the body of a ``more_like_this`` search request.

    The query compares on the 'ingredients' field and uses, as its "like"
    reference, the already-indexed document identified by
    (``index``, ``doc_type``, ``doc_id``).

    Args:
        nb: maximum number of hits requested (the "size" of the search).
        doc_id: id of the reference document.
        index: index holding the reference document; defaults to the
            previously hard-coded "test-recipe".
        doc_type: document type of the reference document; defaults to the
            previously hard-coded "recipe".

    Returns:
        A dict ready to be serialised to JSON and posted to a ``_search``
        endpoint.
    """
    return {
        "from": 0,
        "size": nb,
        "query": {
            "more_like_this": {
                "fields": ["ingredients"],
                "like": [{"_index": index, "_type": doc_type, "_id": doc_id}],
                "min_term_freq": 1,
                "max_query_terms": 50,
                "minimum_should_match": "25%",
            },
        },
    }

# Compute the difference between two cuisines distributions
def diff_distribution(similar_recipes):
    """Predict a cuisine from the recipes most similar to an unknown recipe.

    For every cuisine found in ``similar_recipes`` the score is its share of
    the neighbours (as a percentage) minus the value stored for that cuisine
    in the module-level ``CUISINE_DISTRIBUTION``. The cuisine with the highest
    score is returned.

    Args:
        similar_recipes: list of recipe dicts, each holding a 'cuisine' key.

    Returns:
        The predicted cuisine. When ``similar_recipes`` is empty, the cuisine
        with the largest value in ``CUISINE_DISTRIBUTION`` is returned, or
        None if that mapping is empty as well.
    """
    if not similar_recipes:
        # No neighbour at all: max() over an empty mapping would raise and
        # abort the whole prediction run, so fall back to the global prior.
        if not CUISINE_DISTRIBUTION:
            return None
        return max(CUISINE_DISTRIBUTION, key=CUISINE_DISTRIBUTION.get)

    counts = {}
    for recipe in similar_recipes:
        cuisine = recipe['cuisine']
        counts[cuisine] = counts.get(cuisine, 0) + 1

    total = len(similar_recipes)
    scores = {}
    for cuisine, count in counts.items():
        # 100.0 forces true division; integer division under Python 2 would
        # truncate and could make different counts collide.
        share = 100.0 * count / total
        # NOTE(review): ``share`` is a percentage (0-100) whereas
        # get_cuisine_distribution() produces fractions (0-1), so the prior
        # mostly acts as a tie-breaker. Scale kept as in the original --
        # confirm whether this is intended before changing it.
        scores[cuisine] = share - CUISINE_DISTRIBUTION.get(cuisine, 0)
    return max(scores, key=scores.get)

# Extract as a list the result from Elasticsearch
def extract_from_json(json):
    """Return the '_source' document of every hit of a search response.

    ``json`` is the decoded response; hits are read from
    ``json['hits']['hits']`` and their '_source' values are returned as a
    list, in the same order.
    """
    recipes = []
    for hit in json['hits']['hits']:
        recipes.append(hit['_source'])
    return recipes

# Send the request to elasticsearch and extract the result
def get_similar(nb, doc_id):
    """Fetch the training recipes most similar to the document ``doc_id``.

    Builds a more-like-this query with ``build_mlt`` and posts it to the
    ``train-recipe/recipe/_search`` endpoint of the local Elasticsearch node.

    Args:
        nb: maximum number of similar recipes requested.
        doc_id: id of the reference document handed to ``build_mlt``.

    Returns:
        The list of recipe documents extracted from the search hits.

    Raises:
        requests.HTTPError: if Elasticsearch answers with an error status.
    """
    mlt = build_mlt(nb, doc_id)
    response = requests.post(
        "http://localhost:9200/train-recipe/recipe/_search",
        data=json.dumps(mlt),
    )
    # Fail with the real HTTP error instead of a later KeyError('hits') when
    # the response is an error payload.
    response.raise_for_status()
    return extract_from_json(json.loads(response.text))

# Retrieve a specific document given its ID
def get_document_by_id(index, doc_id):
    """Return the '_source' of the 'recipe' document ``doc_id`` in ``index``."""
    client = ElasticSearch()
    return client.get(index=index, doc_type='recipe', id=doc_id)['_source']

# Launch a prediction on a list of recipes, with a given prediction function, and a number of similar document required
def predict(recipes_to_predict, predict_origin, mlt_nb=100):
    """Predict a cuisine for every recipe of ``recipes_to_predict``.

    For each recipe, up to ``mlt_nb`` similar recipes are fetched with
    ``get_similar`` (keyed on the recipe's 'id') and handed to the
    ``predict_origin`` callable, whose return value is the prediction.

    Returns a list of ``(prediction, recipe_id)`` tuples, in input order.
    """
    return [
        (predict_origin(get_similar(mlt_nb, recipe['id'])), recipe['id'])
        for recipe in recipes_to_predict
    ]

# Transform a set of predicted recipes into a CSV ready to drop on Kaggle
def to_csv(predictions):
    """Write the predictions to ``results.csv`` in the current directory.

    The file starts with the header row ``id,cuisine`` followed by one row per
    prediction.

    Args:
        predictions: iterable of ``(cuisine, recipe_id)`` sequences, as
            returned by ``predict``.
    """
    import sys

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        out = open("results.csv", "w", newline="")
    else:
        out = open("results.csv", "wb")
    # The context manager guarantees the file is flushed and closed, even if
    # a row fails to be written.
    with out:
        writer = csv.writer(out)
        writer.writerow(["id", "cuisine"])
        for prediction in predictions:
            writer.writerow([prediction[1], prediction[0]])

# Compute the cuisine distribution of a list of recipes
def get_cuisine_distribution(recipes):
    """Return the share of each cuisine among ``recipes``.

    The result maps every cuisine found under the 'cuisine' key of the recipes
    to its relative frequency, a float between 0 and 1. An empty input gives
    an empty dict.
    """
    counts = {}
    for entry in recipes:
        label = entry['cuisine']
        counts[label] = counts.get(label, 0) + 1
    total = float(len(recipes))
    return {label: float(count) / total for label, count in counts.items()}

In [None]:
# Load the Kaggle training and test sets. The files are JSON, so they are
# parsed with json.load (ast.literal_eval only understands Python literals),
# and the context managers make sure both files get closed.
with open('data/train.json', 'r') as data_file:
    train_recipes = json.load(data_file)
with open('data/test.json', 'r') as data_file:
    test_recipes = json.load(data_file)

# Global cuisine frequencies of the training set, read by diff_distribution.
CUISINE_DISTRIBUTION = get_cuisine_distribution(train_recipes)

# Elasticsearch receives the ingredients as one text field per recipe.
train_recipes = list_to_string_ingredients(train_recipes)
test_recipes = list_to_string_ingredients(test_recipes)

index_recipes_in_ES('train-recipe', train_recipes)
index_recipes_in_ES('test-recipe', test_recipes)

# Predict each test recipe from its 15 most similar training recipes and
# write the Kaggle submission file.
results = predict(test_recipes, diff_distribution, 15)
to_csv(results)

In this part I provide some code that lets you split your training set into two parts so that you can optimize your parameters
-----

In [8]:
# Compute your success rate
def get_result_test(results, random_recipes_test):
    """Split the predictions into successes and failures and summarise both.

    For every ``(prediction, recipe_id)`` entry of ``results`` the stored
    document is fetched from the 'test-recipe' index and its 'cuisine' is
    compared with the prediction.

    Returns a ``(success, failure)`` pair of ``Result_percentage`` objects,
    both computed relative to ``len(random_recipes_test)``.
    """
    hits = []
    misses = []
    for result in results:
        stored_recipe = get_document_by_id('test-recipe', result[1])
        if stored_recipe['cuisine'] == result[0]:
            bucket = hits
        else:
            bucket = misses
        bucket.append(stored_recipe)
    success_summary = Result_percentage(hits, float(len(random_recipes_test)))
    failure_summary = Result_percentage(misses, float(len(random_recipes_test)))
    return success_summary, failure_summary

class Result_percentage(object):
    """Summary of one group of recipes (e.g. the correctly predicted ones).

    Attributes set by the constructor:
        distribution: dict mapping each cuisine of the group to its share of
            the group, as a percentage.
        percentage: size of the group relative to ``length_test``, as a
            percentage (0.0 when ``length_test`` is 0).
    """

    def __init__(self, recipes, length_test):
        """Compute both statistics.

        Args:
            recipes: list of recipe dicts, each holding a 'cuisine' key.
            length_test: total number of recipes the group is compared to.
        """
        counts = {}
        for recipe in recipes:
            cuisine = recipe['cuisine']
            counts[cuisine] = counts.get(cuisine, 0) + 1

        group_size = float(len(recipes))
        self.distribution = {
            cuisine: count * 100 / group_size
            for cuisine, count in counts.items()
        }

        # Coerce to float so an int ``length_test`` does not trigger integer
        # division on Python 2, and guard against an empty test set.
        total = float(length_test)
        self.percentage = len(recipes) / total * 100 if total else 0.0

In [None]:
# Load the data sets. The files are JSON, so they are parsed with json.load
# (ast.literal_eval only understands Python literals), and the context
# managers make sure both files get closed.
with open('data/train.json', 'r') as data_file:
    train_recipes = json.load(data_file)
with open('data/test.json', 'r') as data_file:
    test_recipes = json.load(data_file)

nb_train_recipes = len(train_recipes)

# Hold out a random third of the labelled recipes as a local test set and
# keep the rest for training. Shuffling a copy and slicing it yields two
# disjoint groups in linear time; drawing recipes one by one and testing list
# membership for each is quadratic in the number of recipes.
shuffled_recipes = list(train_recipes)
random.shuffle(shuffled_recipes)
nb_test_recipes = nb_train_recipes // 3
random_recipes_test = shuffled_recipes[:nb_test_recipes]
random_recipes_train = shuffled_recipes[nb_test_recipes:]

# Global cuisine frequencies of the reduced training set, read by
# diff_distribution.
CUISINE_DISTRIBUTION = get_cuisine_distribution(random_recipes_train)

random_recipes_train = list_to_string_ingredients(random_recipes_train)
random_recipes_test = list_to_string_ingredients(random_recipes_test)

# NOTE(review): nothing here empties the indices first. If 'train-recipe'
# already holds recipes from an earlier run (e.g. the full training set), the
# held-out recipes may still be searchable and inflate the score -- confirm
# the indices are recreated before running this cell.
index_recipes_in_ES('train-recipe', random_recipes_train)
index_recipes_in_ES('test-recipe', random_recipes_test)

results = predict(random_recipes_test, diff_distribution, 15)
score = get_result_test(results, random_recipes_test)

# Single-argument print calls produce the same output on Python 2 and 3.
print(score[0].distribution)
print("%s Success" % score[0].percentage)
print("--------------")
print(score[1].distribution)
print("%s Failure" % score[1].percentage)