In [1]:
import pandas as pd
import numpy as np
from collections import Counter as ctr

In [2]:
data = pd.read_csv('timelymealsdiscreteannotated.csv', skiprows=1, names=['type','text', 'c3', 'c4', 'c5'], delimiter=',', encoding='ISO-8859-1')

In [3]:
data['clean'] = data.text.apply(lambda x: x.lower().split())

In [4]:
data.drop(['c3', 'c4', 'c5'], axis=1, inplace=True)

In [5]:
data

Unnamed: 0,type,text,clean
0,Valid,Generate me a meal plan for 3 days,"[generate, me, a, meal, plan, for, 3, days]"
1,Valid,Give me a meal plan for a week and 2 meals per...,"[give, me, a, meal, plan, for, a, week, and, 2..."
2,Valid,Meal plan for the week,"[meal, plan, for, the, week]"
3,Valid,Give me a meal plan for the day,"[give, me, a, meal, plan, for, the, day]"
4,Valid,Generate me a meal plan for the weekend,"[generate, me, a, meal, plan, for, the, weekend]"
...,...,...,...
203,Invalid,Help me eat more vegetables.,"[help, me, eat, more, vegetables.]"
204,Invalid,What meals freeze well for later?,"[what, meals, freeze, well, for, later?]"
205,Invalid,Suggest food that helps with energy levels.,"[suggest, food, that, helps, with, energy, lev..."
206,Invalid,Recommend comfort food dishes.,"[recommend, comfort, food, dishes.]"


In [6]:
data.groupby('type').count()

Unnamed: 0_level_0,text,clean
type,Unnamed: 1_level_1,Unnamed: 2_level_1
Invalid,59,59
Valid,149,149


In [7]:
type_ctr = ctr(data.type)

type_ctr['Invalid']/len(data), type_ctr['Valid']/len(data)

(0.28365384615384615, 0.7163461538461539)

In [8]:
test = data.sample(frac=0.1)
train = data[~data.index.isin(test.index)]

train.shape, test.shape

((187, 3), (21, 3))

### Prior probability

In [9]:
counter = ctr(train.type)

def Pa(X=''):
    return counter[X] / len(train)

In [10]:
Pa('Valid')

0.732620320855615

### Normalization

In [11]:
words_ctr = ctr([word for row in train.clean for word in row])

def Pb(W=''):
    if W not in words_ctr: return 0.000001
    return words_ctr[W] / sum(words_ctr.values())


words_ctr

Counter({'for': 152,
         'a': 147,
         'plan': 126,
         'meal': 114,
         'the': 79,
         'next': 57,
         'meals': 49,
         'days': 31,
         'me': 28,
         'with': 24,
         'week': 23,
         'days,': 19,
         'create': 17,
         'generate': 16,
         'suggest': 15,
         'i': 15,
         'give': 13,
         'and': 13,
         'five': 13,
         'seven': 12,
         'including': 12,
         'on': 12,
         'weekend': 11,
         'three': 11,
         'what': 11,
         'four': 10,
         '7': 9,
         'focusing': 9,
         'schedule': 8,
         'dinners': 8,
         'how': 8,
         'day': 7,
         'one': 7,
         'week,': 7,
         'six': 7,
         'make': 7,
         'can': 7,
         'healthy': 6,
         'design': 6,
         'family': 6,
         'provide': 6,
         'to': 6,
         'all': 6,
         'breakfasts': 6,
         '3': 5,
         'balanced': 5,
         'monday': 5,
  

### Maximum Likelihood

In [12]:
words_valid_invalid = {}

for col_name in list(set(data.type)):
    sub_df = train[train.type == col_name] #masking data type to only be ham sub-datatype
    words_valid_invalid[col_name] = []
    for row in sub_df.clean:
        for word in row:
            words_valid_invalid[col_name].append(word)
    words_valid_invalid[col_name] = ctr(words_valid_invalid[col_name])

def Pba(W='', X=''):
    t = words_valid_invalid[X]
    if W not in t: return 0.0000001
    return t[W] / sum(t.values())


words_valid_invalid['Valid']

Counter({'for': 140,
         'plan': 125,
         'a': 124,
         'meal': 113,
         'the': 76,
         'next': 57,
         'meals': 39,
         'days': 31,
         'week': 23,
         'me': 19,
         'with': 19,
         'days,': 19,
         'generate': 16,
         'create': 15,
         'and': 13,
         'five': 13,
         'seven': 12,
         'including': 12,
         'give': 11,
         'weekend': 11,
         'three': 11,
         'suggest': 10,
         'four': 10,
         'on': 10,
         '7': 9,
         'focusing': 9,
         'dinners': 8,
         'day': 7,
         'schedule': 7,
         'one': 7,
         'week,': 7,
         'six': 7,
         'design': 6,
         'family': 6,
         'provide': 6,
         'breakfasts': 6,
         '3': 5,
         'balanced': 5,
         'healthy': 5,
         'monday': 5,
         'snacks': 5,
         'organize': 5,
         'make': 5,
         'i': 5,
         'all': 5,
         'lunches': 5,
         'u

In [13]:
Pba(W='the', X='Valid')

0.058596761757902856

In [14]:
def Pab(X='', W=''):
    return Pba(W, X) * Pa(X) / Pb(W)

In [15]:
Pab(X='Valid', W='the')

0.8786896388997809

In [16]:
def Ps(T, X=''):
    return np.prod([Pab(X=X, W=word) for word in T])

In [17]:
Ps(['make', 'meal', 'plan', 'for', 'week'], 'Valid')

np.float64(0.41125812797763356)

### Evaluate

In [18]:
test['Valid'] = test.clean.apply(lambda x: Ps(x, 'Valid'))
test['Invalid'] = test.clean.apply(lambda x: Ps(x, 'Invalid'))

In [19]:
test['winner'] = test.Valid /  test.Invalid
test['winner'] = test.winner.apply(lambda x: 'Valid' if x>1 else 'Invalid')

In [20]:
result = test.winner == test.type

In [21]:
sum(result) / len(test)

0.9523809523809523

In [22]:
test

Unnamed: 0,type,text,clean,Valid,Invalid,winner
25,Valid,Provide a week-long menu with three meals per day,"[provide, a, week-long, menu, with, three, mea...",0.001512921,2.9572339999999996e-26,Valid
165,Invalid,What's a good diet for building muscle?,"[what's, a, good, diet, for, building, muscle?]",1.48645e-11,1.272322e-09,Invalid
24,Valid,Outline a meal plan for the next five lunches ...,"[outline, a, meal, plan, for, the, next, five,...",0.271286,6.231005e-40,Valid
207,Invalid,What can I make for a romantic dinner at home?,"[what, can, i, make, for, a, romantic, dinner,...",1.55948e-10,4.224595e-08,Invalid
177,Invalid,What is the DASH diet?,"[what, is, the, dash, diet?]",1.945892e-15,0.003383579,Invalid
197,Invalid,Suggest meals using only 5 ingredients.,"[suggest, meals, using, only, 5, ingredients.]",1.424488e-06,0.0009099358,Invalid
97,Valid,Generate a meal plan for the next six days,"[generate, a, meal, plan, for, the, next, six,...",0.3251842,2.596021e-30,Valid
5,Valid,Plan 3 meals per day for the next month,"[plan, 3, meals, per, day, for, the, next, month]",0.02482847,2.5260200000000002e-28,Valid
200,Invalid,Give me ideas for packed lunches.,"[give, me, ideas, for, packed, lunches.]",0.001317019,3.10026e-06,Valid
139,Valid,"Provide a meal plan for the next week, with da...","[provide, a, meal, plan, for, the, next, week,...",0.01559633,4.7856160000000005e-33,Valid
