The purpose of this notebook is to take a list of:

1. 9-letter words from the Official Scrabble Players Dictionary (OSPD)
2. Exclude Plurals
3. Exclude words that exceed the number of tiles in a Scrabble set

And find the list of highest scoring words given these parameters.

In [324]:
import pandas as pd
import numpy as np

In [325]:
ospd = pd.read_csv('ospd.txt')
ospd = ospd[(ospd['Word'].str.len() == 9)].reset_index().drop('index',axis=1)

In [326]:
ospd.head()

Unnamed: 0,Word
0,aardvarks
1,aasvogels
2,abamperes
3,abandoned
4,abandoner


In [327]:
#Remove Plurals

import inflect
p = inflect.engine()
word_list = list(filter(lambda item: p.singular_noun(item) == False,ospd['Word']))

In [328]:
word_list[:5]

['abandoned', 'abandoner', 'abasement', 'abashment', 'abatement']

# Scrabble Tile Values

In [329]:
tile_dict = {"a":{"value":1,"count":9},
             "b":{"value":3,"count":2},
             "c":{"value":2,"count":2},
             "d":{"value":2,"count":4},
             "e":{"value":1,"count":12},
             "f":{"value":4,"count":2},
             "g":{"value":2,"count":3},
             "h":{"value":4,"count":2},
             "i":{"value":1,"count":9},
             "j":{"value":8,"count":1},
             "k":{"value":5,"count":1},
             "l":{"value":1,"count":4},
             "m":{"value":3,"count":2},
             "n":{"value":1,"count":6},
             "o":{"value":1,"count":8},
             "p":{"value":3,"count":2},
             "q":{"value":10,"count":1},
             "r":{"value":1,"count":6},
             "s":{"value":1,"count":4},
             "t":{"value":1,"count":6},
             "u":{"value":1,"count":4},
             "v":{"value":4,"count":2},
             "w":{"value":4,"count":2},
             "x":{"value":8,"count":1},
             "y":{"value":4,"count":2},
             "z":{"value":10,"count":1}
            }

In [330]:
tile_dict["a"]["value"]

1

In [331]:
tiledf = pd.DataFrame.from_dict(tile_dict, orient='index')
tiledf

Unnamed: 0,value,count
a,1,9
b,3,2
c,2,2
d,2,4
e,1,12
f,4,2
g,2,3
h,4,2
i,1,9
j,8,1


In [332]:
#Need a new version of word_list using the subindex version
value_list = [[sum(tile_dict[i]["value"] for i in word_list[r])]for r in range(len(word_list))]
value_list[:5]

[[13], [12], [13], [16], [13]]

In [343]:
#Determine if letter count exceeds possible letter count

letter_count = [[word_list[r].count(word_list[r][i]) <= tile_dict[word_list[r][i]]["count"] for i in range(len(word_list[0]))] for r in range(len(word_list))]

In [335]:
#Check for any instances where a letter is False, i.e. exceeds the letter count.
letter_check = [all(letter_count[i]) for i in range(len(word_list))]
letter_check[:10]

#all(letter_count[99])

[True, True, True, True, True, True, True, True, True, True]

In [336]:
d = {'Word':word_list,'Value':value_list,'Letter Check':letter_check}

In [337]:
df = pd.DataFrame(d)
df.head()

Unnamed: 0,Word,Value,Letter Check
0,abandoned,[13],True
1,abandoner,[12],True
2,abasement,[13],True
3,abashment,[16],True
4,abatement,[13],True


In [338]:
df[df['Word']=='showbizzy']

Unnamed: 0,Word,Value,Letter Check
11550,showbizzy,[38],False


In [339]:
df_valid = df[df['Letter Check']==True]

# Jumble Words with the Highest Score in Scrabble

In [340]:
df_valid.sort_values('Value',ascending=False)

Unnamed: 0,Word,Value,Letter Check
4537,exoenzyme,[30],True
7366,maximized,[30],True
6696,jarovized,[29],True
6845,krummholz,[29],True
7367,maximizer,[29],True
6702,jayhawker,[29],True
10889,rhythmize,[29],True
4486,exchequer,[29],True
6795,kibbutzim,[28],True
4326,equalized,[28],True
