In [2]:
 import util
 import math

 class Entropy(object):
    """Score guess words by the entropy of the feedback keys they produce.

    Relies on ``util.Wordle`` for the allowed-word list (``_allowed``) and for
    ``compareWord``, whose return value is used here as a dictionary key and
    must therefore be hashable.
    NOTE(review): ``compareWord(guess, word)`` presumably returns the Wordle
    feedback pattern for ``guess`` against ``word`` -- confirm in ``util``.
    """

    def __init__(self):
        self.wd = util.Wordle()
        # Reads a private attribute of Wordle; kept as-is for compatibility.
        self.allowed = self.wd._allowed
        # Cached size of the allowed list at construction time.
        self.totalWord = len(self.allowed)

    def findWordProb(self, guessWord, wordslist = None):
        """Return the probability of each feedback key for ``guessWord``.

        Args:
            guessWord: the word passed as first argument to ``compareWord``.
            wordslist: iterable of candidate words; defaults to ``self.allowed``.

        Returns:
            dict mapping each distinct ``compareWord`` result to the fraction
            of the examined candidates that produced it. Empty when there are
            no candidates.
        """
        if wordslist is None:
            wordslist = self.allowed

        # Only the bucket sizes matter, so count instead of storing the words.
        counts = {}
        for word in wordslist:
            key = self.wd.compareWord(guessWord, word)
            counts[key] = counts.get(key, 0) + 1

        # Normalise by the number of candidates actually examined so the
        # probabilities sum to 1 even when a reduced word list is passed in
        # (dividing by the full allowed-list size would under-weight them).
        total = sum(counts.values())
        return {key: count / total for key, count in counts.items()}

    def calEntropy(self,wordprob):
        """Return the sum of ``p * log2(1/p)`` over the given probabilities.

        Args:
            wordprob: iterable of probabilities (e.g. ``dict.values()``).

        Returns:
            The accumulated value; ``0`` for an empty iterable.
        """
        eI = 0
        for p in wordprob:
            if p == 0:
                # A zero-probability outcome contributes nothing; skipping it
                # also avoids the division by zero in 1/p.
                continue
            eI += p * math.log2(1/p)
        return eI

    def run(self):
        """Compute the entropy of every allowed word used as a guess.

        Calls ``compareWord`` once per (guess, candidate) pair over
        ``self.allowed`` and prints each word with its score as it goes.

        Returns:
            dict mapping each allowed word to its entropy.
        """
        entropy = {}
        for word in self.allowed:
            eI = self.calEntropy(self.findWordProb(word).values())
            # Key by the word itself; the literal 'word' would overwrite a
            # single entry on every iteration.
            entropy[word] = eI
            print(word,eI)
        return entropy


In [3]:
# Instantiate the scorer; its __init__ builds util.Wordle() and keeps that object's allowed-word list.
e = Entropy()

# Echo the allowed-word list.
# NOTE(review): this displays the whole list; a slice such as e.allowed[:10] would keep the output short.
e.allowed

['aahed',
 'aalii',
 'aargh',
 'aarti',
 'abaca',
 'abaci',
 'aback',
 'abacs',
 'abaft',
 'abaka',
 'abamp',
 'aband',
 'abase',
 'abash',
 'abask',
 'abate',
 'abaya',
 'abbas',
 'abbed',
 'abbes',
 'abbey',
 'abbot',
 'abcee',
 'abeam',
 'abear',
 'abele',
 'abers',
 'abets',
 'abhor',
 'abide',
 'abies',
 'abled',
 'abler',
 'ables',
 'ablet',
 'ablow',
 'abmho',
 'abode',
 'abohm',
 'aboil',
 'aboma',
 'aboon',
 'abord',
 'abore',
 'abort',
 'about',
 'above',
 'abram',
 'abray',
 'abrim',
 'abrin',
 'abris',
 'absey',
 'absit',
 'abuna',
 'abune',
 'abuse',
 'abuts',
 'abuzz',
 'abyes',
 'abysm',
 'abyss',
 'acais',
 'acari',
 'accas',
 'accoy',
 'acerb',
 'acers',
 'aceta',
 'achar',
 'ached',
 'aches',
 'achoo',
 'acids',
 'acidy',
 'acing',
 'acini',
 'ackee',
 'acker',
 'acmes',
 'acmic',
 'acned',
 'acnes',
 'acock',
 'acold',
 'acorn',
 'acred',
 'acres',
 'acrid',
 'acros',
 'acted',
 'actin',
 'acton',
 'actor',
 'acute',
 'acyls',
 'adage',
 'adapt',
 'adaws',
 'adays',


In [55]:
import string
import pandas as pd


def calWordScore(allowed):
    """Score words by the inverse positional frequency of their letters.

    For each of the first five character positions, the share of words in
    ``allowed`` that have a given lowercase letter at that position is
    computed. A word's score is 1 plus the sum, over the five positions, of
    the reciprocal of the share for the letter it has there.

    Args:
        allowed: iterable of words; each must have at least five characters
            and only ``string.ascii_lowercase`` letters in the first five.

    Returns:
        ``(wordScore, matrix)`` where ``wordScore`` maps word -> score and
        ``matrix`` maps letter -> {position: share}.

    Side effects:
        Prints the letter-by-position share table.
    """
    positions = range(0, 5)

    # Tally how often each letter occurs at each position.
    tally = {letter: [0] * 5 for letter in string.ascii_lowercase}
    for entry in allowed:
        for pos in positions:
            tally[entry[pos]][pos] += 1

    # Rows are letters, columns are positions; divide each column by its total.
    table = pd.DataFrame(tally).T
    column_totals = table.sum(axis =0)
    table = table/column_totals
    print(table)
    matrix = table.T.to_dict()

    wordScore = {}
    for entry in allowed:
        running = 1
        for pos in positions:
            share = matrix[entry[pos]][pos]
            running += 1/share
        wordScore[entry] = running
    return wordScore,matrix

In [27]:
def readfiles(file):
    """Read a text file and return its lines with trailing newlines removed.

    Args:
        file: path passed straight to ``open`` in text read mode.

    Returns:
        list of str, one per line, each with trailing ``'\\n'`` stripped.
    """
    stripped = []
    with open(file,'r') as handle:
        raw_lines = handle.readlines()
    for raw in raw_lines:
        stripped.append(raw.rstrip('\n'))
    return stripped

In [52]:
# Load the word list, one entry per line, into the notebook-level name `allowed`.
# NOTE(review): the file name contains a space before the underscore -- confirm it matches the file on disk.
allowed = readfiles("word_frequency _.txt")

In [56]:
# Score every word in `allowed`; the second return value is bound to `_` and not used in this cell.
# NOTE(review): assigning to `_` in IPython shadows the automatic last-output variable -- confirm this is intended.
wf, _ = calWordScore(allowed)
# Echo the word -> score dict.
wf

          0         1         2         3         4
a  0.041954  0.168829  0.171666  0.054114  0.009931
b  0.090191  0.005067  0.020470  0.015403  0.001216
c  0.089177  0.017430  0.015606  0.064856  0.004256
d  0.033239  0.003040  0.029591  0.033441  0.040130
e  0.021078  0.087556  0.070126  0.198622  0.221929
f  0.088772  0.001824  0.013985  0.017025  0.009728
g  0.067086  0.006283  0.028172  0.030199  0.015809
h  0.035266  0.051277  0.002229  0.012363  0.049858
i  0.006688  0.087150  0.109850  0.027158  0.000811
j  0.018038  0.000000  0.001013  0.000811  0.000000
k  0.004662  0.004054  0.008918  0.024929  0.043170
l  0.036482  0.082692  0.039724  0.074990  0.054722
m  0.032428  0.016214  0.018849  0.025942  0.007702
n  0.013985  0.022700  0.051682  0.077219  0.033441
o  0.005878  0.145318  0.107418  0.032833  0.008715
p  0.053709  0.029996  0.022902  0.020876  0.020470
q  0.004256  0.000608  0.000203  0.000000  0.000000
r  0.045197  0.130320  0.065261  0.060803  0.147953
s  0.187272 

{'greet': 54.59427476953194,
 'boxer': 160.6047941878812,
 'scree': 88.57553118015267,
 'joist': 96.11841688825675,
 'jewel': 140.01975164362725,
 'space': 65.42761639417151,
 'scaly': 88.03340850777404,
 'crane': 43.16837522353681,
 'axial': 423.12111607719,
 'drake': 89.20379461817456,
 'grass': 64.54506336828752,
 'stove': 75.96493729242351,
 'curse': 58.924626369371296,
 'queer': 276.91243895733356,
 'soggy': 86.99286203151006,
 'story': 58.80286686325073,
 'equal': 1749.8381576841361,
 'slime': 70.58909620099848,
 'sweep': 136.94181767536077,
 'catch': 74.51924586146609,
 'found': 76.99132232560545,
 'death': 84.15241663726837,
 'furry': 64.10192049275962,
 'deign': 114.62710744592701,
 'clock': 72.19927682962877,
 'niche': 233.45076903408355,
 'batty': 59.842255519031404,
 'trope': 91.21025396732443,
 'scare': 90.48978824625615,
 'sappy': 108.99071340916122,
 'havoc': 325.60785410181717,
 'daunt': 81.65408982135625,
 'canon': 97.84564566567367,
 'motel': 82.93449810078711,
 'love

In [38]:
# Re-score using the allowed list held by the Entropy instance `e`; this rebinds both `wf` and `_`.
wf, _ = calWordScore(e.allowed)

# `_` is the letter -> {position: share} dict returned above; show it with letters as rows and positions as columns.
# NOTE(review): `_` is used as a real variable here; a descriptive name would be clearer -- check other cells before renaming.
pd.DataFrame(_).T

          0         1         2         3         4
a  0.056815  0.174453  0.095282  0.082794  0.052421
b  0.070074  0.006244  0.025825  0.018733  0.004548
c  0.071076  0.013568  0.030219  0.031684  0.009790
d  0.052806  0.006475  0.030065  0.036309  0.063444
e  0.023358  0.125501  0.067993  0.179386  0.117330
f  0.046099  0.001850  0.013722  0.017962  0.006321
g  0.049183  0.005859  0.028060  0.032609  0.011024
h  0.037697  0.042091  0.009251  0.018116  0.028523
i  0.012720  0.106614  0.081021  0.067838  0.021585
j  0.015572  0.000848  0.003546  0.002236  0.000231
k  0.028986  0.007323  0.020968  0.038776  0.019966
l  0.044480  0.053885  0.065372  0.059436  0.036694
m  0.053423  0.014493  0.039393  0.030990  0.014030
n  0.025054  0.026596  0.074314  0.060746  0.040857
o  0.020197  0.161579  0.076549  0.053808  0.029988
p  0.066220  0.017808  0.028060  0.032223  0.011332
q  0.006013  0.001156  0.001002  0.000154  0.000308
r  0.048412  0.072464  0.092353  0.055427  0.051881
s  0.120644 

Unnamed: 0,0,1,2,3,4
a,0.056815,0.174453,0.095282,0.082794,0.052421
b,0.070074,0.006244,0.025825,0.018733,0.004548
c,0.071076,0.013568,0.030219,0.031684,0.00979
d,0.052806,0.006475,0.030065,0.036309,0.063444
e,0.023358,0.125501,0.067993,0.179386,0.11733
f,0.046099,0.00185,0.013722,0.017962,0.006321
g,0.049183,0.005859,0.02806,0.032609,0.011024
h,0.037697,0.042091,0.009251,0.018116,0.028523
i,0.01272,0.106614,0.081021,0.067838,0.021585
j,0.015572,0.000848,0.003546,0.002236,0.000231
