## Word game

The goal is to help the player by writing a program that can alphabetically list all the words of given length with the given prefix. Your solution should also work with large dictionaries and should effectively search many subsequent queries.



---

In [1]:
# Import libraries

import csv
import gzip
import pandas as pd
import numpy as np

In [2]:
# Create dataframes

df_input = pd.read_csv('~/Documents/GitHub/various-projects/data/input.txt', names=['prefixLength'])
df_dictionary = pd.read_csv('~/Documents/GitHub/various-projects/data/dictionary.txt', names=['word'])

In [3]:
# Quick look at df_input 

df_input.head()

Unnamed: 0,prefixLength
0,hat 5
1,wa 3
2,x 10
3,bob 7
4,i 2


In [4]:
# Show df_input info

df_input.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   prefixLength  1000 non-null   object
dtypes: object(1)
memory usage: 7.9+ KB


In [5]:
# Create two columns for df_inpunts with prefix and length

df_input.dropna(inplace = True)
new_df_input = df_input["prefixLength"].str.split(" ", n = 1, expand = True)
new_df_input.columns = ['prefix', 'length']
new_df_input["length"] = new_df_input.length.astype(int)
new_df_input.head()

Unnamed: 0,prefix,length
0,hat,5
1,wa,3
2,x,10
3,bob,7
4,i,2


In [6]:
# Quick look at df_dictionary

df_dictionary.head()

Unnamed: 0,word
0,a
1,aardvark
2,aardvarks
3,abaci
4,aback


In [7]:
# Show df_dictionary info

df_dictionary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62887 entries, 0 to 62886
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   word    62886 non-null  object
dtypes: object(1)
memory usage: 491.4+ KB


In [8]:
# Simple example of all "hat" prefix in df_dictionary

all_hat = df_dictionary[df_dictionary['word'].str.count('^hat')>0]
all_hat.head()

Unnamed: 0,word
25250,hat
25251,hatch
25252,hatchback
25253,hatchbacks
25254,hatched


In [9]:
# Count of all "hat" prefix in df_dictionary 

len(all_hat)

30

In [10]:
# Simple count of characters in the  "hat" prefix dataframe

import warnings
warnings.filterwarnings("ignore")

all_hat['count'] = all_hat['word'].str.len()
all_hat.head()

Unnamed: 0,word,count
25250,hat,3
25251,hatch,5
25252,hatchback,9
25253,hatchbacks,10
25254,hatched,7


In [11]:
# Only show "hat" prefix with predefined lenght and sort in ascending order

show_hat = all_hat[all_hat["count"] == 5]
show_hat.sort_values(by='word', ascending=True)
show_hat

Unnamed: 0,word,count
25251,hatch,5
25264,hated,5
25268,hater,5
25270,hates,5


In [12]:
# Save dataframe "show_hat" to csv and zip

show_hat.to_csv('~/Documents/GitHub/various-projects/data/hat.csv.gz', compression = 'gzip')


---

In [13]:
# Make it automatic for all prefixes with predefined length

# Import libraries

import csv
import gzip
import pandas as pd
import numpy as np

# Create dataframes

df_input = pd.read_csv('~/Documents/GitHub/various-projects/data/input.txt', names=['prefixLength'])
df_dictionary = pd.read_csv('~/Documents/GitHub/various-projects/data/dictionary.txt', names=['word'])

df_input.dropna(inplace = True)
new_df_input = df_input["prefixLength"].str.split(" ", n = 1, expand = True)
new_df_input.columns = ['prefix', 'length']
new_df_input["length"] = new_df_input.length.astype(int)

In [14]:
# Head of new_df_input

new_df_input.head()

Unnamed: 0,prefix,length
0,hat,5
1,wa,3
2,x,10
3,bob,7
4,i,2


In [15]:
# Head of df_dictionary

df_dictionary.head()

Unnamed: 0,word
0,a
1,aardvark
2,aardvarks
3,abaci
4,aback


In [16]:
# Create multiple lists

list_of_prefix = new_df_input['prefix'].to_list()
list_of_length = new_df_input['length'].to_list()
list_of_dictionary = df_dictionary['word'].to_list()

In [17]:
# Match lists of prefix & dictionary

list_of_match =[]

for i in range(len(list_of_prefix)):
    for j in range(len(list_of_dictionary)):
        if list_of_prefix[i]==list_of_dictionary[j]:
            list_of_match.append(i)

In [18]:
# How many matches 

len(list_of_match)

541

In [19]:
# Create dataframe of match

df_match = new_df_input.iloc[list_of_match, :]
df_match.head()

Unnamed: 0,prefix,length
0,hat,5
2,x,10
3,bob,7
4,i,2
5,z,8


In [20]:
# Merge dataframes 

# pd.merge(df_match, df_dictionary, how='left', left_on='prefix', right_on='word')

# To be continued : )