# Wordle Analysis

## 1. Import packages

In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime
import swat

## Custom personal module to connect to my CAS environment
try:
    from casConnect import connect_to_cas 
except:
    print('CasConnect package not available')

## 2. Get Wordle word list

In [2]:
## Link to the acceptable word list in Wordle
url = 'https://www.nytimes.com/games-assets/v2/wordle.303bf7c0c5548c4f655a.js'
r = requests.get(url)
wordleSiteContents = r.text

## The Wordle list is a list after va=
findStartPosition = 'va=['
findEndPosition = ']'

## Find the start position and end position of the list
startPosition = wordleSiteContents.find(findStartPosition) + len(findStartPosition)
endPosition = wordleSiteContents.find(findEndPosition, startPosition)


## Use the startPosition and endPosition to extract the words from the list.
## Then create a list of words from the Wordle js file
wordleWordList = (wordleSiteContents[startPosition:endPosition]  ## Extract word list from NYTimes js file
                 .upper()                                        ## Upper case all words
                 .replace('"','')                                ## Remove quotes around each word
                 .split(','))                                    ## Create a list by splitting on the comma

## Preview the list
display(f'Total acceptable words in Wordle list: {len(wordleWordList)}', 
        f'Preview new list: {wordleWordList[0:10]}')

'Total acceptable words in Wordle list: 14855'

"Preview new list: ['AAHED', 'AALII', 'AAPAS', 'AARGH', 'AARTI', 'ABACA', 'ABACI', 'ABACS', 'ABAFT', 'ABAHT']"

## 3. Prepare the DataFrame

In [3]:
word_df = pd.DataFrame({'word':wordleWordList})
word_df.head()

Unnamed: 0,word
0,AAHED
1,AALII
2,AAPAS
3,AARGH
4,AARTI


In [4]:
def top5Letters(dataframe, column):
    '''
    Returns a list of the top 5 letters in the specific column
    
    dataframe : specifies the dataframe with the Worlde data
    column    : the column that will be analyzed to find the top5 letters in that column
    '''
    top5Letters = dataframe[column].value_counts()[:5].index.to_list()
    return top5Letters


## Prepare the final data
wordle_df_wide = (word_df
                  .assign(
                     firstLetter = word_df.word.str[0],
                     secondLetter = word_df.word.str[1],
                     thirdLetter = word_df.word.str[2],
                     fourthLetter = word_df.word.str[3],
                     fifthLetter = word_df.word.str[4],
                     top5FirstLetter = lambda _df: np.select([_df.firstLetter.isin(top5Letters(_df, 'firstLetter'))], [1]),
                     top5SecondLetter = lambda _df: np.select([_df.secondLetter.isin(top5Letters(_df, 'secondLetter'))], [1]),
                     top5ThirdLetter = lambda _df: np.select([_df.thirdLetter.isin(top5Letters(_df, 'thirdLetter'))], [1]),
                     top5FourthLetter = lambda _df: np.select([_df.fourthLetter.isin(top5Letters(_df, 'fourthLetter'))], [1]),
                     top5FifthLetter = lambda _df: np.select([_df.fifthLetter.isin(top5Letters(_df, 'fifthLetter'))], [1]),
                     numberOfTop5Letters = lambda _df: _df.loc[:,['top5FirstLetter','top5SecondLetter','top5ThirdLetter','top5FourthLetter','top5FifthLetter']].sum(axis = 1)
                  )
                 )

wordle_df_wide

Unnamed: 0,word,firstLetter,secondLetter,thirdLetter,fourthLetter,fifthLetter,top5FirstLetter,top5SecondLetter,top5ThirdLetter,top5FourthLetter,top5FifthLetter,numberOfTop5Letters
0,AAHED,A,A,H,E,D,0,1,0,1,1,3
1,AALII,A,A,L,I,I,0,1,0,1,0,2
2,AAPAS,A,A,P,A,S,0,1,0,1,1,3
3,AARGH,A,A,R,G,H,0,1,1,0,0,2
4,AARTI,A,A,R,T,I,0,1,1,1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...
14850,BUTCH,B,U,T,C,H,1,1,0,0,0,2
14851,STALK,S,T,A,L,K,1,0,1,0,0,2
14852,FLACK,F,L,A,C,K,0,0,1,0,0,1
14853,WIDOW,W,I,D,O,W,0,1,0,0,0,1


In [5]:
wordle_df_long = pd.melt(wordle_df_wide.loc[:,['word','firstLetter','secondLetter','thirdLetter','fourthLetter','fifthLetter']], 
                         id_vars=['word'],
                         var_name='letterPosition',
                         value_name='letter',
                         value_vars=['firstLetter','secondLetter','thirdLetter','fourthLetter','fifthLetter'])
wordle_df_long

Unnamed: 0,word,letterPosition,letter
0,AAHED,firstLetter,A
1,AALII,firstLetter,A
2,AAPAS,firstLetter,A
3,AARGH,firstLetter,A
4,AARTI,firstLetter,A
...,...,...,...
74270,BUTCH,fifthLetter,H
74271,STALK,fifthLetter,K
74272,FLACK,fifthLetter,K
74273,WIDOW,fifthLetter,W


In [6]:
wordle_df_long.assign(top5letter = np.where(wordle_df_long.letter.isin(top5Letters(wordle_df_long,'letter')),1,0))      ## Leave blank for false

Unnamed: 0,word,letterPosition,letter,top5letter
0,AAHED,firstLetter,A,1
1,AALII,firstLetter,A,1
2,AAPAS,firstLetter,A,1
3,AARGH,firstLetter,A,1
4,AARTI,firstLetter,A,1
...,...,...,...,...
74270,BUTCH,fifthLetter,H,0
74271,STALK,fifthLetter,K,0
74272,FLACK,fifthLetter,K,0
74273,WIDOW,fifthLetter,W,0


In [7]:
wordle_df_long[wordle_df_long.word.str.contains(r'^(?=.*A)(?=.*E)(?=.*S)(?=.*O)(?=.*R)')]

Unnamed: 0,word,letterPosition,letter
157,AEROS,firstLetter,A
10186,SOARE,firstLetter,S
13772,AROSE,firstLetter,A
15012,AEROS,secondLetter,E
25041,SOARE,secondLetter,O
28627,AROSE,secondLetter,R
29867,AEROS,thirdLetter,R
39896,SOARE,thirdLetter,A
43482,AROSE,thirdLetter,O
44722,AEROS,fourthLetter,O


In [8]:
letterColumns = ['firstLetter','secondLetter','thirdLetter','fourthLetter','fifthLetter']
myData = {}
for col in letterColumns:
    myData[col] =  top5Letters(wordle_df_wide, col)

top5_df = pd.DataFrame(myData)
top5_df

Unnamed: 0,firstLetter,secondLetter,thirdLetter,fourthLetter,fifthLetter
0,S,A,A,E,S
1,P,O,R,A,E
2,B,E,I,I,Y
3,C,I,N,T,A
4,M,U,O,N,D


## 4. Save the CSV file to local storage as a backup

In [9]:
## Get today's date
todaysDate = datetime.today().strftime('%Y-%m-%d')

## Set the file names
wordle_wide_filename  = todaysDate + '_wordle_wide.csv'
wordle_long_filename = todaysDate + '_wordle_long.csv'

## Display the file names for confirmation
display(wordle_wide_filename, wordle_long_filename)

## Save the dataframes back to disk as CSV files
wordle_df_wide.to_csv('data/' + wordle_wide_filename, index = False)
wordle_df_long.to_csv('data/' + wordle_long_filename, index = False)

'2023-05-30_wordle_wide.csv'

'2023-05-30_wordle_long.csv'

## 5. Upload the DataFrame to CAS and save in the Casuser caslib

a. Connect to the CAS server

In [32]:
conn = connect_to_cas()
type(conn)

swat.cas.connection.CAS

In [33]:
conn.about()

NOTE: Grid node action status report: 5 nodes, 9 total actions executed.


Unnamed: 0,name,role,uptime,running,stalled
0,worker-2.sas-cas-server-default.ssemonthly.svc...,worker,1.843,0,0
1,worker-3.sas-cas-server-default.ssemonthly.svc...,worker,1.843,0,0
2,worker-1.sas-cas-server-default.ssemonthly.svc...,worker,1.843,0,0
3,worker-0.sas-cas-server-default.ssemonthly.svc...,worker,1.843,0,0
4,controller.sas-cas-server-default.ssemonthly.s...,controller,1.883,0,0

Unnamed: 0,nodes,actions
0,5,9


b. Upload the pandas DataFrame as a CAS table

In [39]:
## Load and save the wordle_wide table as a csv
wordle_wide_castbl = conn.upload_frame(wordle_df_wide, 
                                       casout = {'name':'wordle_wide',
                                                 'caslib':'casuser',
                                                 'replace':True})

wordle_wide_castbl.save(name = wordle_wide_filename, caslib = 'casuser', replace = True)


## Load and save the wordle_long table as a csv
wordle_long_castbl = conn.upload_frame(wordle_df_long,
                                       casout = {'name':'wordle_wide',
                                                 'caslib':'casuser',
                                                 'replace':True})

wordle_wide_castbl.save(name = wordle_long_filename, caslib = 'casuser', replace = True)

NOTE: Cloud Analytic Services made the uploaded file available as table WORDLE_WIDE in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: The table WORDLE_WIDE has been created in caslib CASUSER(Peter.Styliadis@sas.com) from binary data uploaded to Cloud Analytic Services.
NOTE: Cloud Analytic Services saved the file 2023-05-29_wordle_wide.csv in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: Cloud Analytic Services made the uploaded file available as table WORDLE_WIDE in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: The table WORDLE_WIDE has been created in caslib CASUSER(Peter.Styliadis@sas.com) from binary data uploaded to Cloud Analytic Services.
NOTE: Cloud Analytic Services saved the file 2023-05-29_wordle_long.csv in caslib CASUSER(Peter.Styliadis@sas.com).


In [44]:
conn.fileInfo(caslib = 'casuser', path = '%wordle%')

Unnamed: 0,Permission,Owner,Group,Name,Size,Encryption,Time,ModTime
0,drwxr-xr-x,sas,sas,csv_file_blogs,4096,,2023-05-12T12:51:01+00:00,1999515000.0
1,-rwxr-xr-x,sas,sas,2023-05-29_wordle_wide.csv,463936,,2023-05-29T11:14:20+00:00,2000978000.0
2,-rwxr-xr-x,sas,sas,2023-05-29_wordle_long.csv,1515237,,2023-05-29T11:14:21+00:00,2000978000.0


d. Terminate the CAS session

In [192]:
conn.terminate()

## 6. Open SAS Visual Analytics and Visualize the Data