# Wordle Analysis

## 1. Import packages

In [56]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import swat

## Custom personal module to connect to my CAS environment.
## Only a missing module is tolerated here; any other error raised while
## importing casConnect is allowed to surface instead of being hidden.
try:
    from casConnect import connect_to_cas
except ImportError:
    print('CasConnect package not available')

## 2. Get Wordle word list

In [None]:
## Download the Wordle JavaScript bundle that embeds the word list.
url = 'https://www.nytimes.com/games-assets/v2/wordle.303bf7c0c5548c4f655a.js'

## A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)

## Fail loudly on 4xx/5xx responses so an error page is never parsed as the word list.
r.raise_for_status()

wordleSiteContents = r.text

In [6]:
## Markers that delimit the word array inside the downloaded js text
findStartPosition = 'va=['
findEndPosition = ']'

## str.find returns -1 when the marker is absent; check for that explicitly so a
## changed js file raises an error instead of yielding silently wrong positions.
startMarkerIndex = wordleSiteContents.find(findStartPosition)
if startMarkerIndex == -1:
    raise ValueError(f'Start marker {findStartPosition!r} not found in the Wordle js file')

## The word list begins immediately after the start marker
startPosition = startMarkerIndex + len(findStartPosition)

## The word list ends at the first closing bracket after the start position
endPosition = wordleSiteContents.find(findEndPosition, startPosition)
if endPosition == -1:
    raise ValueError(f'End marker {findEndPosition!r} not found after the start marker')

display(f'StartPosition: {startPosition}', f'End Position: {endPosition}')

'StartPosition: 12239'

'End Position: 131078'

In [9]:
## Build the word list from the array text found in the Wordle js file:
## slice out the array contents, upper case them, split on the commas,
## then drop the quotes that surround each individual word.
wordleWordList = [
    quotedWord.replace('"','')
    for quotedWord in wordleSiteContents[startPosition:endPosition].upper().split(',')
]


## Show the total word count and the first five entries
display(
    f'Total acceptable words in Wordle list: {len(wordleWordList)}',
    f'Preview new list: {wordleWordList[0:5]}',
)

'Total acceptable words in Wordle list: 14855'

"Preview new list: ['AAHED', 'AALII', 'AAPAS', 'AARGH', 'AARTI']"

## 3. Prepare the DataFrame

In [10]:
## One row per word, held in a single column named 'word'
word_df = pd.DataFrame(wordleWordList, columns=['word'])
word_df.head()

Unnamed: 0,word
0,AAHED
1,AALII
2,AAPAS
3,AARGH
4,AARTI


In [43]:
## Split every word into its five letters and flag the words whose first letter
## is one of the five most common first letters in the list.
final_df = (
    word_df
    .assign(
        firstLetter=word_df.word.str[0],
        secondLetter=word_df.word.str[1],
        thirdLetter=word_df.word.str[2],
        fourthLetter=word_df.word.str[3],
        fifthLetter=word_df.word.str[4],
        ## The lambda receives the frame being built (firstLetter already assigned),
        ## so the top-5 ranking comes from this frame rather than from a final_df
        ## left over from an earlier run. That keeps the cell runnable on a fresh kernel.
        top5FirstLetter=lambda _df: np.where(
            _df.firstLetter.isin(_df.firstLetter.value_counts().head(5).index),
            'Top5',
            '',
        ),
    )
)

final_df.head()

Unnamed: 0,word,firstLetter,secondLetter,thirdLetter,fourthLetter,fifthLetter,top5FirstLetter
0,AAHED,A,A,H,E,D,
1,AALII,A,A,L,I,I,
2,AAPAS,A,A,P,A,S,
3,AARGH,A,A,R,G,H,
4,AARTI,A,A,R,T,I,


In [44]:
## Count how many words are flagged 'Top5' versus left blank
final_df['top5FirstLetter'].value_counts()

        9135
Top5    5720
Name: top5FirstLetter, dtype: int64

array(['y', 'y', 'y', ..., '0', '0', 'y'], dtype='<U3')

## 4. Save the CSV file to local storage

In [45]:
## Stamp the file name with today's date (YYYY-MM-DD)
todaysDate = datetime.today().strftime('%Y-%m-%d')
csvFileName = 'wordleList_' + todaysDate + '.csv'
display(f'File name: {csvFileName}')

## Create the output folder if it is missing so to_csv does not fail on a fresh checkout
outputDir = Path('data')
outputDir.mkdir(parents=True, exist_ok=True)

final_df.to_csv(outputDir / csvFileName)

'File name: wordleList_2023-05-23.csv'

## 5. Upload the DataFrame to CAS

a. Connect to the CAS server

In [51]:
## Open a CAS connection with the connect_to_cas helper imported from casConnect.
## NOTE(review): if that import failed in the first cell, this call raises NameError.
conn = connect_to_cas()
type(conn)

swat.cas.connection.CAS

b. Upload the pandas DataFrame as a CAS table

In [52]:
## Send the local DataFrame to CAS as table 'wordleList' in the 'casuser' caslib
conn.upload_frame(
    final_df,
    casout=dict(name='wordleList', caslib='casuser'),
)

NOTE: Cloud Analytic Services made the uploaded file available as table WORDLELIST in caslib CASUSER(Peter.Styliadis@sas.com).
NOTE: The table WORDLELIST has been created in caslib CASUSER(Peter.Styliadis@sas.com) from binary data uploaded to Cloud Analytic Services.


CASTable('WORDLELIST', caslib='CASUSER(Peter.Styliadis@sas.com)')

c. Save the CAS table as a CSV file on the CAS server in the Casuser caslib

In [54]:
## Reference the uploaded table by name, then save it in the casuser caslib under
## the same dated file name used for the local CSV.
## NOTE(review): replace = True presumably overwrites an existing file - confirm in the SWAT docs.
castbl = conn.CASTable('WORDLELIST', caslib='CASUSER')
castbl.save(name = csvFileName, caslib = 'casuser', replace = True)

NOTE: Cloud Analytic Services saved the file wordleList_2023-05-23.csv in caslib CASUSER(Peter.Styliadis@sas.com).


d. Terminate the CAS session

In [55]:
## Terminate the CAS session held by conn now that the upload and save are done
conn.terminate()

## 6. Open SAS Visual Analytics and Visualize the Data