## This notebook creates a folder of tables for the final project with the correct naming conventions 

In [78]:
import shutil
import os
import tempfile

In [79]:
import configparser
config = configparser.ConfigParser()
config.read("../../env.ini")
data_home  = config['DEFAULT']['data_home']
output_dir = config['DEFAULT']['output_dir']

In [80]:
if not os.path.exists(f"{output_dir}/00_final_tables"):
    os.makedirs(f"{output_dir}/00_final_tables")

In [81]:
ls -l

total 186912
-rw-r--r--  1 steffenerickson  staff   7232131 Apr 29 19:38 BOW.csv
-rw-r--r--  1 steffenerickson  staff   1507591 Apr 29 19:40 BOW_SENT.csv
-rw-r--r--  1 steffenerickson  staff  31054282 Apr 29 19:38 CORPUS.csv
-rw-r--r--  1 steffenerickson  staff     80094 Apr 29 19:40 DOC_SENT.csv
-rw-r--r--  1 steffenerickson  staff  15526432 Apr 29 19:38 DTM.csv
-rw-r--r--  1 steffenerickson  staff    360766 Apr 29 19:39 LDA_PHI.csv
-rw-r--r--  1 steffenerickson  staff     65536 Apr 29 19:39 LDA_THETA.csv
-rw-r--r--  1 steffenerickson  staff     61728 Apr 29 19:37 LIB.csv
-rw-r--r--  1 steffenerickson  staff      1093 Apr 29 19:38 PCA_COMPONENTS.csv
-rw-r--r--  1 steffenerickson  staff    104892 Apr 29 19:38 PCA_DCM.csv
-rw-r--r--  1 steffenerickson  staff    221500 Apr 29 19:38 PCA_LOADINGS.csv
-rw-r--r--  1 steffenerickson  staff  33366980 Apr 29 19:38 TFIDF.csv
-rw-r--r--  1 steffenerickson  staff   2978607 Apr 29 19:38 TFIDF_L2.csv
-rw-r--r--  1 steffenerickson  staff    935930 Ap

In [82]:
oldnames = "library_personbysession corpus vocab bow dtcm tfidf tfidf_reduced_L2 pca_compinf \
pca_dcm pca_loadings lda_theta lda_phi sent_bow sent_doc sent_vocab  \
w2v_0pre w2v_1post w2v_2classroom".split()

In [83]:
newnames = "LIB CORPUS VOCAB BOW DTM TFIDF TFIDF_L2 PCA_COMPONENTS PCA_DCM PCA_LOADINGS LDA_THETA LDA_PHI \
BOW_SENT DOC_SENT VOCAB_SENT VOCAB_W2V_0pre VOCAB_W2V_1pre VOCAB_W2V_2post".split()

In [84]:
len(oldnames) == len(newnames)

True

In [85]:
oldnames

['library_personbysession',
 'corpus',
 'vocab',
 'bow',
 'dtcm',
 'tfidf',
 'tfidf_reduced_L2',
 'pca_compinf',
 'pca_dcm',
 'pca_loadings',
 'lda_theta',
 'lda_phi',
 'sent_bow',
 'sent_doc',
 'sent_vocab',
 'w2v_0pre',
 'w2v_1post',
 'w2v_2classroom']

In [86]:
newnames

['LIB',
 'CORPUS',
 'VOCAB',
 'BOW',
 'DTM',
 'TFIDF',
 'TFIDF_L2',
 'PCA_COMPONENTS',
 'PCA_DCM',
 'PCA_LOADINGS',
 'LDA_THETA',
 'LDA_PHI',
 'BOW_SENT',
 'DOC_SENT',
 'VOCAB_SENT',
 'VOCAB_W2V_0pre',
 'VOCAB_W2V_1pre',
 'VOCAB_W2V_2post']

In [87]:
source_directory = output_dir 
destination_directory = f"{output_dir}/00_final_tables"

if not os.path.exists(destination_directory):
    os.makedirs(destination_directory)

for i in range(len(oldnames)):
    oldfilename = oldnames[i] + '.csv'
    newfilename = newnames[i] + '.csv'
    source_path = os.path.join(source_directory, oldfilename)
    destination_path = os.path.join(destination_directory, newfilename)

    # Create a temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Temporary path for the intermediate copy
        intermediate_copy_path = os.path.join(temp_dir, oldfilename)

        # Copy the file to the temporary directory
        shutil.copy2(source_path, intermediate_copy_path)

        # Move the copied file from the temporary directory to the final destination
        shutil.move(intermediate_copy_path, destination_path)


In [88]:
os.chdir(f"{output_dir}/00_final_tables")

In [89]:
ls -l 

total 186912
-rw-r--r--  1 steffenerickson  staff   7232131 Apr 29 19:38 BOW.csv
-rw-r--r--  1 steffenerickson  staff   1507591 Apr 29 20:17 BOW_SENT.csv
-rw-r--r--  1 steffenerickson  staff  31054282 Apr 29 19:38 CORPUS.csv
-rw-r--r--  1 steffenerickson  staff     80094 Apr 29 20:17 DOC_SENT.csv
-rw-r--r--  1 steffenerickson  staff  15526432 Apr 29 19:38 DTM.csv
-rw-r--r--  1 steffenerickson  staff    360766 Apr 29 20:11 LDA_PHI.csv
-rw-r--r--  1 steffenerickson  staff     65536 Apr 29 20:11 LDA_THETA.csv
-rw-r--r--  1 steffenerickson  staff     61728 Apr 29 19:37 LIB.csv
-rw-r--r--  1 steffenerickson  staff      1093 Apr 29 20:28 PCA_COMPONENTS.csv
-rw-r--r--  1 steffenerickson  staff    104892 Apr 29 20:28 PCA_DCM.csv
-rw-r--r--  1 steffenerickson  staff    221500 Apr 29 20:28 PCA_LOADINGS.csv
-rw-r--r--  1 steffenerickson  staff  33366980 Apr 29 19:38 TFIDF.csv
-rw-r--r--  1 steffenerickson  staff   2978607 Apr 29 19:38 TFIDF_L2.csv
-rw-r--r--  1 steffenerickson  staff    935930 Ap