In [3]:
# import packages
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import umap
import matplotlib.lines as mlines
%matplotlib inline

In [4]:
# seaborn defaults for every figure in this notebook:
# white style, notebook context, default figure size of (14, 10)
sns.set(
    context='notebook',
    style='white',
    rc={'figure.figsize': (14, 10)},
)

# Function to create UMAP graphs

In [109]:
# Category tables. A code '1' in a sign name maps to index 0, '2' to index 1, and so on;
# that index selects both the scatter colour and the legend entry.
_MAKER_LABELS = ('Moller', 'Poe', 'Tabin')
_PROVENANCE_LABELS = ('Thebes', 'Lahun', 'Hatnub', 'Unknown')
_TEXT_LABELS = ('Shipwrecked', 'Peasant B1', 'Peasant R', 'Sinuhe B', 'Sinuhe R',
                'Prisse', 'Hymn', 'Temple Files', 'Will of Wah', 'Texte aus Hatnub',
                'Ebers', 'Rhind', 'Westcar')
_GENRE_LABELS = ('Literary', 'Instruction', 'Hymn', 'Administrative',
                 'Inscription', 'Medical', 'Mathematical')
# Genre index (into _GENRE_LABELS) of each text, aligned with _TEXT_LABELS.
_TEXT_GENRES = (0, 0, 0, 0, 0, 1, 2, 3, 3, 4, 5, 6, 0)
# The text plot has 13 categories, so it uses explicit named colours.
_TEXT_COLORS = ['red', 'blue', 'green', 'purple', 'pink', 'orange', 'cyan', 'magenta',
                'limegreen', 'black', 'brown', 'olive', 'cornflowerblue']


def _decode_sign(sign):
    """Return the (maker, provenance, text) category indices encoded in one sign name."""
    # NOTE(review): the slices below imply names shaped like
    # '<sign>_0ddd_<maker>_<provenance>_<text>' -- confirm against the file naming scheme.
    position = sign.index('_0')  # position of the '_0' in the name; ValueError if absent
    fields = (
        ('facsimile maker', sign[position + 6:position + 7], _MAKER_LABELS),
        ('provenance', sign[position + 8:position + 9], _PROVENANCE_LABELS),
        ('text', sign[position + 10:], _TEXT_LABELS),
    )
    indices = []
    for field, code, labels in fields:
        valid_codes = [str(k + 1) for k in range(len(labels))]
        if code not in valid_codes:
            # Without this check the colour list would silently end up shorter than
            # the embedding and only fail later, inside matplotlib.
            raise ValueError("Unrecognised " + field + " code " + repr(code) +
                             " in sign name " + repr(sign))
        indices.append(valid_codes.index(code))
    return tuple(indices)


def _first_positions(values):
    """Map each value to the position of its first occurrence in values."""
    positions = {}
    for pos, value in enumerate(values):
        positions.setdefault(value, pos)
    return positions


def _symmetric_distances(sign_names, results):
    """Build the sign-by-sign matrix of summed to/from distances (NaN where unknown)."""
    name_pos = _first_positions(sign_names)
    final_dists = pd.DataFrame(np.nan, index=sign_names, columns=sign_names)
    column_cache = {}  # name-column position -> {neighbour name: row position}

    for j in range(len(sign_names)):
        # columns 2j / 2j+1 hold the neighbour names / distances for sign j
        frame = results.iloc[:, j * 2:2 + j * 2].dropna()
        neighbours = frame.iloc[:, 0].tolist()
        dists_to = frame.iloc[:, 1].tolist()
        og_sign = neighbours[0]  # the first row names the sign this column pair belongs to

        for sign, dist_to in zip(neighbours, dists_to):
            if sign not in name_pos:
                continue
            x = name_pos[sign]
            if x not in column_cache:
                column_cache[x] = _first_positions(results.iloc[:, x * 2].tolist())
            # Row *position* of og_sign in the neighbour list of `sign`; a position
            # (not an index label) is what the iloc lookup below requires.
            y = column_cache[x].get(og_sign)

            if y is not None:
                dist_from = results.iloc[y, x * 2 + 1]
                dist_tot = dist_to + dist_from
            else:
                dist_tot = dist_to * 2
                print("Sign " + str(og_sign) + " unexpectedly does not appear in the list for " + str(sign) + ", although the reverse is true. Doubling the value that is present.")

            if og_sign not in name_pos:
                # .loc assignment would otherwise enlarge the matrix with a new row
                raise KeyError(og_sign)
            # Single-step .loc assignment: chained `.loc[a].loc[b] = v` writes to a
            # temporary row and is not guaranteed to reach the frame.
            final_dists.loc[og_sign, sign] = dist_tot
            final_dists.loc[sign, og_sign] = dist_tot

    return final_dists


def _save_category_plot(embedding, codes, colors, labels, title, path):
    """Scatter the embedding coloured by category index, add a legend, save and close."""
    fig, ax = plt.subplots()
    ax.scatter(
        embedding[:, 0],
        embedding[:, 1],
        c=[colors[code] for code in codes])
    handles = [mlines.Line2D([], [], color=colors[k], marker='o', label=label)
               for k, label in enumerate(labels)]
    ax.legend(handles=handles)
    ax.set_aspect('equal', 'datalim')
    ax.set_title(title, fontsize=24)
    fig.savefig(path)
    plt.close(fig)  # release the figure; this function is called in a loop over signs


def clus_plots_finalsave(small_results, results, name, out_dir='/Users/.../', random_state=None):
    """Embed one sign's instances with UMAP and save four colour-coded scatter plots.

    The sign names in the first row of ``small_results`` are decoded into
    facsimile-maker, provenance, text and genre categories. A symmetric distance
    matrix is assembled from ``results`` (to-distance plus from-distance; the
    to-distance is doubled when the reverse entry is missing), unknown pairs are
    filled with the largest known distance, and the matrix is passed to UMAP with
    ``metric='precomputed'``. One PNG per categorisation is written:
    ``<name>bymaker.png``, ``<name>byprovenance.png``, ``<name>bytext.png`` and
    ``<name>bygenre.png``.

    Parameters
    ----------
    small_results : pandas.DataFrame
        Its first row holds the sign names, one per column.
    results : pandas.DataFrame
        For the sign at position j, column 2*j holds neighbour names and column
        2*j + 1 the matching distances; the first non-missing row of each pair
        is taken as the sign itself.
    name : str
        Label used in the plot titles and as the file-name prefix.
    out_dir : str, optional
        Directory the PNG files are written to (default: the original location).
    random_state : int or None, optional
        Forwarded to ``umap.UMAP``; set it for a reproducible layout.

    Raises
    ------
    ValueError
        If a sign name lacks ``'_0'`` or carries an unrecognised category code.
    KeyError
        If the first entry of a column pair in ``results`` is not one of the
        sign names in ``small_results``.
    """
    import os

    sign_names = small_results.iloc[0].tolist()
    decoded = [_decode_sign(sign) for sign in sign_names]
    makersnum = [maker for maker, _, _ in decoded]
    locsnum = [loc for _, loc, _ in decoded]
    textsnum = [text for _, _, text in decoded]
    genre_num = [_TEXT_GENRES[text] for text in textsnum]

    # find difference scores for all signs
    final_dists = _symmetric_distances(sign_names, results)

    # replace NAs with max distance value in set
    final_dists_val = final_dists.fillna(final_dists.max().max())

    # initialize and run umap
    reducer = umap.UMAP(metric='precomputed', random_state=random_state)
    embedding = reducer.fit_transform(final_dists_val.to_numpy())

    # make plots
    palette = sns.color_palette(palette='colorblind')
    plots = (
        (makersnum, palette, _MAKER_LABELS, 'Facsimile Maker', 'bymaker.png'),
        (locsnum, palette, _PROVENANCE_LABELS, 'Provenance', 'byprovenance.png'),
        (textsnum, _TEXT_COLORS, _TEXT_LABELS, 'Text', 'bytext.png'),
        (genre_num, palette, _GENRE_LABELS, 'Genre', 'bygenre.png'),
    )
    for codes, colors, labels, grouping, suffix in plots:
        _save_category_plot(
            embedding, codes, colors, labels,
            'UMAP Projection of ' + name + ' Data Set by ' + grouping,
            os.path.join(out_dir, name + suffix))

# Run with various signs and save UMAP graphs

In [1]:
allsigns = ['A1','A2','...'] # sign names

# for each sign: read its full results and its name results, then save the four UMAP plots
for name in allsigns:
    results = pd.read_csv(
        '/Users/.../' + name + '_fullresults.csv',
        index_col=[0],
    )
    small_results = pd.read_csv(
        '/Users/.../' + name + '_nameresults.csv',
        index_col=[0],
    )
    clus_plots_finalsave(small_results, results, name)

NameError: name 'pd' is not defined