# Data Processing for the ISS Archaeology Project

In [1]:
import json
import glob
import PIL
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import networkx as nx
import random
import numpy as np
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

In [2]:
# Select the Tk-based 'TkAgg' backend for all figures created below.
# NOTE(review): this call runs after `matplotlib.pyplot` has already been
# imported — confirm the backend switch takes effect on the installed
# matplotlib version, and that a display is available where this runs.
matplotlib.use('TkAgg')

### Loading Data

In [3]:
# Read the raw photo annotations from rawdata.json into `data`.
with open('rawdata.json') as raw_file:
    data = json.load(raw_file)

In [4]:
# Show the loaded mapping for inspection. As the output below shows, it is keyed
# by photo filename, and each value maps "name&country" strings to a list of
# ints (the meaning of those ints is not established in this notebook).
data

{'11531971213_e0586f0d11_o.jpg': {},
 's130e010538_9417436298_o.jpg': {'kimiya_yui&japan': [3],
  'robert_behnken&usa': [2],
  'nicholas_patrick&usa': [0],
  'kathryn_hire&usa': [1]},
 's128e007326_9468242042_o.jpg': {},
 's131e010300_9449954773_o.jpg': {'james_dutton&usa': [0]},
 's133e007907_9470732697_o.jpg': {'benjamin_drew&usa': [0]},
 'iss002e6546_9498018218_o.jpg': {'yury_usachev&russia': [0]},
 'zinnias_24634348595_o.jpg': {},
 'iss003e6606_9502928581_o.jpg': {},
 'iss01e5158_9471560353_o.jpg': {},
 's130e006987_9417110772_o.jpg': {'george_zamka&usa': [0]},
 'iss004e6340_9508220273_o.jpg': {'daniel_bursch&usa': [0]},
 's133e008819_9473505342_o.jpg': {},
 's130e007098_9417109628_o.jpg': {'kimiya_yui&japan': [0]},
 'iss003e6180_9504948842_o.jpg': {'scott_horowitz&usa': [0]},
 'iss002e5734_9495231819_o.jpg': {'shannon_walker&usa': [1],
  'james_voss&usa': [0]},
 's131e008741_9449193525_o.jpg': {'clayton_anderson&usa': [0]},
 's133e007425_9473520684_o.jpg': {'catherine_coleman&usa'

In [5]:
# Count how many photos each unordered pair of people shares.
# Keys are str() of a (person_a, person_b) tuple; a pair keeps whichever
# ordering it was first seen with, and the reversed ordering is folded into it.
pairs = {}
for astros in data.values():
    people = list(astros)
    for idx, first in enumerate(people):
        for second in people[idx + 1:]:
            forward = str((first, second))
            backward = str((second, first))
            if forward in pairs:
                pairs[forward] += 1
            elif backward in pairs:
                pairs[backward] += 1
            else:
                pairs[forward] = 1

In [6]:
# Persist the pair counts to pairs.json.
with open('pairs.json', 'w') as out_file:
    json.dump(pairs, out_file)

In [7]:
# Group people by country: names[country] is a list of unique display names
# (underscores replaced by spaces), in order of first appearance.
names = {}
for astros in data.values():
    for tag in astros:
        parts = tag.split('&')
        person = parts[0].replace('_', ' ')
        nation = parts[1]
        # The raw data contains the misspelling "canda"; fold it into "canada".
        if nation == "canda":
            nation = "canada"
        roster = names.setdefault(nation, [])
        if person not in roster:
            roster.append(person)

In [8]:
# List the countries found, in first-seen order.
list(names)

['japan',
 'usa',
 'russia',
 'france',
 'italy',
 'belgium',
 'canada',
 'kazakhstan',
 'brazil',
 'germany',
 'greatbritain']

In [9]:
def getRandOffset(n, diam):
    """Return grid offsets for placing ``n`` items inside a square of side ``diam``.

    Despite the name, nothing here is random: the result is a deterministic
    grid. With ``row = int(sqrt(n))``, the grid has ``int(n / row) + 1`` steps
    along x and ``row + 1`` steps along y, spaced ``diam / row`` apart, and
    every y value is shifted by ``2 * i`` for x-step ``i`` so successive
    columns are slightly staggered.

    Parameters
    ----------
    n : int
        Number of items that need an offset.
    diam : float
        Side length used to derive the grid spacing.

    Returns
    -------
    list of [x, y]
        At least ``n`` offsets, ordered by x-step and then y-step. Callers are
        expected to slice off the first ``n``. An empty list is returned when
        ``n <= 0`` (previously this raised because ``row`` evaluated to 0 or
        the square root was NaN).
    """
    if n <= 0:
        return []

    row = int(np.sqrt(n))
    step = diam / row  # grid spacing; loop-invariant
    coords = []
    for i in range(int(n / row) + 1):
        for j in range(row + 1):
            coords.append([step * i, step * j + 2 * i])

    return coords

### Importing Images

In [10]:
# Collect every .jpg and .jpeg file in the cropped-photo directory.
path = '../cropped_Astronaut_photos/'
files = glob.glob(path + '*.jpg') + glob.glob(path + '*.jpeg')

In [11]:
# Shrink every cropped photo to a fixed width, keeping its aspect ratio, and
# write the result into ./resized_photos/ with a "resized_" filename prefix.
# The output directory must already exist.
basewidth = 128
for f in files:
    # The context manager closes the source file once the resized copy exists.
    with Image.open(f) as im:
        wpercent = basewidth / float(im.size[0])
        # Never let rounding collapse a very wide image to zero height.
        hsize = max(1, int(float(im.size[1]) * wpercent))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # under its current name.
        resized = im.resize((basewidth, hsize), Image.LANCZOS)
    resized.save('./resized_photos/resized_' + f.split('/')[-1])

In [12]:
# Load every resized photo into `img`, keyed by a "first last" display name.
path = 'resized_photos/'
files = glob.glob(path + '*.jpg') + glob.glob(path + '*.jpeg')
img = {}
for f in files:
    # Everything before '&' is the path plus the underscore-separated name.
    tokens = f.split('&')[0].split('_')
    # NOTE(review): only the last two underscore tokens form the key, so a name
    # with more (or fewer) than two parts will not match the keys built with
    # .replace('_', ' ') elsewhere in this notebook — confirm against the data.
    key = tokens[-2] + " " + tokens[-1]
    img[key] = mpimg.imread(f)
len(img)

236

### Graphing

In [None]:
# Build and draw the full co-appearance graph: one node per person in `names`,
# clustered and coloured by country, with an edge for every key in `pairs`.
# Each node is overlaid with that person's photo from `img` and a text label.
astronauts = nx.Graph()
nodes = []
node_colors = []
offsets = []
labels = {}
pos = {}
i = 0  # index of the current country into CENTROIDS / DIAMS / COLORS
j = 0  # running index into `offsets`, shared across all countries
# Per-country layout tables, matched by position to the iteration order of
# `names`; there must be at least as many entries as countries.
CENTROIDS = [(200, 500), (100, 100), (500, 500), (500, 200), (125, 400), (550, 300), (390, 150), (525, 255), (300, 570), (400, 600), (365, 595)]
DIAMS = [45, 230, 130, 50, 20, 50, 50, 50, 50, 50, 50]
COLORS = ['#cc0000', '#cc9900', '#009900', '#990099', '#6600ff', '#339966', '#663300', '#99cc00', '#727072', '#669999', '#993333']
for country in list(names.keys()):
    width = DIAMS[i]
    col = COLORS[i]
    # getRandOffset may return more grid points than needed; keep one per person.
    offsets += getRandOffset(len(names[country]), width)[0:len(names[country])]
    for n in names[country]:
        centroid = CENTROIDS[i]
        (randX, randY) = offsets[j]
        pos[n] = np.array([(centroid[0] + randX), (centroid[1] + randY)])
        labels[n] = n
        nodes.append(n)
        node_colors.append(col)
        j += 1
    i += 1


plt.figure(3,figsize=(60,60))
astronauts.add_nodes_from(labels)
# The keyword is `nodelist`; the previous `node_list` spelling is not a
# parameter of draw_networkx_nodes (silently ignored by old networkx releases,
# a TypeError on current ones). Passing it keeps `node_colors` aligned with
# the nodes being drawn.
nx.draw_networkx_nodes(astronauts,
                       pos,
                       nodelist=nodes,
                       node_color=node_colors,
                       node_size=1500,
                       alpha=0.8)

connections = []
for nameSet in pairs.keys():
    # Keys of `pairs` are str() of a 2-tuple of "name&country" strings, so the
    # two names are the first and the last single-quoted substrings.
    name1 = nameSet.split('\'')[1].split('&')[0].replace('_', ' ')
    name2 = nameSet.split('\'')[-2].split('&')[0].replace('_', ' ')
    connections.append((name1, name2, {}))

astronauts.add_edges_from(connections)
nx.draw_networkx_edges(astronauts,
                       pos,
                       edgelist=connections,
                       edge_color='#900000',
                       alpha=0.5)

ax = plt.gca()
fig = plt.gcf()
imsize = 0.02  # side of each photo inset, as a fraction of the figure
trans =  ax.transData.transform                  # data coords -> display coords
trans2 = fig.transFigure.inverted().transform    # display coords -> figure fraction
i = 0
for n in astronauts.nodes():
    (x, y) = pos[n]
    xx, yy = trans((x, y))
    xa, ya = trans2((xx, yy))
    # Small inset axes centred on the node, showing the person's photo.
    a = plt.axes([xa-imsize/2.0, ya-imsize/2.0, imsize, imsize])
    a.imshow(img[n])
    a.set_aspect('equal')
    a.axis('off')
    # plt.text targets the current axes, i.e. the inset just created, so the
    # coordinates here are in the inset's (image) data units.
    # NOTE(review): node_colors[i] assumes the graph's node order matches the
    # order in which `node_colors` was filled — confirm no name repeats.
    plt.text(xa+60,ya+152,s=n, bbox=dict(facecolor=node_colors[i], alpha=0.25),horizontalalignment='center',fontsize=16)
    i += 1

# nx.draw_networkx_labels(astronauts,pos,labels,font_size=16)
#plt.axis('off')
plt.savefig("astronautrelations.png")
plt.show()

In [13]:
# Read frequentpairs.json into `freqdata`.
with open('frequentpairs.json') as freq_file:
    freqdata = json.load(freq_file)

In [14]:
# Collect, in first-seen order, every distinct display name that appears in a
# key of `freqdata`.
fpairs = []
for pair_key in freqdata:
    # The two names are the first and the last single-quoted substrings of the
    # key; drop the "&country" suffix and turn underscores into spaces.
    quoted = pair_key.split('\'')
    for raw in (quoted[1], quoted[-2]):
        person = raw.split('&')[0].replace('_', ' ')
        if person not in fpairs:
            fpairs.append(person)
fpairs

['ronald garan',
 'andrei borisenko',
 'sergey volkov',
 'james reilly',
 'janet kavandi',
 'michael gernhardt',
 'stephen lendsey',
 'susan helms',
 'kathryn hire',
 'george zamka',
 'robert behnken',
 'nicholas patrick',
 'stephen robinson',
 'terry virts',
 'yury usachev',
 'james voss']

In [15]:
# Build and draw the reduced graph: only people listed in `fpairs` become
# nodes, clustered and coloured by country, with an edge for every key in
# `pairs` whose two people are both in `fpairs`. Each node is overlaid with
# that person's photo from `img` and a text label.
freq = nx.Graph()
nodes = []
node_colors = []
offsets = []
labels = {}
pos = {}
i = 0  # index of the current country into CENTROIDS / DIAMS / COLORS
j = 0  # running index into `offsets`, shared across all countries
# Per-country layout tables, matched by position to the iteration order of
# `names`; there must be at least as many entries as countries.
CENTROIDS = [(200, 500), (100, 100), (500, 500), (500, 200), (125, 400), (550, 300), (390, 150), (525, 255), (300, 570), (400, 600), (365, 595)]
DIAMS = [45, 230, 130, 50, 20, 50, 50, 50, 50, 50, 50]
COLORS = ['#cc0000', '#cc9900', '#009900', '#990099', '#6600ff', '#339966', '#663300', '#99cc00', '#727072', '#669999', '#993333']
for country in list(names.keys()):
    width = DIAMS[i]
    col = COLORS[i]
    # getRandOffset may return more grid points than needed; keep one per person.
    offsets += getRandOffset(len(names[country]), width)[0:len(names[country])]
    for n in names[country]:
        if n in fpairs:
            centroid = CENTROIDS[i]
            (randX, randY) = offsets[j]
            pos[n] = np.array([(centroid[0] + randX), (centroid[1] + randY)])
            labels[n] = n
            nodes.append(n)
            node_colors.append(col)
        # j advances for every person, drawn or not, so offsets are consumed
        # exactly as if everyone in `names` were placed.
        j += 1
    i += 1


plt.figure(3,figsize=(60,60))
freq.add_nodes_from(labels)
# The keyword is `nodelist`; the previous `node_list` spelling is not a
# parameter of draw_networkx_nodes (silently ignored by old networkx releases,
# a TypeError on current ones). Passing it keeps `node_colors` aligned with
# the nodes being drawn.
nx.draw_networkx_nodes(freq,
                       pos,
                       nodelist=nodes,
                       node_color=node_colors,
                       node_size=1000,
                       alpha=0.8)

connections = []
# NOTE(review): edges are taken from `pairs` (filtered by `fpairs`), not from
# `freqdata` — confirm that is the intended edge set.
for nameSet in pairs.keys():
    # Keys of `pairs` are str() of a 2-tuple of "name&country" strings, so the
    # two names are the first and the last single-quoted substrings.
    name1 = nameSet.split('\'')[1].split('&')[0].replace('_', ' ')
    name2 = nameSet.split('\'')[-2].split('&')[0].replace('_', ' ')
    if name1 in fpairs and name2 in fpairs:
        connections.append((name1, name2, {}))

freq.add_edges_from(connections)
nx.draw_networkx_edges(freq,
                       pos,
                       edgelist=connections,
                       edge_color='#900000',
                       alpha=0.9)

ax = plt.gca()
fig = plt.gcf()
imsize = 0.02  # side of each photo inset, as a fraction of the figure
trans =  ax.transData.transform                  # data coords -> display coords
trans2 = fig.transFigure.inverted().transform    # display coords -> figure fraction
i = 0
for n in freq.nodes():
    (x, y) = pos[n]
    xx, yy = trans((x, y))
    xa, ya = trans2((xx, yy))
    # Small inset axes centred on the node, showing the person's photo.
    a = plt.axes([xa-imsize/2.0, ya-imsize/2.0, imsize, imsize])
    a.imshow(img[n])
    a.set_aspect('equal')
    a.axis('off')
    # plt.text targets the current axes, i.e. the inset just created, so the
    # coordinates here are in the inset's (image) data units.
    # NOTE(review): node_colors[i] assumes the graph's node order matches the
    # order in which `node_colors` was filled — confirm no name repeats.
    plt.text(xa+60,ya+152,s=n, bbox=dict(facecolor=node_colors[i], alpha=0.25),horizontalalignment='center',fontsize=16)
    i += 1

# nx.draw_networkx_labels(freq,pos,labels,font_size=16)
#plt.axis('off')
plt.savefig("freqrelations.png")
plt.show()

The iterable function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use np.iterable instead.
  if not cb.iterable(width):


In [None]:
# G=nx.cubical_graph()
# pos=nx.spring_layout(G) # positions for all nodes

# # nodes
# nx.draw_networkx_nodes(G,pos,
#                        nodelist=[0,1,2,3],
#                        node_color='r',
#                        node_size=500,
#                    alpha=0.8)
# nx.draw_networkx_nodes(G,pos,
#                        nodelist=[4,5,6,7],
#                        node_color='b',
#                        node_size=500,
#                    alpha=0.8)

# nx.draw_networkx_edges(G,pos,width=1.0,alpha=0.5)
# nx.draw_networkx_edges(G,pos,
#                         edgelist=[(0,1),(1,2),(2,3),(3,0)],
#                         width=8,alpha=0.5,edge_color='r')
# nx.draw_networkx_edges(G,pos,
#                         edgelist=[(4,5),(5,6),(6,7),(7,4)],
#                         width=8,alpha=0.5,edge_color='b')


# # some math labels
# labels={}
# labels[0]=r'$a$'
# labels[1]=r'$b$'
# labels[2]=r'$c$'
# labels[3]=r'$d$'
# labels[4]=r'$\alpha$'
# labels[5]=r'$\beta$'
# labels[6]=r'$\gamma$'
# labels[7]=r'$\delta$'
# nx.draw_networkx_labels(G,pos,labels,font_size=16)

# plt.axis('off')
# plt.show() # display