In [2]:
# Loads the node ID's for each crack front and returns them as a list within a list...listception
def create_crack_fronts(fname):
    crack_fronts = []
    node_ids = []
    with open(fname, 'r') as f:
        for line in f:
            if 'Crack' in line:
                crack_fronts.append(list(node_ids))
                node_ids = []
            elif 'Number' not in line:
                node_ids.append(int(line))
    if (len(node_ids) != 0):
        crack_fronts.append(list(node_ids))
    return crack_fronts;

In [3]:
# Creates a KD Tree for the points in the file, use the centers-grain.csv file, 
# this method ignores the first line header. This returns the KD Tree and a list of the grain ids

from scipy import spatial

def create_tree(fname):
    x_s = []
    y_s = []
    z_s = []
    ids = []
    i = 0
    
    with open(fname, 'r') as f:
        for line in f:
            if i == 0:
                i += 1
            else:
                tokens = line.split(',')
                x_s.append(float(tokens[0]))
                y_s.append(float(tokens[1]))
                z_s.append(float(tokens[2]))
                ids.append(int(tokens[3].rstrip()))

    tree = spatial.KDTree(zip(x_s,y_s,z_s))
    return tree, ids

In [38]:
# Read all nodes into a temp DB so that we can pull them out by ID later.
# Nodes file format is ID,x,y,z

import sqlite3

def create_nodes_db(fname, dbpath):
    for iter_ in range(12):
        increment = 1500000
        start = (increment * iter_)
        end = start + increment
        i = 0
        conn = sqlite3.connect(dbpath)

        if iter_ == 0:
            conn.execute("create table nodes (id int, x float, y float, z float);")

        with open(fname, 'r') as f:
            for line in f:
                if i >= start and i < end:
                    tokens = line.split(' ')
                    conn.execute("insert into nodes values (" + tokens[0] + ',' +  tokens[1] + ',' + tokens[2] + ',' + 
                                 tokens[3].rstrip() + ");")
                i += 1

        conn.commit()
        conn.close()
        progress = iter_ / 12.0
        print progress
    print '1.0\n\nDONE!'

In [33]:
# Gets the x,y,z for the specified node ID, fname is for the DB, crack_front is the table that has the node IDs
def get_node_coors_from_id_setup(node_id, dbpath):
    conn = sqlite3.connect(dbpath)
    cursor = conn.execute("SELECT distinct x, y, z  from nodes where id = " + str(node_id))
    for row in cursor:
        x = row[0]
        y =  row[1]
        z = row[2]
    conn.close()
    return x, y, z

In [44]:
# Read crack front points into DB
def crack_front_db(crack_fronts, dbpath):

    conn = sqlite3.connect(dbpath)

    conn.execute("create table crack_front (node_id int, crack_id int, x float, y float, z float);")
    
    for i in range(1,10):
        for j in range(len(crack_fronts[i])):
            x, y, z = get_node_coors_from_id_setup(crack_fronts[i][j], dbpath)
            conn.execute("insert into crack_front values (" + str(crack_fronts[i][j]) + ',' + str(i) + ',' + str(x) + ',' + 
                         str(y) + ',' + str(z) + ");")
        print i / 10.0

    conn.commit()
    conn.close()

    print 'DONE!'

In [8]:
# Read euler angles into near.db

import sqlite3

def save_euler_angles(dbpath, fname):
    conn = sqlite3.connect(dbpath)

    conn.execute('create table euler_angles(grain_id primary key, phi1 float, Phi float, phi2 float);')
    
    x = 0
    
    with open(fname, 'r') as f:
        for line in f:
            if x > 0:
                tokens = line.split(' ')
                conn.execute("INSERT INTO euler_angles VALUES (" + tokens[0] + ',' +  tokens[30] + ',' + tokens[32] + ',' + 
                                 tokens[34].rstrip() + ");")
            else:
                x = 1

    conn.commit()
    conn.close()
    print 'done'

In [17]:
# Create base.db crack_nodes from near.db crack_front

def base_crack_nodes(base_dbpath, near_dbpath):
    
    base = sqlite3.connect(base_dbpath)
    
    base.execute('create table crack_nodes(crack_id int, arr_index int, node_order int, theta float, node_id int, ' +
                'x float, y float, z float, primary key(crack_id, arr_index));')

    near = sqlite3.connect(near_dbpath)
    
#     i = 9 # Change to 1 - 9
    for i in range(1,10):
        for j in range(len(crack_fronts[i])):
            cursor = near.execute('select * from crack_front where node_id = ' + str(crack_fronts[i][j]) + 
                                  ' and crack_id = ' + str(i))
            for row in cursor:
                node_id = row[0] # node_id
                crack_id = row[1] # crack_id
                x =  row[2] # x
                y = row[3] # y
                z = row[4] # z
            base.execute("insert into crack_nodes (crack_id, arr_index, node_id, x, y, z) values (" 
                         + str(crack_id) + ',' + str(j) + ',' + str(crack_fronts[i][j]) + ',' + 
                         str(x) + ',' + str(y) + ',' + str(z) + ");")

    base.commit()
    base.close()
    near.close()
    print 'done'

In [21]:
# Create base.db points from center-grains file and get euler angles from near.db
def create_base_points(base_dbpath, near_dbpath, fname):
    
    base = sqlite3.connect(base_dbpath)

    base.execute('create table points(grain_id int, x float, y float, z float, phi1 float, Phi float, phi2 float, ' + 
                'short_grain_id int, primary key(grain_id, x, y, z));')
    x = 0
    with open(fname, 'r') as f:
        for line in f:
            if x > 0:
                tokens = line.split(',')
                base.execute('insert into points (grain_id, x, y, z) values (' + str(tokens[3].rstrip()) + ',' + str(tokens[0]) + ',' + 
                             str(tokens[1]) + ',' + str(tokens[2]) + ')')
            else:
                x = 1

    base.commit()

    near = sqlite3.connect(near_dbpath)

    cursor = near.execute('select * from euler_angles')

    for row in cursor:
        grain_id = row[0]
        phi1 = row[1]
        Phi =  row[2]
        phi2 = row[3]
        base.execute('update points set phi1 = ' + str(phi1) + ',' + 'Phi = ' + str(Phi) + ',' + 'phi2 = ' + str(phi2) + 
                     ' where grain_id = ' + str(grain_id))

    base.commit()
    base.close()
    near.close()
    print 'done'

In [24]:
# Figure out which grains are touching and save them

def save_grain_boundaries(dbpath, fname):

    conn = sqlite3.connect(dbpath)

    x_s = []
    cursor = conn.execute('select distinct x from points;')
    for row in cursor:
        x_s.append(row[0])

    y_s = []
    cursor = conn.execute('select distinct y from points;')
    for row in cursor:
        y_s.append(row[0])

    z_s = []
    cursor = conn.execute('select distinct z from points;')
    for row in cursor:
        z_s.append(row[0])

    conn.close()

    ids = []

    with open(fname, 'r') as f:
        x = 0
        for line in f:
            if x > 0:
                tokens = line.split(',')
                ids.append(int(tokens[3].rstrip()))
            else:
                x = 1

    i = 0
    distance = 6
    num_x = 67
    num_xy = 8375
    x_max = 396
    y_max = 744
    z_max = 594
    bordering = {}

    for z in z_s:
        for y in y_s:
            for x in x_s:
                if x >= distance:
                    x_neg = x - distance
                else:
                    x_neg = x
                if x <= x_max - distance:
                    x_pos = x + distance
                else:
                    x_pos = x

                if y >= distance:
                    y_neg = y - distance
                else:
                    y_neg = y
                if y <= y_max - distance:
                    y_pos = y + distance
                else:
                    y_pos = y

                if z >= distance:
                    z_neg = z - distance
                else:
                    z_neg = z
                if z <= z_max - distance:
                    z_pos = z + distance
                else:
                    z_pos = z

                center_id = ids[int(int(x) / 6 + ((int(y) / 6) * 67) + ((int(z) * 8375) / 6))] % 10000
                x_neg_id = ids[int(int(x_neg) / 6 + ((int(y) / 6) * 67) + ((int(z) * 8375) / 6))] % 10000
                x_pos_id = ids[int(int(x_pos) / 6 + ((int(y) / 6) * 67) + ((int(z) * 8375) / 6))] % 10000
                y_neg_id = ids[int(int(x) / 6 + ((int(y_neg) / 6) * 67) + ((int(z) * 8375) / 6))] % 10000
                y_pos_id = ids[int(int(x) / 6 + ((int(y_pos) / 6) * 67) + ((int(z) * 8375) / 6))] % 10000
                z_neg_id = ids[int(int(x) / 6 + ((int(y) / 6) * 67) + ((int(z_neg) * 8375) / 6))] % 10000
                z_pos_id = ids[int(int(x) / 6 + ((int(y) / 6) * 67) + ((int(z_pos) * 8375) / 6))] % 10000

                touching = []
                if x_neg_id not in touching and x_neg_id != center_id:
                    touching.append(x_neg_id)
                if x_pos_id not in touching and x_pos_id != center_id:
                    touching.append(x_pos_id)
                if y_neg_id not in touching and y_neg_id != center_id:
                    touching.append(y_neg_id)
                if y_pos_id not in touching and y_pos_id != center_id:
                    touching.append(y_pos_id)
                if z_neg_id not in touching and z_neg_id != center_id:
                    touching.append(z_neg_id)
                if z_pos_id not in touching and z_pos_id != center_id:
                    touching.append(z_pos_id)

                temp = bordering.get(center_id, [])
                for touching_id in touching:
                    if touching_id not in temp:
                        temp.append(touching_id)
                bordering[center_id] = temp

    conn = sqlite3.connect(dbpath)

    conn.execute('create table boundaries(grain_id1 int,grain_id2 int,misor_val float,primary key(grain_id1, grain_id2), '+
                'foreign key (grain_id1) references points(grain_id),foreign key(grain_id2) references points(grain_id));')
    
    for center_ in bordering:
        for touching_ in bordering[center_]:
            conn.execute('insert into boundaries (grain_id1, grain_id2) values (%d, %d)' % (center_, touching_))
    conn.commit()
    conn.close
    print 'done'

In [26]:
def save_short_grain_id(dbpath):

    conn = sqlite3.connect(dbpath)

    cursor = conn.execute('select distinct grain_id from points;')
    for row in cursor:
        new_id = row[0] % 10000
        conn.execute('update points set short_grain_id=%d where grain_id=%d' % (new_id, row[0]))
    conn.commit()
    conn.close()
    print 'done'

In [32]:
# Put the nodes on each crack front in order using triangles
import math

def order_nodes(dbpath):

    conn = sqlite3.connect(dbpath)

    for crack_id in range(1,10):
        cursor = conn.execute('select * from crack_nodes where crack_id = ' + str(crack_id))
        order = []

        for row in cursor:
            arr_index = row[1]
            node_id = row[4]
            x = row[5]
            y =  row[6]
            z = row[7]
            y_ = y - 372.0
            order.append((arr_index, math.pi - math.atan2(x, y_), node_id, x, y, z))
        order_sorted = sorted(order, key=lambda tup: tup[1])
        for i in range(len(order_sorted)):
            conn.execute('update crack_nodes set theta='+str(order_sorted[i][1])+',node_order='+str(i)+
                         ' where crack_id='+str(crack_id)+' and arr_index='+str(order_sorted[i][0]))

    conn.commit()
    conn.close()
    print 'done'

In [5]:
# Return the nearest neighbors in the KD Tree to the x,y,z coordinates within the radius. The ids are used to pull out
# the grain id for the neighbors
def nearest_neighbor_ids_coors(tree, x, y, z, radius, ids):
    neighbors = tree.query_ball_point((x,y,z), radius)
    grain_ids_coors = []
    for i in neighbors:
        grain_ids_coors.append((ids[int((tree.data[i][0]/6)+(67*(tree.data[i][1]/6))+(8375*(tree.data[i][2])/6))], 
                          int(tree.data[i][0]), int(tree.data[i][1]), int(tree.data[i][2])))

    return grain_ids_coors

In [26]:
# Create nearest neighbor DB's and read points into them using KD Tree

import sqlite3

def create_neighbors(tree, ids, dbpath):
    k=0
    for mod in range(2,21):
        r = mod * 3 #- mod + 2 # Subtract mod to avoid overlap between neighborhoods, add 2 so r=6 still exists
        dbname = 'neighbor_%d' % (r)

        conn = sqlite3.connect(dbpath)

        conn.execute("create table %s (crack_id int,arr_index int,grain_id int,x float,y float,z float, \
                     primary key (crack_id, arr_index,grain_id, x, y, z),foreign key (crack_id, arr_index) references \
                      crack_nodes(crack_id, arr_index),foreign key (grain_id, x, y, z) references \
                     points(grain_id, x, y, z));" % dbname)

        for crack_id in range(1,10):
            cursor = conn.execute('select * from crack_nodes where crack_id = %d order by node_order asc' % (crack_id))
            for row in cursor:
                node_order = row[2]
                if (node_order % mod == 0):
                    arr_index = row[1]
                    x = row[5]
                    y = row[6]
                    z = row[7]
                    neighbors = nearest_neighbor_ids_coors(tree, x, y, z, r, ids)
                    for n in neighbors:
                        conn.execute('insert into %s values (%d, %d, %d, %d, %d, %d)' \
                                    % (dbname, crack_id, arr_index, n[0], n[1], n[2], n[3]))

        conn.commit()
        conn.close()
        print mod
    print 'done'

In [44]:
# Write nearest neighbors to text file
def write_to_text_files(crack_fronts, tree, ids, dbpath, directorypath, r=10, crack_id=1):    
    for point_in_crack in range(len(crack_fronts[crack_id])):
        x, y, z = get_node_coors_from_id_setup(crack_fronts[crack_id][point_in_crack], dbpath)
        neighbors = nearest_neighbor_ids_coors(tree, x, y, z, r, ids) 

        filename = directorypath + str(r) + '_' + str(crack_id) + '_' + str(point_in_crack) + '.csv'

        with open(filename, 'a') as f:
            f.write('x,y,z,grain_id\n')
            for i in range(len(neighbors)):
                f.write(str(neighbors[i][1]) + ',')
                f.write(str(neighbors[i][2]) + ',')
                f.write(str(neighbors[i][3]) + ',')
                f.write(str(neighbors[i][0]) + '\n')
        print point_in_crack
    print 'done'

In [11]:
# crack_fronts = create_crack_fronts('../../big-data/crack-front.txt')
print len(crack_fronts[9])

233


In [43]:
# This function takes a while to run, it will print its % progress as it goes
create_nodes_db('../../big-data/Nodes', '../../big-data/near.db')

0.0
0.0833333333333
0.166666666667
0.25
0.333333333333
0.416666666667
0.5
0.583333333333
0.666666666667
0.75
0.833333333333
0.916666666667
1.0

DONE!


In [45]:
# This function takes FOREVER to run, (~30 mins), also prints its progress
crack_front_db(crack_fronts, '../../big-data/near.db')

0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
DONE!


In [10]:
save_euler_angles('../../big-data/near.db', '../data/Materials')


done


In [18]:
base_crack_nodes('../../big-data/base.db', '../../big-data/near.db')

done


In [22]:
# Takes about a minute to run
create_base_points('../../big-data/base.db', '../../big-data/near.db', '../data/centers-grains.csv')

done


In [25]:
save_grain_boundaries('../../big-data/base.db', '../data/centers-grains.csv')

done


In [27]:
save_short_grain_id('../../big-data/base.db')

In [33]:
order_nodes('../../big-data/base.db')

done


In [27]:
# This takes a bit of time, but not too bad, maybe a few minutes
tree, ids = create_tree('../data/centers-grains.csv')
# print 'tree'
create_neighbors(tree, ids, '../../big-data/base.db') # /big-data-smaller-r/base.db')

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
done


In [11]:
# This one can take about 30 minutes if you run it for higher crack_id's or with big r's
write_to_text_files(crack_fronts, tree, ids, '../../big-data/near.db', '../data/neighbors/r_', r=10, crack_id=9)

In [23]:
# conn = sqlite3.connect('../../big-data-smaller-r/base.db')
# for r in range(6,61,3):
#     conn.execute('drop table data_%d'%r)
# conn.commit()
# conn.close()

In [25]:
# for mod in range(2,21): # 2->3 for subtracting mod from r, nothing is closer than 6...
#     r = mod * 3 - mod + 2
#     print r

6
8
10
12
14
16
18
20
22
24
26
28
30
32
34
36
38
40
42
