In [41]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, size, udf, sum, count, countDistinct

import pyspark.sql.functions as func
from pyspark.sql.functions import UserDefinedFunction
from pyspark.sql.types import *

import numpy as np
from math import floor, ceil
from itertools import zip_longest
from numpy.linalg import inv

In [42]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable inline plotly rendering for the iplot() calls used in this notebook.
# NOTE(review): connected=True presumably loads plotly.js from the network
# instead of embedding it in the notebook -- confirm against the plotly docs.
init_notebook_mode(connected=True)

In [43]:
# Input event list (CSV read as H, K, L, E, I columns by the occupancy functions).
infile = (
    "/Users/clairensaunders/Documents/Caltech/Research/Cu2O/"
    "cuprite_arcs/10K_test/out/nv3/QEI_no_nan_no_zero.csv"
)
# Lattice transform handed to the occupancy functions; identity means no change of basis.
W = np.identity(3)

In [44]:
"""
def within(limits, x):
    assert limits.shape == (len(x), 2), "Invalid k-space limits!"
    return np.all(x > limits[:, 0]) and np.all(x < limits[:, 1])

# test:
limits = np.arange(6).reshape((3, 2))
print(limits)
x1 = np.array([2, 3, 4])
print(within(limits, x1))
x2 = np.array([0.5, 2.5, 4.5])
print(within(limits, x2))
"""

'\ndef within(limits, x):\n    assert limits.shape == (len(x), 2), "Invalid k-space limits!"\n    return np.all(x > limits[:, 0]) and np.all(x < limits[:, 1])\n\n# test:\nlimits = np.arange(6).reshape((3, 2))\nprint(limits)\nx1 = np.array([2, 3, 4])\nprint(within(limits, x1))\nx2 = np.array([0.5, 2.5, 4.5])\nprint(within(limits, x2))\n'

In [45]:
def occupancy_BZ(infile, W=np.eye(3)):
    """Count how many rows of ``infile`` fall into each Brillouin zone (BZ).

    The CSV columns are read, in order, as H, K, L, E, I (all floats).  Each
    (H, K, L) is multiplied by ``inv(W)`` (skipped when ``W`` is close to the
    identity) and every component is rounded to the nearest integer to label
    the zone.  Rows are then counted per zone.

    Parameters
    ----------
    infile : str
        Path of the CSV file to read.
    W : array_like, shape (3, 3), optional
        Matrix whose inverse is applied to (H, K, L) before rounding.
        Defaults to the identity.

    Returns
    -------
    numpy.ndarray of int, shape (n_zones, 4)
        Rows of ``[BZ_H, BZ_K, BZ_L, count]`` sorted by ascending count.
        An array of shape ``(0, 4)`` is returned when no rows were read.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``W`` is singular and therefore cannot be inverted.
    """
    spark = SparkSession.builder.master('local').appName("slice").getOrCreate()
    try:
        dataschema = StructType([StructField(name, FloatType(), False)
                                 for name in ("H", "K", "L", "E", "I")])
        df = spark.read.csv(infile, sep=",", schema=dataschema)
        if np.allclose(W, np.eye(3)):
            df_BZ = df.withColumn("BZ_H", func.round(df.H))\
                      .withColumn("BZ_K", func.round(df.K))\
                      .withColumn("BZ_L", func.round(df.L))
        else:
            print("Transform applied...")
            # Plain Python floats: NumPy scalars are not reliably accepted as
            # operands in Spark column arithmetic.
            inv_W = inv(W).tolist()
            df_BZ = df
            for name, (a, b, c) in zip(("BZ_H", "BZ_K", "BZ_L"), inv_W):
                df_BZ = df_BZ.withColumn(name, func.round(a * df.H + b * df.K + c * df.L))
        rows = df_BZ.groupBy("BZ_H", "BZ_K", "BZ_L").count().collect()
    finally:
        # Always release the session, even when reading or inverting W fails.
        spark.stop()
    if not rows:
        # np.array([]) would be 1-D and break the column indexing below.
        return np.empty((0, 4), dtype=int)
    stat_BZ = np.array(rows).astype(int)
    return stat_BZ[stat_BZ[:, -1].argsort()]

In [24]:
# Rows of [BZ_H, BZ_K, BZ_L, event count], sorted by ascending count.
res0 = occupancy_BZ(infile=infile, W=W)
print(res0)

[[     0      0      3      1]
 [    -2     -1      3      1]
 [     2     -9      0      1]
 ...
 [     0      0      1 406012]
 [     0      0      0 511901]
 [     0      0      2 559768]]


In [28]:
# Number of distinct Brillouin zones that contain at least one event.
print(len(res0))

680


In [30]:
# res0 is sorted by ascending count, so the x axis is simply the zone's rank.
x_axis = np.linspace(0, res0.shape[0]-1, res0.shape[0])
trace0 = go.Scatter(
    x=x_axis,
    y=res0[:,-1]
)
# Same counts on a log scale; this cell is about res0 (no `res2` exists in
# this notebook, so referencing it fails on a fresh kernel).
trace1 = go.Scatter(
    x=x_axis,
    y=np.log(res0[:, -1])
)

iplot([trace0])
# Column-wise extremes: range of zone indices and of the per-zone counts.
print(np.amax(res0, axis=0))
print(np.amin(res0, axis=0))
iplot([trace1])

[     9      3      5 559768]
[-5 -9 -2  1]


In [48]:
def occupancy_BZ_near(infile, W=np.eye(3), grid=0.1):
    """Count the distinct occupied sub-cells inside each Brillouin zone (BZ).

    The CSV columns are read, in order, as H, K, L, E, I (all floats).  Each
    (H, K, L) is multiplied by ``inv(W)``; rounding the result gives the zone
    label, and the offset from that label divided by ``grid`` and rounded
    gives a sub-cell index per axis.  For every zone the number of distinct
    (sub_BZ_H, sub_BZ_K, sub_BZ_L) triples is returned.

    Parameters
    ----------
    infile : str
        Path of the CSV file to read.
    W : array_like, shape (3, 3), optional
        Matrix whose inverse is applied to (H, K, L).  Defaults to the identity.
    grid : float, optional
        Edge length of a sub-cell in transformed units; must be positive.

    Returns
    -------
    numpy.ndarray of int, shape (n_zones, 4)
        Rows of ``[BZ_H, BZ_K, BZ_L, n_distinct_sub_cells]`` sorted by
        ascending last column.  An array of shape ``(0, 4)`` is returned when
        no rows were read.

    Raises
    ------
    ValueError
        If ``grid`` is not a positive number.
    numpy.linalg.LinAlgError
        If ``W`` is singular and therefore cannot be inverted.
    """
    if not grid > 0:
        # A zero grid would divide by zero inside Spark instead of failing loudly.
        raise ValueError("grid must be a positive number, got {!r}".format(grid))
    grid = float(grid)
    axes = ("H", "K", "L")
    spark = SparkSession.builder.master('local').appName("slice").getOrCreate()
    try:
        dataschema = StructType([StructField(name, FloatType(), False)
                                 for name in ("H", "K", "L", "E", "I")])
        df = spark.read.csv(infile, sep=",", schema=dataschema)
        print("Transform applied...")
        # Plain Python floats: NumPy scalars are not reliably accepted as
        # operands in Spark column arithmetic.
        inv_W = inv(W).tolist()
        df_BZ = df
        # Transformed coordinates H_, K_, L_.
        for axis, (a, b, c) in zip(axes, inv_W):
            df_BZ = df_BZ.withColumn(axis + "_", a * df.H + b * df.K + c * df.L)
        # Zone label: nearest integer to each transformed coordinate.
        for axis in axes:
            df_BZ = df_BZ.withColumn("BZ_" + axis, func.round(col(axis + "_")))
        # Sub-cell index: offset from the zone label in units of `grid`.
        for axis in axes:
            df_BZ = df_BZ.withColumn(
                "sub_BZ_" + axis,
                func.round((col(axis + "_") - col("BZ_" + axis)) / grid))
        rows = df_BZ.groupBy("BZ_H", "BZ_K", "BZ_L")\
                    .agg(countDistinct("sub_BZ_H", "sub_BZ_K", "sub_BZ_L"))\
                    .collect()
    finally:
        # Always release the session, even when reading or inverting W fails.
        spark.stop()
    if not rows:
        # np.array([]) would be 1-D and break the column indexing below.
        return np.empty((0, 4), dtype=int)
    stat_BZ = np.array(rows).astype(int)
    return stat_BZ[stat_BZ[:, -1].argsort()]

In [49]:
#test:
res1 = occupancy_BZ_near(infile, W)
print(res1)

Transform applied...
[[   5   -2    4    1]
 [   2   -9    0    1]
 [   0    0    3    1]
 ...
 [   3   -6    3 1331]
 [  -1   -6    1 1331]
 [  -1   -7    1 1331]]


In [50]:
# Both results should list the same number of zones.
print(res1.shape, res0.shape, sep="\n")

(680, 4)
(680, 4)


In [51]:
# res1 is sorted by ascending sub-cell count, so the x axis is the zone's rank.
x_axis = np.linspace(0, res1.shape[0]-1, res1.shape[0])
# Plot res1 on both traces: the axis, the log trace and the printed extremes
# in this cell all describe res1.
trace0 = go.Scatter(
    x=x_axis,
    y=res1[:,-1]
)
trace1 = go.Scatter(
    x=x_axis,
    y=np.log(res1[:, -1])
)

iplot([trace0])
# Column-wise extremes: range of zone indices and of the sub-cell counts.
print(np.amax(res1, axis=0))
print(np.amin(res1, axis=0))
iplot([trace1])

[   9    3    5 1331]
[-5 -9 -2  1]


In [53]:
# Size everything from the data instead of a hard-coded row count.
N = res1.shape[0]
x_axis = np.linspace(0, N-1, N)
# Columns: BZ_H, BZ_K, BZ_L, distinct sub-cells (res1), raw event count (res0).
com_res = np.zeros((N, 5), dtype=int)
com_res[:, :4] = res1
# Index res0 by zone so each lookup is O(1) instead of scanning every row;
# a later duplicate zone overwrites an earlier one, and zones missing from
# res0 keep a count of 0.
events_by_zone = {(h, k, l): n_events for h, k, l, n_events in res0.tolist()}
com_res[:, 4] = [events_by_zone.get(tuple(zone), 0) for zone in res1[:, :3].tolist()]
print(com_res)
trace0 = go.Scatter(
    x=x_axis,
    y=com_res[:,-1]
)
iplot([trace0])

[[     5     -2      4      1      2]
 [     2     -9      0      1      1]
 [     0      0      3      1      1]
 ...
 [     3     -6      3   1331  76942]
 [    -1     -6      1   1331 197977]
 [    -1     -7      1   1331 155209]]


In [54]:
# Same sub-cell occupancy analysis on a finer grid (0.05 instead of the default 0.1).
res3 = occupancy_BZ_near(infile=infile, W=W, grid=0.05)
print(res3)

Transform applied...
[[  -1   -4    4    1]
 [   8   -5    3    1]
 [  -2   -1    3    1]
 ...
 [   0   -7    2 9253]
 [  -1   -7    2 9258]
 [  -3   -5    1 9259]]


In [55]:
# Rank axis 0.0, 1.0, ... one entry per zone; res3 is already sorted by its last column.
x_axis = np.arange(res3.shape[0], dtype=float)
# Linear and log views of the distinct sub-cell count per zone.
trace0 = go.Scatter(x=x_axis, y=res3[:, -1])
trace1 = go.Scatter(x=x_axis, y=np.log(res3[:, -1]))

iplot([trace0])
# Column-wise maxima, then minima, of [BZ_H, BZ_K, BZ_L, count].
print(np.max(res3, axis=0))
print(np.min(res3, axis=0))
iplot([trace1])

[   9    3    5 9259]
[-5 -9 -2  1]


In [57]:
# res3 was computed with grid=0.05: each offset from the zone centre lies in
# [-0.5, 0.5], so its rounded index runs over -10..10, i.e. 21 sub-cells per
# axis and 21**3 = 9261 sub-cells per Brillouin zone.
SUB_CELLS_PER_ZONE = 21 ** 3
x_axis = np.linspace(0, res3.shape[0]-1, res3.shape[0])
trace0 = go.Scatter(
    x=x_axis,
    # Fraction of a zone's sub-cells that contain at least one event.
    y=res3[:,-1]/SUB_CELLS_PER_ZONE
)

iplot([trace0])
# print(np.amax(res3, axis=0))
# print(np.amin(res3, axis=0))
# iplot([trace1])






# plt.plot(res3[:, -1]/9261.)
# plt.show()
# plt.cla()
# plt.scatter(np.arange(100, 120), res3[100:120, -1]/9261.)
# plt.show()

In [90]:
# Persist the grid=0.05 occupancy table as whitespace-separated text in the working directory.
np.savetxt(fname="HKL_op.txt", X=res3)