In [3]:
import pandas as pd
import numpy as np
import time
import os

%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
#Pandas -> fitfunction pipeline:
#
#    Current method: pandas -> matrices, (fitfunction takes matrices)
#    QUESTION: how much do optimizations like vector -> scalar simplification matter?
#        (.202, .202, .202) vs .202
#        does it change based on the mathematical operation? exp(), a * b, etc.
#        does numpy already optimize w/ repeat values?
#    Idea is to implement simplification of multi-datasets (2D m x n array: one row per dataset, one column per value within a dataset)
#        you want to be able to collapse to an (m, 1) or (1, n) vector if possible, then broadcast back out
#        QUESTION: does this work? does it make it faster? (again, by operation)
#    QUESTION: how much time does it take to test for simplification?
#        e.g. 30x30, 100x100 matrices: how long to test for scalarability/vectorability
#    %%timeit on
#        different "fitfunction" operations for each test, including very brutal ones
#            realistic: exp(x + offset), offset in [0, 13000, 26000, .... 130000] 
#        matrices with raw numbers (not a view on a smaller vector/etc.)
#        broadcasted vectors
#        mix of scalars, vectors, matrices
#        dataframes??
#    alternative: is there some way to make pandas dataframes more optimized for this?
#    also, what's nicer to use?

In [4]:
# The same constant (3) in three shapes: a full 10x10 matrix, a length-10
# vector, and a bare Python scalar.
full_matrix = np.full((10, 10), 3.0)
full_vector = np.full(10, 3.0)
# np.broadcast_arrays hands back one broadcast array per input (two 10x10
# arrays here), not a single matrix.
broadcast_matrix = np.broadcast_arrays(full_vector, full_matrix)
scalar = 3

full_matrix

array([[ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.]])

In [5]:
full_vector

array([ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.])

In [6]:
broadcast_matrix

[array([[ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.]]),
 array([[ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
        [ 3.,  3.,  3.,  3.,  3.,  3.,

In [7]:
# Define the functions to benchmark

def cosine_exp(x):
    """Return cos(x) * exp(x), evaluated elementwise by NumPy."""
    oscillation = np.cos(x)
    growth = np.exp(x)
    return oscillation * growth

def mag(x):
    """Return sqrt(2 * x**2), evaluated elementwise by NumPy."""
    doubled_square = 2 * np.power(x, 2)
    return np.sqrt(doubled_square)



In [8]:
# Benchmark registry: label -> function that time_run calls on the data.
fnc_dict = dict(cos_exp=cosine_exp, mag=mag)

def time_run(data, functions=None, n_iter=10000):
    """Time repeated evaluation of a set of functions on ``data``.

    Every function is called ``n_iter`` times with ``data`` as its only
    argument; the return values are discarded.

    Parameters
    ----------
    data
        Value passed to each function on every call.
    functions : mapping of name -> callable, optional
        Functions to run. When omitted, the module-level ``fnc_dict`` is
        used, which matches the original behaviour.
    n_iter : int, optional
        Number of calls made to each function (default 10000, the count
        that used to be hard-coded).

    Returns
    -------
    float
        Elapsed time in seconds for the whole run.
    """
    if functions is None:
        functions = fnc_dict
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement recommended by the standard library for interval timing.
    start = time.perf_counter()
    for fnc in functions.values():
        for _ in range(n_iter):
            fnc(data)
    return time.perf_counter() - start

In [52]:


# Time each representation once; the results keep their own names so later
# cells can display them individually.
matrix_time = time_run(full_matrix)
vector_time = time_run(full_vector)
scalar_time = time_run(scalar)

# One label line followed by one value line per measurement.
print('Matrix time: ', matrix_time,
      'Vector time: ', vector_time,
      'Scalar time: ', scalar_time,
      sep='\n')

Matrix time: 
0.11025583027821995
Vector time: 
0.057704278177425294
Scalar time: 
0.05195597271449515
Matrix time: 
0.09793218811410043
Vector time: 
0.05691582224744707
Scalar time: 
0.053609450716976426
Matrix time: 
0.09957514557800096
Vector time: 
0.05362669271062259
Scalar time: 
0.05212547028258996
Matrix time: 
0.09765953081887346
Vector time: 
0.06451808042265839
Scalar time: 
0.06668355797592085
1 loop, best of 3: 206 ms per loop


In [54]:
%%timeit
time_run(full_vector)

10 loops, best of 3: 57.6 ms per loop


In [10]:
vector_time

0.05706310978941971

In [11]:
scalar_time

0.050591517289628685

In [96]:
from experimentdataanalysis.parsing.scandataparsing \
    import analyze_scan_filepath, analyze_string_for_dict_pairs
# Keyword tables handed to analyze_scan_filepath (via parsing_keyword_lists).
# NOTE(review): the parser is imported from elsewhere, so the exact matching
# rules are not visible here; the list names suggest each table controls where
# in the path a keyword is looked for -- confirm against scandataparsing.

# Entries are (keyword, info-dict key, value) triples.
this_element_keyword_list = [("TRKR", "IsTRKR?", True),
                             ("RSA", "IsRSA?", True)]
# Entries are (keyword, info-dict key or list of keys) pairs.
next_element_keyword_list = [("Ind", "Scan_2D_Index"),
                             ("2Dscan", ["Scan_2D_Type", "Scan_1D_Type"])]
# Entries are (keyword, info-dict key) pairs; the keys carry the unit labels.
in_this_element_keyword_list = [("Vcm", "Electric Field (V/cm)"),
                                ("mT", "Magnetic Field (mT)"),
                                ("K", "Set Temperature (K)"),
                                ("nm", "Wavelength (nm)"),
                                ("ps", "Delay Time (ps)"),
                                ("run", "RunIndex"),
                                ("V", "Voltage (V)"),
                                ("x", "Scan_2D_Coord"),
                                ("uWpump", "Pump Power (uW)"),
                                ("uWprobe", "Probe Power (uW)"),
                                ("TestPhaseShift", "TestPhaseShift Experiment #"),
                                ("repeats", "# repeats"),
                               ]
# Bundled in this fixed order for analyze_filepath_and_header.
parsing_keyword_lists = [this_element_keyword_list,
                         next_element_keyword_list,
                         in_this_element_keyword_list]

def analyze_filepath_and_header(filepath, header_string, existing_info_dict=None):
    """Build the info dict for one scan from its file path and header text.

    The path is parsed by analyze_scan_filepath using parsing_keyword_lists,
    starting from ``existing_info_dict`` (a fresh empty dict when None is
    given). The header string is then passed, together with the resulting
    dict, to analyze_string_for_dict_pairs; its return value is not used, so
    it presumably updates the dict in place -- confirm in scandataparsing.

    Two aliases are added afterwards:
    * if both 'Scan_2D_Type' and 'Scan_2D_Coord' are present, the coordinate
      is also stored under the key named by the 'Scan_2D_Type' value;
    * if 'BExternal' is present, its value is copied to 'Magnetic Field (mT)'.

    Returns the info dict.
    """
    starting_dict = {} if existing_info_dict is None else existing_info_dict
    info_dict = analyze_scan_filepath(filepath, starting_dict,
                                      parsing_keyword_lists)
    analyze_string_for_dict_pairs(header_string, info_dict)

    if 'Scan_2D_Type' in info_dict.keys() and 'Scan_2D_Coord' in info_dict.keys():
        scan_axis_key = info_dict['Scan_2D_Type']
        info_dict[scan_axis_key] = info_dict['Scan_2D_Coord']

    if 'BExternal' in info_dict.keys():
        external_field = info_dict['BExternal']
        info_dict['Magnetic Field (mT)'] = external_field

    return info_dict

def get_inflection_points(series):
    """Locate the points where a series switches between rising and falling.

    Despite the name, what is detected is a sign change of the step between
    consecutive values (a turning point). The first and last points of the
    series are always included in the result.

    Parameters
    ----------
    series : pandas.Series
        Numeric values, walked in order via ``series.items()``.

    Returns
    -------
    (list, list)
        Index labels and the corresponding values of: the first point, every
        turning point, and the last point. An empty series gives two empty
        lists; a single-element series gives that one point once.

    Notes
    -----
    Flat steps (difference of zero) and NaN steps carry no direction and are
    ignored, so the trend is taken from the first step that actually rises
    or falls. Previously the trend came only from the first two values, so a
    series starting with two equal values never reported a turning point.
    On a plateau, the last point of the plateau is the one reported.
    """
    if len(series) == 0:
        return [], []

    yvals_iterator = iter(series.items())
    last_yind, last_yval = next(yvals_iterator)  # first (ind, val) pair
    inflection_point_indices = [last_yind]
    inflection_point_values = [last_yval]

    trend_sign = 0  # 0 means no direction has been established yet
    for y_ind, y_val in yvals_iterator:
        step_sign = np.sign(y_val - last_yval)
        if step_sign == 1 or step_sign == -1:  # skips flat and NaN steps
            if step_sign == -trend_sign:
                # Direction reversed: the previous point was the extremum.
                inflection_point_indices.append(last_yind)
                inflection_point_values.append(last_yval)
            trend_sign = step_sign
        last_yind, last_yval = y_ind, y_val

    if len(series) > 1:
        # Close with the final point (already recorded for a 1-element series).
        inflection_point_indices.append(last_yind)
        inflection_point_values.append(last_yval)
    return inflection_point_indices, inflection_point_values


ModuleNotFoundError: No module named 'PyQt4'

In [12]:
# Hand-entered sample table: 5 rows x 8 columns of floats.
# NOTE(review): the meaning of each column is not stated anywhere in this
# notebook -- label them once confirmed. The second column holds the same
# value (0.439) in every row.
new_data = np.array([[200.0, 0.439, 0.000945, 0.000058, 0.252928, 0.078952, 0.000079, 0.0000551],
                    [200.4, 0.439, 0.001819, 0.000781, 0.738559, 0.254511, 0.001000, 0.000202],
                    [200.8, 0.439, 0.000467, 0.000065, -0.049422, 0.161195, -0.000012, 0.000058],
                    [201.2, 0.439, 0.000624, 0.000073, 0.077881, 0.091944, 0.000952, 0.000054],
                    [201.6, 0.439, 0.001091, 0.000333, 0.175833, 0.177269, 0.001000, 0.000223]])

new_data

array([[  2.00000000e+02,   4.39000000e-01,   9.45000000e-04,
          5.80000000e-05,   2.52928000e-01,   7.89520000e-02,
          7.90000000e-05,   5.51000000e-05],
       [  2.00400000e+02,   4.39000000e-01,   1.81900000e-03,
          7.81000000e-04,   7.38559000e-01,   2.54511000e-01,
          1.00000000e-03,   2.02000000e-04],
       [  2.00800000e+02,   4.39000000e-01,   4.67000000e-04,
          6.50000000e-05,  -4.94220000e-02,   1.61195000e-01,
         -1.20000000e-05,   5.80000000e-05],
       [  2.01200000e+02,   4.39000000e-01,   6.24000000e-04,
          7.30000000e-05,   7.78810000e-02,   9.19440000e-02,
          9.52000000e-04,   5.40000000e-05],
       [  2.01600000e+02,   4.39000000e-01,   1.09100000e-03,
          3.33000000e-04,   1.75833000e-01,   1.77269000e-01,
          1.00000000e-03,   2.23000000e-04]])

In [15]:
# Wrap the raw array in a DataFrame; no labels are supplied, so rows and
# columns receive the default integer labels.
data_frame = pd.DataFrame(data=new_data)

data_frame


Unnamed: 0,0,1,2,3,4,5,6,7
0,200.0,0.439,0.000945,5.8e-05,0.252928,0.078952,7.9e-05,5.5e-05
1,200.4,0.439,0.001819,0.000781,0.738559,0.254511,0.001,0.000202
2,200.8,0.439,0.000467,6.5e-05,-0.049422,0.161195,-1.2e-05,5.8e-05
3,201.2,0.439,0.000624,7.3e-05,0.077881,0.091944,0.000952,5.4e-05
4,201.6,0.439,0.001091,0.000333,0.175833,0.177269,0.001,0.000223


In [33]:
# Keep the rows whose column-1 value equals the one in row label 0.
data_frame.loc[data_frame[1] == data_frame.loc[0, 1]]

Unnamed: 0,0,1,2,3,4,5,6,7
0,200.0,0.439,0.000945,5.8e-05,0.252928,0.078952,7.9e-05,5.5e-05
1,200.4,0.439,0.001819,0.000781,0.738559,0.254511,0.001,0.000202
2,200.8,0.439,0.000467,6.5e-05,-0.049422,0.161195,-1.2e-05,5.8e-05
3,201.2,0.439,0.000624,7.3e-05,0.077881,0.091944,0.000952,5.4e-05
4,201.6,0.439,0.001091,0.000333,0.175833,0.177269,0.001,0.000223


In [51]:
%%timeit

# Find the columns whose entries all equal that column's first entry, and
# remember the shared value for each of them.
const_vals = {}
col_titles = list(data_frame)

# A frame with no rows has no first entry to compare against.
if len(data_frame) > 0:
    for col in col_titles:
        column = data_frame[col]
        # Positional lookup, so this does not depend on a row labelled 0.
        first_val = column.iloc[0]
        # Reduce the comparison mask directly rather than building a filtered
        # copy of the whole frame just to count its rows. NaN != NaN, so a
        # column containing NaN is never reported as constant.
        if (column == first_val).all():
            const_vals[col] = first_val

const_vals

100 loops, best of 3: 3.2 ms per loop


[0, 1, 2, 3, 4, 5, 6, 7]