# TmVO4 neutron data analysis
Fit neutron diffraction peaks measured on TmVO4 at SNS on 2019-02-14 in order to extract the orthorhombic distortion as a function of magnetic field

#### Import necessary modules

In [1]:
from mpl_toolkits.mplot3d import Axes3D# for 3D plotting
import matplotlib.tri as mtri# for triangulation of unevenly separated data, like our magnetic field data

import copy as cp, numpy as np, pandas as pd, pickle, os, re
import matplotlib
from matplotlib import cm, pyplot as plt, rcsetup, rc, rcParams# import matplotlib.pyplot as plt
# cm stands for colormap
from matplotlib.ticker import LogLocator, LinearLocator, FormatStrFormatter
from scipy.interpolate import griddata

from lmfit import Model
# import pytest

#### Change default plotting parameters
see https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.figure.html

In [2]:
# Halve the default Matplotlib figure size, which is [6.4, 4.8]
rcParams["figure.figsize"] = 0.5 * np.array([6.4, 4.8])

### Import data
#### First import coarse data

In [4]:
# Import the "coarse" dataset: one text file per value of magnetic field, all measured at 0.6 K
tempPath = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14'
os.chdir(tempPath)
(_, _, filenames) = next(os.walk(tempPath))# the walk() function lists the content of the directory that it is given as argument,
# and of its subdirectories; the next() function returns the next output of the walk() function;
# when used only once, it returns only the first output which is the content of the parent directory,
# listed as a tuple of the form (dirpath, dirnames, filenames)
filenames = [filename for filename in filenames if 'p6K_' in filename]# keep only files that match the string pattern of datafiles

tempList = [None]*len(filenames)# Pre-allocate temporary list to store the data that will then be converted into a Pandas DataFrame
for idx, filename in enumerate(filenames):# For each data file
    # Extract the value of magnetic field from the filename, where 'p' stands for the decimal point
    # (e.g. 'p6K_p05T.txt' -> 0.05). The pattern is a raw string so that '\w' is not treated as an
    # (invalid) string escape sequence, and the dot of the file extension is matched literally.
    H = float(re.split(r'p6K_(\w*)T\w*\.txt',filename)[1].replace('p','.'))
    tempDF = pd.read_csv(filename,names=["hh0","I","dI"],skiprows=2,delimiter=',')# import data as a Pandas DataFrame
    tempList[idx] = {'filename': filename,# Store into a dictionary, which is itself an element of tempList: the filename,
                     'H (T)': H,# value of magnetic field,
                     'T (K)': 0.6,# temperature,
                     'spectra': tempDF# and data stored as Pandas dataframe
                     }
coarseData = pd.DataFrame(tempList)# Convert the list of dictionaries into a Pandas DataFrame
del idx, H, tempPath, filename, filenames, tempList, tempDF# delete temporary variables after use
coarseData# Show the resulting DataFrame

Unnamed: 0,filename,H (T),T (K),spectra
0,p6K_1T.txt,1.0,0.6,hh0 I dI 0 -11.99...
1,p6K_1T_new.txt,1.0,0.6,hh0 I dI 0 -11.99...
2,p6K_p05T.txt,0.05,0.6,hh0 I dI 0 -11.9975...
3,p6K_p4T.txt,0.4,0.6,hh0 I dI 0 -11.997...
4,p6K_p5T.txt,0.5,0.6,hh0 I dI 0 -11.99...
5,p6K_p6T.txt,0.6,0.6,hh0 I dI 0 -11.99...
6,p6K_p77T.txt,0.77,0.6,hh0 I dI 0 -11.99...
7,p6K_p7T.txt,0.7,0.6,hh0 I dI 0 -11.99...
8,p6K_p83T.txt,0.83,0.6,hh0 I dI 0 -11.99...
9,p6K_p97T.txt,0.97,0.6,hh0 I dI 0 -11.99...


In [5]:
# Preview the first rows of one imported spectrum (row label 3) to check the importation
coarseData.loc[3, 'spectra'].head()

Unnamed: 0,hh0,I,dI
0,-11.9975,142028.0,52309.7
1,-11.9925,220372.0,62925.4
2,-11.9875,220002.0,62818.9
3,-11.9825,518428.0,92939.8
4,-11.9775,817317.0,127501.0


In [6]:
# Read the table listing, for each run of the linecut_f dataset, its file number,
# temperature, magnetic field and proton charge
tempPath = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14\p6K\linecut_f'
os.chdir(tempPath)
field_columns = ['File #','T (K)','H (T)','Proton charge']
fieldInfo = pd.read_csv('field_info.txt', sep=' ', names=field_columns, header=None)
fieldInfo = fieldInfo.astype({'File #': 'int'})# file numbers are read as floats: convert them to integers
fieldInfo.head()
# fieldInfo['File #'].dtype# check that the change of datatype is effective

Unnamed: 0,File #,T (K),H (T),Proton charge
0,88631,0.605973,0.0,1.312139
1,88632,0.613334,0.049998,0.800119
2,88633,0.605727,0.099996,0.083534
3,88634,0.625493,0.099996,0.800739
4,88635,0.61822,0.150005,0.801294


In [7]:
# Import one spectrum per row of fieldInfo and gather everything into a single DataFrame
tempList = []# list of dictionaries, one per data file, later converted into a Pandas DataFrame
for idx, file_number in enumerate(fieldInfo['File #']):# For each data file
    filename = f'HH0_{file_number}.txt'
    tempDF = pd.read_csv(filename, names=["hh0","I","dI"], skiprows=2, sep=',')# Import data as a Pandas DataFrame
    tempList.append({
        'filename': filename,# name of the data file,
        'T (K)': fieldInfo.at[idx, 'T (K)'],# temperature,
        'H (T)': fieldInfo.at[idx, 'H (T)'],# value of magnetic field,
        'Proton charge': fieldInfo.at[idx, 'Proton charge'],# proton charge,
        'spectra': tempDF,# and data
    })
linecut_f_raw = pd.DataFrame(tempList)# Convert the list of dictionaries into a Pandas DataFrame
linecut_f_raw.head()# Show the resulting DataFrame

Unnamed: 0,filename,T (K),H (T),Proton charge,spectra
0,HH0_88631.txt,0.605973,0.0,1.312139,hh0 I dI 0 -12.99880 0.0 ...
1,HH0_88632.txt,0.613334,0.049998,0.800119,hh0 I dI 0 -12.99880 0.0 ...
2,HH0_88633.txt,0.605727,0.099996,0.083534,hh0 I dI 0 -12.99880 0.0 ...
3,HH0_88634.txt,0.625493,0.099996,0.800739,hh0 I dI 0 -12.99880 0.0 ...
4,HH0_88635.txt,0.61822,0.150005,0.801294,hh0 I dI 0 -12.99880 0.0 ...


In [8]:
# Remove the temporary variables created while importing the data
del tempDF, tempList, filename, tempPath, idx

In [9]:
# Look at a few rows (labels 180 to 185) of one imported spectrum to check the importation
linecut_f_raw.loc[3, 'spectra'].loc[180:185]

Unnamed: 0,hh0,I,dI
180,-12.5487,0.0,0.0
181,-12.5462,0.0,0.0
182,-12.5437,0.0,0.0
183,-12.5412,0.0,0.0
184,-12.5388,0.0,0.0
185,-12.5363,0.0,0.0


#### Test whether or not to concatenate dataframes
Only concatenate if their x axis (hh0 data) are the same. 
However the following code shows that it is not the case.

##### <a name="hh0_consistency"></a>Check that all hh0 data are the same within each dataframe
i.e. that all hh0 of coarseData are the same and that all hh0 data of linecut_f_raw are the same

In [10]:
# Within each dataset, compare the hh0 array of every row with that of the first row.
# Nothing should be printed, which means that all hh0 arrays are identical *within a dataset*.
nData_raw = [coarseData,linecut_f_raw]
for dataset in nData_raw:# for each dataset
    reference_hh0 = dataset.spectra[0].hh0# hh0 array of the first row, used as the reference
    for _, row in dataset.iterrows():# loop over all rows
        if np.array_equal(row.spectra.hh0, reference_hh0):
            continue# identical arrays: nothing to report
        print(row)# print the row whose hh0 array differs from the reference

##### Then check that hh0 data of coarseData differ from that of linecut_f_raw
We do not need to loop over all rows since we have shown in the previous cell that all rows are the same within a dataset

In [11]:
# Compare the hh0 arrays of the first row of both datasets
hh0_coarse = coarseData.spectra[0].hh0
hh0_linecut = linecut_f_raw.spectra[0].hh0
hh0_grids_match = np.array_equal(hh0_coarse, hh0_linecut)
if not hh0_grids_match:
    print("The arrarys of hh0 are not the same in both datasets")

The arrarys of hh0 are not the same in both datasets


#### Next steps

##### Update 2020-04-06
In fact, ignore the coarse dataset, as it would require a lot of effort for a minimal result.
More interesting would be to analyze the linecut_f dataset at each (hh0) peak position, for h=6, 8, 10.

##### Update 2020-04-03
treat both datasets independently in terms of the plotting and fitting
* plot 3D color map of both datasets to check that the data are consistent within each dataset
* proceed to fit 

##### Outdated ideas
involving treating both datasets together, which will make things more complicated, and therefore increases the risks of making errors, in addition to increasing the time required for the analysis:
* interpolate spectra of coarseData so that its hh0 array is the same as that of linecut_f_raw
* rescale the data, if there is a physical way to do it, otherwise simply treat both datasets separately

#### Prepare data and sort according to ascending value of magnetic field
##### Truly "deep" copy of linecut_f
such that linecut_f_raw will *not* be modified if lcf_copy is modified

The best way to truly, i.e. recursively, deep copy a python object is to serialize it with `pickle.dumps` and rebuild it with `pickle.loads`. 
That is because `cPickle` is the fastest, as shown [here](https://stackoverflow.com/questions/1410615/copy-deepcopy-vs-pickle),  *and* **in Python 3**, `cPickle` is the default behavior of `pickle`, as explained [here](https://askubuntu.com/a/804618).
See also hacks.ipynb, and [here](https://stackoverflow.com/questions/52708341/make-a-truly-deep-copy-of-a-pandas-series). 

In [12]:
# Create a recursive ("truly deep") copy of the raw dataset by serializing it with pickle
# and rebuilding it from the resulting bytes
# lcf stands for linecut_f, which is the name of the file from which the data was imported
_lcf_bytes = pickle.dumps(linecut_f_raw)
lcf_copy = pickle.loads(_lcf_bytes)
del _lcf_bytes# the serialized form is no longer needed
lcf_copy.head()

Unnamed: 0,filename,T (K),H (T),Proton charge,spectra
0,HH0_88631.txt,0.605973,0.0,1.312139,hh0 I dI 0 -12.99880 0.0 ...
1,HH0_88632.txt,0.613334,0.049998,0.800119,hh0 I dI 0 -12.99880 0.0 ...
2,HH0_88633.txt,0.605727,0.099996,0.083534,hh0 I dI 0 -12.99880 0.0 ...
3,HH0_88634.txt,0.625493,0.099996,0.800739,hh0 I dI 0 -12.99880 0.0 ...
4,HH0_88635.txt,0.61822,0.150005,0.801294,hh0 I dI 0 -12.99880 0.0 ...


#### Basic data processing
Rescale data and remove "bad" data

#### Data rescaling
This used to be done manually, after noticing that the intensity of the data at 0T was higher than that of the rest of the data. A factor of 0.635 was then used to rescale this spectrum to the level of other data. 
However, after getting the up-to-date data, it appears that the scaling factor is merely the proton charge, which is a proxy for the counting time of neutrons. With this information, it turns out that the ratio of Proton charges of the spectrum at 0T and that at 0.05T is 1.3 to 0.8. The rescaling factor of the former is thus 0.8/1.3=0.615. Hence the empirical value of 0.635 was a pretty good guess!

In [13]:
##### Rescale data according to their Proton charge
# Each spectrum DataFrame is modified in place: the intensity I and its uncertainty dI are
# divided by the Proton charge of the corresponding row and stored in two new columns
for spectrum, charge in zip(lcf_copy['spectra'], lcf_copy['Proton charge']):
    spectrum['Inorm'] = spectrum['I']/charge
    spectrum['dInorm'] = spectrum['dI']/charge
idx = 1200# index that allows to look at data close to the (10 10 0) peak
lcf_copy.loc[0, 'spectra'].loc[idx:idx+5]# check one of the resulting dataframes

Unnamed: 0,hh0,I,dI,Inorm,dInorm
1200,-9.99875,1261600.0,94684.7,961483.3,72160.557743
1201,-9.99625,1230390.0,98819.2,937697.7,75311.519049
1202,-9.99375,1448420.0,109920.0,1103862.0,83771.596753
1203,-9.99125,2141070.0,124810.0,1631740.0,95119.477718
1204,-9.98875,2560080.0,141466.0,1951073.0,107813.252422
1205,-9.98625,4044660.0,169302.0,3082493.0,129027.464278


#### Find bad data, if any
##### Identify datasets with zero intensity and create a new "clean" dataset without bad data

In [14]:
# Build lcf_clean, a copy of lcf_copy without the spectra that contain no signal at all
lcf_clean = lcf_copy.copy()
# this is a deepcopy according to Pandas, which is only deep at the lowest order, i.e. not recursively
for idx, row in lcf_copy.iterrows():# loop over all spectra
    if not np.any(row.spectra.Inorm>0):# if the spectrum has no strictly positive intensity anywhere
        # Drop from lcf_clean (and not from lcf_copy), so that the removals accumulate
        # when more than one spectrum is empty
        lcf_clean = lcf_clean.drop(idx)
        print(idx)# output the row index of lcf_copy containing empty data
# lcf_copy.spectra[0].Inorm

84


##### Batch plot the rest of the spectra to identify other potential bad data
###### Prepare plotting

In [15]:
# IPython line magic that selects the Qt backend of Matplotlib for the following cells
%matplotlib qt
# plot figures in external window

###### Plot spectra by groups of nSpec
to see if any spectrum differs from the others.
This method may fail if there are more than nSpec bad consecutive spectra. Hence nSpec should not be too small (5 or more should be good)

In [16]:
# Plot the normalized spectra by groups of nSpec curves, one figure per group
nSpec = 7# number of curves in each plot
nfig = 1 + len(lcf_clean)//nSpec
fig = [None for _ in range(nfig)]
lgd = [None for _ in range(nfig)]
for fidx in range(1):#range(nfig):
    fig[fidx] = plt.figure()
    ax = fig[fidx].add_subplot(1,1,1)
    first_pidx = nSpec*fidx
    for pidx in range(first_pidx, first_pidx+nSpec):
        try:
            spectrum = lcf_clean.spectra[pidx]
            curve_label = f"{pidx}, {lcf_clean['H (T)'][pidx]:.2f}"
        except KeyError:# if the index of data to be plotted does not exist
            continue# ignore and carry on to the next one
        ax.plot(spectrum.hh0, spectrum.Inorm, label=curve_label)
    ax.set_xlim(-10.25,-5.75)
    lgd[fidx] = ax.legend(title='Index, H (T)')
    lgd[fidx].set_draggable(True)
    plt.show()# ensures that all windows come to the foreground

##### Notes --- 2020-04-07
* After plotting all spectra together, it appears that only spectra #2 and #85 are "bad": the former is very noisy (not too surprising given that its Proton charge is 1/10 of the other data) and the latter has a lower intensity than the other spectra (for an unknown reason).
* Both those bad spectra were identified during the measurement, such that another (good) spectrum was measured at both values of magnetic field at which those bad spectra had been measured. Concretely, spectrum #2 was measured at 0.1T and #85 at 0.865T but are bad. Spectra #3 and #104 were also measured at 0.1T and 0.865T, respectively, and are good. Hence, the two bad spectra should just be discarded, there is no reason to try and process them in order to try and make them good: first because there is no way of making good data out of bad data (bad data is just bad data), so it would be a waste of time, and potentially a lot of time, but it would also be complicated, and because the result cannot be good, it can only influence the result of the subsequent fits in a bad way, thus inducing distrust on the results obtained over the whole dataset instead of just two spectra. Bottom line: discard spectra #2 and #85.

##### Remove bad data identified in batch plotting

In [17]:
delRowIdx = [2,85]# row labels of the bad spectra identified in the batch plots
# Rebind lcf_clean instead of dropping in place, and ignore labels that are already gone,
# so that this cell can be run again without raising a KeyError
lcf_clean = lcf_clean.drop(index=delRowIdx, errors='ignore')

#### Create and plot final clean dataset
###### Create dataset sorted by value of magnetic field

In [18]:
# nData stands for "neutrons Data": the clean dataset sorted by ascending magnetic field,
# with the row index renumbered from 0
nData = lcf_clean.sort_values(by='H (T)', ignore_index=True)
# lcf_clean.head()

###### Plot a couple of ENS spectra to check consistency between spectra before fitting

In [19]:
# Clear the current axes, then overlay the first two spectra of the sorted dataset
plt.cla()
for row_number in (0, 1):
    spectrum = nData.spectra[row_number]
    plt.plot(spectrum.hh0, spectrum.Inorm)

#### Plot entire dataset in 3D
as spectrum normalized intensity 'Inorm' vs position in reciprocal space 'hh0' and magnetic field 'H (T)', to check that the field dependence of the data is consistent.
##### First plot individual spectra in a 3D space

In [20]:
# Draw each spectrum as a line in a 3D space: hh0 along x, magnetic field along y, Inorm along z
fig = plt.figure()
ax = plt.axes(projection="3d")
for spectrum, field in zip(nData.spectra, nData['H (T)']):
    H = np.full(np.shape(spectrum.hh0), field, dtype=float)# constant array of magnetic field, same shape as hh0
    ax.plot3D(spectrum.hh0, H, spectrum.Inorm)
del H

##### Then plot 3D color map
###### Create meshgrid for 3D color map
The mesh can safely be created using hh0 data from the dataset measured at any value of magnetic field, since we've shown [that all hh0 data are the same within the linecut_f dataframe](#hh0_consistency)

In [130]:
idx = 0# first dataset, but any row index of nData would do
# 2D grids of hh0 and of magnetic field, one row per spectrum of nData
hh0mesh, Hmesh = np.meshgrid(nData.spectra[idx].hh0, nData['H (T)'])
# Flattened versions of both grids
hh0lin = hh0mesh.ravel()
Hlin = Hmesh.ravel()

###### Create the 2D array that contains the intensity data

In [129]:
# 2D array of normalized intensity, one row per spectrum of nData (for mesh)
Imesh = np.stack([spectrum.Inorm for spectrum in nData.spectra])
# Same data laid end to end in a single 1D array (for griddata)
Ilin = np.concatenate([spectrum.Inorm.to_numpy() for spectrum in nData.spectra])

###### Plot the hh0mesh and Hmesh data to see how irregular they are

In [76]:
# Scatter plot of the (hh0, H) grid points, keeping one hh0 column out of `step`
fig, ax = plt.subplots()
step = 50
ax.scatter(hh0mesh[:, ::step], Hmesh[:, ::step], marker=".", s=1)
ax.set(xlabel='hh0', ylabel='H')
plt.show()

###### Conclusion
The hh0 data looks pretty regular, however the magnetic field data is not

##### Use griddata to interpolate the intensity data over a regular array

In [125]:
Hrange = np.arange(0,1.5,0.005)# regular array of magnetic field values to interpolate on
# Take the hh0 axis from the first spectrum explicitly rather than through the scratch variable
# `idx`, which is reassigned by several other cells and could silently select another row
hh0meshInterp, HmeshInterp = np.meshgrid(nData.spectra[0].hh0, Hrange)# meshgrid for interpolated data
# Linear interpolation of the scattered (hh0, H, Inorm) points onto the regular grid
Iinterp = griddata(np.column_stack((hh0lin, Hlin)), Ilin, (hh0meshInterp, HmeshInterp), method='linear')

###### Plot 3D surface
Note: Matplotlib does not do a very good job of plotting the 3D surface when the xlim is not adapted to the range of the data and when there is noise in the data: in our case, it looks like spectra are plotted individually rather than as a continuous surface. When zooming on each peak, the surface looks a little better. Perhaps look for a better 3D visualization tool.

In [188]:
# Plot the normalized intensity as a 3D surface over the (hh0, H) grid, zoomed on one peak
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed:
# create the 3D axes with add_subplot instead
ax = fig.add_subplot(1, 1, 1, projection='3d')
mesh_select = np.logical_and(hh0mesh > -10.1, hh0mesh < -9.9)# only used by the commented-out alternative below
# surf = ax.plot_surface(np.where(mesh_select,hh0mesh,np.nan),
#                        np.where(mesh_select,Hmesh,np.nan),
#                        np.where(mesh_select,Imesh,np.nan),
#                        cmap=cm.coolwarm,
#                        linewidth=0, antialiased=False)
surf = ax.plot_surface(hh0mesh,Hmesh,Imesh, cmap=cm.rainbow)
# surf = ax.plot_surface(hh0meshInterp,HmeshInterp,Iinterp, cmap=cm.coolwarm)
# ax.zaxis.set_major_locator(LinearLocator(10))
# ax.zaxis.set_major_formatter(FormatStrFormatter('%.2g'))
ax.set_xlim(-10.1, -9.9)
# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
plt.show()

##### Matplotlib example of 3D surface plotting

In [132]:
# Reference example of 3D surface plotting with Matplotlib, on synthetic data
fig = plt.figure()
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed:
# create the 3D axes with add_subplot instead
ax = fig.add_subplot(1, 1, 1, projection='3d')

# Make data.
X = np.arange(-5, 5, 0.25)
Y = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(X, Y)
R = np.sqrt(X**2 + Y**2)
Z = np.sin(R)

# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

##### Notes --- 2020-04-08
* Next steps:
    1. Plot 3D color map of data
    2. Move on to fit

###### Then plot 3D color map
To do as of 2020-04-08...

## Fit data

### General parameters

In [21]:
# Center of the unsplit peak to be studied in the following, in reciprocal space units
peak_center = -8.0
# Half of the plot interval
plt_interval = 0.1
# Value in Tesla units of the critical field at zero temperature,
# in the absence of demagnetizing factor;
# see data taken on needles of TmVO4-LS5200 in July 2017
Hc_0 = 0.51

### Fit model
#### Make model from fit function

In [23]:
from ENS_peak_fit_pVIC_py.pseudoVoigtIkedaCarpenter import pVIC
# Wrap the pVIC fit function into a Model object of the lmfit module, then report the
# parameter names and independent variables that lmfit extracted from it
pvic_model = Model(pVIC)
print(f'parameter names: {pvic_model.param_names}',
      f'independent variables: {pvic_model.independent_vars}',
      sep='\n')

parameter names: ['A', 'alpha', 'beta', 'R', 'gamma', 'sigma', 'k', 'x0']
independent variables: ['x']


#### Fit parameters
##### Create fit parameters and specify their properties
including initial values, constraints, etc.

In [35]:
# Initial values of the fit parameters, keyed by parameter name of the pVIC model
pvic_initial_values = {
    'A': 2e5,
    'alpha': 140,
    'beta': 0,
    'R': 0,
    'gamma': 0,
    'sigma': 6.6e-3,
    'k': .05,
    'x0': peak_center,
}
pvic_params = pvic_model.make_params(**pvic_initial_values)
# fitData = nData.spectra[0].Inorm
# pvic_params = pvic_model.guess(fitData)# returns NotImplementedError
pvic_params

name,value,initial value,min,max,vary
A,200000.0,,-inf,inf,True
alpha,140.0,,-inf,inf,True
beta,0.0,,-inf,inf,True
R,0.0,,-inf,inf,True
gamma,0.0,,-inf,inf,True
sigma,0.0066,,-inf,inf,True
k,0.05,,-inf,inf,True
x0,-8.0,,-inf,inf,True


In [36]:
# Update the parameter A: new value of 1e7 and lower bound of 0
amplitude_param = pvic_params['A']
amplitude_param.set(min=0, value=1e7)
len(pvic_params)# number of fit parameters

8