# TmVO4 neutrons data analysis
Fit neutron diffraction peaks measured on TmVO4 at SNS on 2019-02-14, in order to extract the orthorhombic distortion as a function of magnetic field.

#### Import necessary modules

In [150]:
from mpl_toolkits import mplot3d# for 3D plotting
import copy as cp
import numpy as np
import os
import matplotlib
from matplotlib import pyplot as plt, rcsetup, rc, rcParams# import matplotlib.pyplot as plt
# from matplotlib import rc
import pandas as pd
import re
import pickle
from lmfit import Model
# import pytest

#### Change default plotting parameters
see https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.figure.html

In [2]:
# Use half of matplotlib's default figure size, which is [6.4, 4.8]
rcParams["figure.figsize"] = [0.5 * side for side in (6.4, 4.8)]

In [3]:
# Build an lmfit Model around the project's pVIC peak function
# (presumably a pseudo-Voigt Ikeda-Carpenter profile, judging by the module name)
from ENS_peak_fit_pVIC_py.pseudoVoigtIkedaCarpenter import pVIC
pvic_model = Model(pVIC)
# Show which arguments of pVIC lmfit treats as fit parameters and which as independent variables
print('parameter names: {}'.format(pvic_model.param_names))
print('independent variables: {}'.format(pvic_model.independent_vars))

parameter names: ['A', 'alpha', 'beta', 'R', 'gamma', 'sigma', 'k', 'x0']
independent variables: ['x']


### Import data
#### First import coarse data

In [45]:
# Import the "coarse" dataset: one text file per magnetic field value, all measured at 0.6 K
tempPath = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14'
os.chdir(tempPath)# work from the data directory so that data files can be read by bare filename
(_, _, filenames) = next(os.walk(tempPath))# the walk() function lists the content of the directory that it is given as argument,
# and of its subdirectories; the next() function returns the next output of the walk() function;
# when used only once, it returns only the first output which is the content of the parent directory,
# listed as a tuple of the form (dirpath, dirnames, filenames)
filenames = [filename for filename in filenames if 'p6K_' in filename]# keep only files that match the string pattern of datafiles
if not filenames:# fail early with an explicit message instead of a NameError in the del statement below
    raise FileNotFoundError("No data file containing 'p6K_' found in {}".format(tempPath))

# The field value sits between 'p6K_' and 'T' in the filename, with 'p' standing for the decimal point,
# e.g. 'p6K_p05T.txt' -> 0.05. The pattern is a raw string so that \w reaches the regex engine untouched
# (a non-raw '\w' is a deprecated string escape), and the dot before 'txt' is escaped to match a literal '.'
fieldPattern = re.compile(r'p6K_(\w*)T\w*\.txt')

tempList = [None]*len(filenames)# Pre-allocate temporary list to store the data that will then be converted into a Pandas DataFrame
for idx, filename in enumerate(filenames):# For each data file
    H = float(fieldPattern.split(filename)[1].replace('p','.'))# Extract value of magnetic field from filename
    tempDF = pd.read_csv(filename,names=["hh0","I","dI"],skiprows=2,delimiter=',')# import data as a Pandas DataFrame
    tempList[idx] = {'filename': filename,# Store into a dictionary, which is itself an element of tempList: the filename,
                   'H (T)': H,# value of magnetic field,
                   'T (K)': 0.6,# temperature,
                   'spectra': tempDF# and data stored as Pandas dataframe
                   }
coarseData = pd.DataFrame(tempList)# Convert the list of dictionaries into a Pandas DataFrame
del idx, H, tempPath, filename, filenames, tempList, tempDF, fieldPattern# delete temporary variables after use
coarseData# Show the resulting DataFrame

Unnamed: 0,filename,H (T),T (K),spectra
0,p6K_1T.txt,1.0,0.6,hh0 I dI 0 -11.99...
1,p6K_1T_new.txt,1.0,0.6,hh0 I dI 0 -11.99...
2,p6K_p05T.txt,0.05,0.6,hh0 I dI 0 -11.9975...
3,p6K_p4T.txt,0.4,0.6,hh0 I dI 0 -11.997...
4,p6K_p5T.txt,0.5,0.6,hh0 I dI 0 -11.99...
5,p6K_p6T.txt,0.6,0.6,hh0 I dI 0 -11.99...
6,p6K_p77T.txt,0.77,0.6,hh0 I dI 0 -11.99...
7,p6K_p7T.txt,0.7,0.6,hh0 I dI 0 -11.99...
8,p6K_p83T.txt,0.83,0.6,hh0 I dI 0 -11.99...
9,p6K_p97T.txt,0.97,0.6,hh0 I dI 0 -11.99...


In [21]:
# Preview the first rows of the spectrum stored in row 3, to check the import
coarseData.loc[3, 'spectra'].head()

Unnamed: 0,hh0,I,dI
0,-11.9975,142028.0,52309.7
1,-11.9925,220372.0,62925.4
2,-11.9875,220002.0,62818.9
3,-11.9825,518428.0,92939.8
4,-11.9775,817317.0,127501.0


In [56]:
# Move to the folder that holds the linecut_f spectra and their field log
tempPath = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14\p6K\linecut_f'
os.chdir(tempPath)
# field_info.txt is read as a space-delimited table without header row;
# the file number column is converted to integers in the same expression
fieldInfo = pd.read_csv(
    'field_info.txt',
    sep=' ',
    header=None,
    names=['File #','T (K)','H (T)','Proton charge'],
).astype({'File #': 'int'})
fieldInfo.head()
# fieldInfo['File #'].dtype# uncomment to check that the change of datatype is effective

Unnamed: 0,File #,T (K),H (T),Proton charge
0,88631,0.605973,0.0,1.312139
1,88632,0.613334,0.049998,0.800119
2,88633,0.605727,0.099996,0.083534
3,88634,0.625493,0.099996,0.800739
4,88635,0.61822,0.150005,0.801294


In [57]:
# Read every spectrum listed in fieldInfo and keep it alongside its metadata
tempList = []# one dictionary per data file; converted into a Pandas DataFrame after the loop
for idx in range(len(fieldInfo)):# For each data file
    filename = 'HH0_' + str(fieldInfo.at[idx, 'File #']) + '.txt'# data files are named after their file number
    tempDF = pd.read_csv(filename, names=["hh0","I","dI"], skiprows=2, sep=',')# spectrum as a Pandas DataFrame
    tempList.append({
        'filename': filename,
        'T (K)': fieldInfo.at[idx, 'T (K)'],# temperature
        'H (T)': fieldInfo.at[idx, 'H (T)'],# value of magnetic field
        'Proton charge': fieldInfo.at[idx, 'Proton charge'],# proton charge
        'spectra': tempDF,# the data themselves
    })
linecut_f_raw = pd.DataFrame(tempList)# one row per data file
linecut_f_raw.head()# Show the resulting DataFrame

Unnamed: 0,filename,H (T),T (K),Proton charge,spectra
0,HH0_88631.txt,0.0,0.605973,1.312139,hh0 I dI 0 -12.99880 0.0 ...
1,HH0_88632.txt,0.049998,0.613334,0.800119,hh0 I dI 0 -12.99880 0.0 ...
2,HH0_88633.txt,0.099996,0.605727,0.083534,hh0 I dI 0 -12.99880 0.0 ...
3,HH0_88634.txt,0.099996,0.625493,0.800739,hh0 I dI 0 -12.99880 0.0 ...
4,HH0_88635.txt,0.150005,0.61822,0.801294,hh0 I dI 0 -12.99880 0.0 ...


In [58]:
# Remove from the kernel namespace the temporary names created by the two import cells above
# (raises NameError if any of them does not exist, e.g. when this cell is run twice)
del idx, tempPath, filename, tempList, tempDF# delete temporary variables after use 

In [115]:
# Inspect rows 180 to 185 of the spectrum stored in row 3, to check the import
linecut_f_raw.loc[3, 'spectra'].loc[180:185]

Unnamed: 0,hh0,I,dI
180,-12.0975,0.0,0.0
181,-12.0925,20535.1,14520.5
182,-12.0875,30592.1,22671.8
183,-12.0825,10336.1,22743.6
184,-12.0775,30571.4,17650.5
185,-12.0725,20162.3,14257.0


### Concatenate datasets
Only if their x axis (hh0 data) are the same, which is not the case

##### Check that all hh0 data are the same within each dataset
i.e. that all hh0 arrays of `nData_raw[0]` are identical to one another, and that all hh0 arrays of `nData_raw[1]` are identical to one another

In [124]:
##### Check that, within each dataset, every spectrum has the same hh0 axis as the first one
# NOTE(review): nData_raw is not assigned in any cell shown above; presumably it is the list
# [coarseData, linecut_f_raw] -- confirm before running on a fresh kernel
for data_idx in range(len(nData_raw)):# for each dataset
    dataset = nData_raw[data_idx]
    for _, row in dataset.iterrows():# loop over all rows
        # compare the hh0 array of that row with that of the first row of the same dataset
        hh0_matches_first = np.array_equal(row.spectra.hh0, dataset.spectra[0].hh0)
        if hh0_matches_first:
            continue
        print(row)# only reached when the two arrays are *not* equal
        # the cell should output nothing, which means that all arrays of hh0 are the same *within a dataset*

##### Then check that hh0 data of nData[0] differ from that of nData[1]
We do not need to loop over all rows since we have shown in the previous cell that all rows are the same within a dataset

In [128]:
# Compare the hh0 arrays of the first row of both datasets
hh0_identical = np.array_equal(coarseData.spectra[0].hh0, linecut_f_raw.spectra[0].hh0)
if not hh0_identical:
    print("The arrarys of hh0 are not the same in both datasets")

The arrarys of hh0 are not the same in both datasets


### Next steps

##### Update 2020-04-06
In fact, ignore the coarse dataset, as it would require a lot of effort for a minimal result.
More interesting would be to analyze the linecut_f dataset at each (hh0) peak position, for h=6, 8, 10.

##### Update 2020-04-03
treat both datasets independently in terms of the plotting and fitting
* plot 3D color map of both datasets to check that the data are consistent within each dataset
* proceed to fit 

##### Outdated ideas
involving treating both datasets together, which will make things more complicated, and therefore increases the risks of making errors, in addition to increasing the time required for the analysis:
* interpolate spectra of coarseData so that its hh0 array is the same as that of linecut_f_raw
* rescale the data, if there is a physical way to do it, otherwise simply treat both datasets separately

#### Prepare data and sort according to ascending value of magnetic field
##### Truly "deep" copy of linecut_f
such that coarseData and linecut_f_raw will *not* be modified if dfCopy is modified

See hacks.ipynb for more details, or https://stackoverflow.com/questions/52708341/make-a-truly-deep-copy-of-a-pandas-series

In [91]:
### Deep-copy the raw data
# A pickle round-trip is used as the copy mechanism so that modifying dfCopy (including the DataFrames
# nested in its 'spectra' column) does not modify linecut_f_raw
dfCopy = pickle.loads(pickle.dumps(linecut_f_raw))
dfCopy.head()

### Sort according to ascending value of magnetic field (currently disabled)
# nData_sorted = dfCopy.sort_values(by=['H (T)'],ignore_index=True)
# nData_sorted.head()

Unnamed: 0,filename,H (T),T (K),Proton charge,spectra
0,HH0_88631.txt,0.0,0.605973,1.312139,hh0 I dI 0 -12.99880 0.0 ...
1,HH0_88632.txt,0.049998,0.613334,0.800119,hh0 I dI 0 -12.99880 0.0 ...
2,HH0_88633.txt,0.099996,0.605727,0.083534,hh0 I dI 0 -12.99880 0.0 ...
3,HH0_88634.txt,0.099996,0.625493,0.800739,hh0 I dI 0 -12.99880 0.0 ...
4,HH0_88635.txt,0.150005,0.61822,0.801294,hh0 I dI 0 -12.99880 0.0 ...


#### Basic data processing
Rescale data and remove "bad" data

#### Data rescaling
This used to be done manually, after noticing that the intensity of the data at 0T was higher than that of the rest of the data. A factor of 0.635 was then used to rescale this spectrum to the level of other data. 
However, after getting the up-to-date data, it appears that the scaling factor is merely the proton charge, which is a proxy for the counting time of neutrons. With this information, it turns out that the ratio of Proton charges of the spectrum at 0T and that at 0.05T is 1.3 to 0.8. The rescaling factor of the former is thus 0.8/1.3=0.615. Hence the empirical value of 0.635 was a pretty good guess!

In [92]:
##### Normalize intensity and uncertainty of each spectrum by its Proton charge
# The new columns are written directly into the DataFrames stored in dfCopy['spectra']
for spectrum, charge in zip(dfCopy['spectra'], dfCopy['Proton charge']):
    spectrum['Inorm'] = spectrum['I'] / charge
    spectrum['dInorm'] = spectrum['dI'] / charge
idx = 1200# index that allows to look at data close to the (10 10 0) peak
dfCopy.spectra[0].loc[idx:idx+5]# show a few rows of the first spectrum, including the new columns

Unnamed: 0,hh0,I,dI,Inorm,dInorm
1200,-9.99875,1261600.0,94684.7,961483.3,72160.557743
1201,-9.99625,1230390.0,98819.2,937697.7,75311.519049
1202,-9.99375,1448420.0,109920.0,1103862.0,83771.596753
1203,-9.99125,2141070.0,124810.0,1631740.0,95119.477718
1204,-9.98875,2560080.0,141466.0,1951073.0,107813.252422
1205,-9.98625,4044660.0,169302.0,3082493.0,129027.464278


#### Find bad data, if any
##### Identify datasets with zero intensity
i.e. spectra whose intensity is zero at every point, to confirm that they contain no meaningful data before discarding them. Note that both H=0.86T and H=0.8649T round to 0.86 at the second decimal

In [93]:
# Build dfClean as dfCopy without the spectra that have no strictly positive normalized intensity.
# All offending rows are collected first and dropped in a single call: dropping from dfCopy inside the
# loop would restart from the full table at every hit and keep only the last removal.
zeroIdx = [idx for idx, row in dfCopy.iterrows() if not np.any(row.spectra.Inorm>0)]
for idx in zeroIdx:
    print(idx)# row index of each discarded spectrum
# dfCopy itself is left untouched, so re-running this cell gives the same dfClean and the same printout;
# dfClean keeps the row labels of dfCopy (its index is not reset)
dfClean = dfCopy.drop(zeroIdx)

84


##### Prepare plotting

In [108]:
# IPython magic selecting matplotlib's Qt backend for the plotting cells that follow
%matplotlib qt
# plot figures in external window
# f = plt.figure()

##### Plot spectra by groups of nSpec
to see if any spectrum differs from the others.
This method may fail if there are more than nSpec bad consecutive spectra. Hence nSpec should not be too small (5 or more should be good)

Notes as of 2020-04-06:
* After plotting all spectra together, it looks like only spectrum #85 has a lower intensity than the others. Not sure yet if I should discard it or try to rescale it (discarding is probably the best option, as it avoids wasted time, complications, and the risk of degrading the fits if the issue with the spectrum is more than a simple scaling factor)
* To do:
    1. Add value of magnetic field in legend (in addition to spectrum index)
    2. Delete bad spectra (i.e. #85)
    3. Move on to fit

In [210]:
# Plot the normalized spectra of dfClean in groups of nSpec curves, one figure per group
nSpec = 7# number of curves in each plot
specLabels = list(dfClean.index)# row labels of the spectra to plot; rows dropped during cleaning leave gaps in these labels
# Groups are built from the actual row labels rather than from range(len(dfClean)), so that every remaining
# spectrum is plotted however many rows were dropped, and no KeyError has to be silenced
nfig = -(-len(specLabels)//nSpec)# ceiling division: no empty extra figure when the count is a multiple of nSpec
fig = [None]*(nfig)
lgd = [None]*(nfig)
for fidx in range(nfig):
    fig[fidx] = plt.figure()
    ax = fig[fidx].add_subplot(1,1,1)
    for pidx in specLabels[nSpec*fidx:nSpec*(fidx+1)]:
        spectrum = dfClean.loc[pidx, 'spectra']
        ax.plot(spectrum.hh0,spectrum.Inorm,label=f'{pidx}')# the legend entry is the row label of the spectrum
    ax.set_xlim(-10.25,-5.75)
    lgd[fidx] = ax.legend(title='Spectrum index')
    lgd[fidx].set_draggable(True)
    plt.show()# ensures that all windows come to the foreground

##### Perform data processing and plot the rescaled spectrum along with another one
Note (2020-04-03): best would be to break down the following code in the following way:
1. Rescale the data:
    * store the rescaled data into a temporary dataset, 
    * then plot this dataset with a reference dataset to compare them
    * modify the rescaling factor if necessary and reiterate
2. When the appropriate rescaling has been found, perform the basic data processing operations (rescaling the actual dataset and removing the bad ones)

In [85]:
##### Remove data 0.86T, since it is bad
# NOTE(review): nData_sorted is only assigned in a commented-out line of the deep-copy cell above --
# confirm that it exists in the kernel before running this cell
isBadField = nData_sorted['H (T)'].round(2) == 0.86# rows whose field rounds to 0.86 T at the second decimal
delRowIdx = nData_sorted[isBadField].index
nData = nData_sorted.drop(delRowIdx).reset_index(drop=True)# renumber rows from 0 after the removal
idx = 89
nData.loc[idx:idx+4]# evaluated but not displayed, since it is not the last expression of the cell

##### Plot a couple of ENS spectra to check consistency between spectra before fitting
plt.cla()# clear the current axes before drawing
for idx in (0, 1):
    spectrum = nData.spectra[idx]
    plt.plot(spectrum.hh0, spectrum.I)
#     plt.xlim([peak_center-plt_interval,peak_center+plt_interval])

##### Plot 3D color map of neutrons spectra
as spectrum intensity vs position in reciprocal space and magnetic field

to check that the field dependence of the data is consistent

In [130]:
# Draw every spectrum as a 3D line: hh0 on x, magnetic field on y, raw intensity on z
fig = plt.figure()
ax = plt.axes(projection="3d")
for i in range(len(nData)):
    spectrum = nData.spectra[i]
    field = nData['H (T)'][i]
    H = np.ones(np.shape(spectrum.hh0)) * field# constant-field array with the same shape as the hh0 axis
    ax.plot3D(spectrum.hh0, H, spectrum.I)

#### Fit data

##### Center of peak to be studied in the following

In [49]:
# Center, in reciprocal space, of the unsplit peak studied in the following
peak_center = -8.0
# Presumably the half-width of the hh0 window plotted around peak_center
# (only referenced in a commented-out xlim call) -- TODO confirm
plt_interval = 0.1

##### Critical magnetic field 

In [91]:
# Critical field at zero temperature, in Tesla units, in the absence of demagnetizing factor
# (see data taken on needles of TmVO4-LS5200 in July 2017)
Hc_0 = 0.51