# TmVO4 neutrons data analysis
Fit neutron diffraction peaks measured on TmVO4 at SNS on 2019-02-14, in order to extract the orthorhombic distortion as a function of magnetic field.

#### Import necessary modules

In [86]:
from mpl_toolkits import mplot3d# for 3D plotting

import copy as cp
import numpy as np
from os import chdir, getcwd, walk
from matplotlib import pyplot as plt, rc, rcParams# import matplotlib.pyplot as plt
# from matplotlib import rc
import pandas as pd
import re
import pickle
from lmfit import Model
# import pytest

#### Change default plotting parameters
see https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.figure.html

In [60]:
# Halve the default figure size (matplotlib default is [6.4, 4.8])
rcParams["figure.figsize"] = 0.5*np.array([6.4, 4.8])

In [2]:
# Import the peak function pVIC and wrap it in an lmfit Model
from ENS_peak_fit_pVIC_py.pseudoVoigtIkedaCarpenter import pVIC
pvic_model = Model(pVIC)
# Show which names the Model treats as fit parameters and which as independent variables
print('parameter names: {}'.format(pvic_model.param_names))
print('independent variables: {}'.format(pvic_model.independent_vars))

parameter names: ['A', 'alpha', 'beta', 'R', 'gamma', 'sigma', 'k', 'x0']
independent variables: ['x']


### Import data

In [109]:
# Two-slot container for the imported data: one slot per raw dataset
nData_raw = [None, None]

In [110]:
# Directory of the first dataset; it becomes the working directory so that the
# data files can subsequently be opened by name only
mypath1 = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14'
chdir(mypath1)
# walk() yields one (dirpath, dirnames, filenames) tuple per directory, starting
# with the directory it is given as argument; a single call to next() therefore
# returns the tuple describing mypath1 itself, of which only the third element
# (the list of file names) is kept
filenames = next(walk(mypath1))[2]
filenames

['p6K_1T.txt',
 'p6K_1T_new.txt',
 'p6K_p05T.txt',
 'p6K_p4T.txt',
 'p6K_p5T.txt',
 'p6K_p6T.txt',
 'p6K_p77T.txt',
 'p6K_p7T.txt',
 'p6K_p83T.txt',
 'p6K_p97T.txt',
 'p6K_p9T.txt']

In [111]:
# Pattern extracting the magnetic field from file names such as 'p6K_p05T.txt' or
# 'p6K_1T_new.txt': the group between 'p6K_' and 'T' encodes the field, with the
# letter 'p' standing for the decimal point.
# Written as a raw string so that \w is not an invalid string escape, and with an
# escaped dot so that only a literal '.txt' extension matches.
field_pattern = r'p6K_(\w*)T\w*\.txt'

dfList1 = [None]*len(filenames)# Pre-allocate list to store the data that will then be converted into a Pandas DataFrame
for idx, filename in enumerate(filenames):# For each data file
    field_match = re.search(field_pattern, filename)
    if field_match is None:
        # Fail with an explicit message instead of an opaque IndexError
        raise ValueError('Cannot extract the magnetic field from filename: {}'.format(filename))
    H = float(field_match.group(1).replace('p','.'))# Value of magnetic field encoded in the filename
    ENS_data1 = pd.read_csv(filename,names=["hh0","I","dI"],skiprows=2,delimiter=',')# Import data as a Pandas DataFrame
    dfList1[idx] = {'filename': filename,# Store into a dictionary, which is itself an element of dfList1: the filename,
                   'H (T)': H,# value of magnetic field,
                   'T (K)': 0.6,# temperature (same fixed value for every file of this dataset),
                   'ENS_data': ENS_data1# and data,
                   }
nData_raw[0] = pd.DataFrame(dfList1)# Convert the list of dictionaries into a Pandas DataFrame
nData_raw[0]# Show the resulting DataFrame

Unnamed: 0,filename,H (T),T (K),ENS_data
0,p6K_1T.txt,1.0,0.6,hh0 I dI 0 -11.99...
1,p6K_1T_new.txt,1.0,0.6,hh0 I dI 0 -11.99...
2,p6K_p05T.txt,0.05,0.6,hh0 I dI 0 -11.9975...
3,p6K_p4T.txt,0.4,0.6,hh0 I dI 0 -11.997...
4,p6K_p5T.txt,0.5,0.6,hh0 I dI 0 -11.99...
5,p6K_p6T.txt,0.6,0.6,hh0 I dI 0 -11.99...
6,p6K_p77T.txt,0.77,0.6,hh0 I dI 0 -11.99...
7,p6K_p7T.txt,0.7,0.6,hh0 I dI 0 -11.99...
8,p6K_p83T.txt,0.83,0.6,hh0 I dI 0 -11.99...
9,p6K_p97T.txt,0.97,0.6,hh0 I dI 0 -11.99...


In [112]:
# Check the import: show the first rows of the spectrum stored in row 3 of the first dataset
nData_raw[0].loc[3, 'ENS_data'].head()

Unnamed: 0,hh0,I,dI
0,-11.9975,142028.0,52309.7
1,-11.9925,220372.0,62925.4
2,-11.9875,220002.0,62818.9
3,-11.9825,518428.0,92939.8
4,-11.9775,817317.0,127501.0


In [113]:
# Directory of the second dataset; it becomes the working directory so that
# field_info.txt and the data files can be opened by name only
mypath2 = r'C:\Users\Pierre\Desktop\Postdoc\TmVO4\TmVO4_neutrons\2019-02_ORNL_Corelli\2019-02-14\p6K\linecut2'
chdir(mypath2)
# Tab-separated table giving, for each data file, its name, temperature and field
fieldInfo = pd.read_csv(
    'field_info.txt',
    delimiter='\t',
    header=0,# the first line of the file is a header, overridden by the names below
    names=['FileName','T (K)','H (T)'],
)
fieldInfo.head()

Unnamed: 0,FileName,T (K),H (T)
0,HH0_88631,0.605973,0.0
1,HH0_88632,0.613334,0.049998
2,HH0_88634,0.625493,0.099996
3,HH0_88635,0.61822,0.150005
4,HH0_88636,0.614194,0.200003


In [114]:
# One record (dictionary) per line of fieldInfo; the list of records is turned
# into a Pandas DataFrame once all files have been read
dfList2 = [None]*len(fieldInfo)
# Walk through the three columns of fieldInfo in parallel
info_columns = zip(fieldInfo['FileName'], fieldInfo['T (K)'], fieldInfo['H (T)'])
for idx, (basename, temperature, field) in enumerate(info_columns):
    filename = basename + '.txt'# fieldInfo lists the file names without extension
    # Read the spectrum, naming the three columns of the file
    ENS_data2 = pd.read_csv(filename,names=["hh0","I","dI"],skiprows=2,delimiter=',')
    dfList2[idx] = {
        'filename': filename,
        'H (T)': field,# magnetic field taken from fieldInfo
        'T (K)': temperature,# temperature taken from fieldInfo
        'ENS_data': ENS_data2,# the spectrum itself, nested as a DataFrame
    }
nData_raw[1] = pd.DataFrame(dfList2)# Second raw dataset
nData_raw[1].head()# Show the resulting DataFrame

Unnamed: 0,filename,H (T),T (K),ENS_data
0,HH0_88631.txt,0.0,0.605973,hh0 I dI 0 -12.9975 0.0 0....
1,HH0_88632.txt,0.049998,0.613334,hh0 I dI 0 -12.9975 0.0 0....
2,HH0_88634.txt,0.099996,0.625493,hh0 I dI 0 -12.9975 0.0 0....
3,HH0_88635.txt,0.150005,0.61822,hh0 I dI 0 -12.9975 0.0 0....
4,HH0_88636.txt,0.200003,0.614194,hh0 I dI 0 -12.9975 0.0 0....


In [115]:
# Check the import: show rows 180 to 185 of the spectrum stored in row 3 of the second dataset
nData_raw[1].loc[3, 'ENS_data'].loc[180:185]

Unnamed: 0,hh0,I,dI
180,-12.0975,0.0,0.0
181,-12.0925,20535.1,14520.5
182,-12.0875,30592.1,22671.8
183,-12.0825,10336.1,22743.6
184,-12.0775,30571.4,17650.5
185,-12.0725,20162.3,14257.0


### Concatenate datasets

#### Prepare datasets for concatenation

##### Check that all hh0 data are the same within each dataset
i.e. that all hh0 arrays of nData_raw[0] are identical to one another, and that all hh0 arrays of nData_raw[1] are identical to one another

In [124]:
##### Check that all spectra of a given dataset share the same hh0 array
for dataset in nData_raw:# for each dataset
    # the hh0 array of the first row serves as reference for the whole dataset
    reference_hh0 = dataset.ENS_data[0].hh0
    for _, row in dataset.iterrows():# loop over all rows
        if np.array_equal(row.ENS_data.hh0,reference_hh0):
            continue# identical to the reference: nothing to report
        print(row)# only rows whose hh0 array differs from the reference are printed
        # no output at all therefore means that all arrays of hh0 are the same *within a dataset*

##### Then check whether the hh0 data of nData_raw[0] differ from those of nData_raw[1]
We do not need to loop over all rows since we have shown in the previous cell that all rows are the same within a dataset

In [128]:
# Compare the hh0 arrays of the first row of both datasets
hh0_arrays_match = np.array_equal(nData_raw[0].ENS_data[0].hh0,nData_raw[1].ENS_data[0].hh0)
if not hh0_arrays_match:
    print("The arrarys of hh0 are not the same in both datasets")

The arrarys of hh0 are not the same in both datasets


### Next steps as of 2020-04-03
##### Updated idea
treat both datasets independently in terms of the plotting and fitting
* plot 3D color map of both datasets to check that the data are consistent within each dataset
* proceed to fit 

##### Outdated ideas
These involve treating both datasets together, which would make things more complicated and would therefore increase both the risk of making errors and the time required for the analysis:
* interpolate ENS_data of nData_raw[0] so that its hh0 array is the same as that of nData_raw[1]
* rescale the data, if there is a physical way to do it, otherwise simply treat both datasets separately

#### Concatenate data and sort according to ascending value of magnetic field
##### Truly "deep" concatenation of nData_raw[0] and nData_raw[1]
such that nData_raw[0] and nData_raw[1] will *not* be modified if nData_unsorted is modified

See hacks.ipynb for more details, or https://stackoverflow.com/questions/52708341/make-a-truly-deep-copy-of-a-pandas-series

In [62]:
### Concatenate data
# The pickle round trip produces a deep copy that includes the DataFrames nested
# in the 'ENS_data' column, so that modifying nData_unsorted leaves nData_raw untouched
nData_unsorted = pickle.loads(pickle.dumps(pd.concat([nData_raw[0],nData_raw[1]],ignore_index=True)))

### Sort according to ascending value of magnetic field
nData_sorted = nData_unsorted.sort_values(by=['H (T)'],ignore_index=True)
nData_sorted.head()

Unnamed: 0,filename,H (T),T (K),ENS_data
0,HH0_88631.txt,0.0,0.605973,hh0 I dI 0 -12.9975 0.0 0....
1,HH0_88632.txt,0.049998,0.613334,hh0 I dI 0 -12.9975 0.0 0....
2,p6K_p05T.txt,0.05,0.6,hh0 I dI 0 -11.9975...
3,HH0_88634.txt,0.099996,0.625493,hh0 I dI 0 -12.9975 0.0 0....
4,HH0_88635.txt,0.150005,0.61822,hh0 I dI 0 -12.9975 0.0 0....


#### Basic data processing
Rescale data and remove "bad" data

##### Extract data at 0.86T 
to check that these data do not make sense. Note that the selection is made on the field rounded to the second decimal, so that both H=0.86T and H=0.8649T are selected as 0.86

In [47]:
# Select the rows whose field rounds to 0.86 at the second decimal
test = nData_sorted[np.around(nData_sorted['H (T)'],2)==0.86].reset_index()
idx = 0# position, within the selection, of the spectrum to inspect
# Keep only the points of spectrum idx that have a strictly positive intensity;
# the mask is built from the same spectrum as the one being filtered
test.ENS_data[idx][test.ENS_data[idx].I>0]# returns nothing for idx = 0,
# meaning that no point of that spectrum has a positive intensity

Unnamed: 0,hh0,I,dI


##### Prepare plotting

In [78]:
%matplotlib qt
# plot figure in external window
# Create a figure ahead of plotting
# NOTE(review): presumably the figure that the plt.cla()/plt.plot() calls of the next code cell draw into -- confirm
f = plt.figure()

##### Perform data processing and plot the rescaled spectrum along with another one
Note (2020-04-03): best would be to break down the following code in the following way:
1. Rescale the data:
    * store the rescaled data into a temporary dataset, 
    * then plot this dataset with a reference dataset to compare them
    * modify the rescaling factor if necessary and reiterate
2. When the appropriate rescaling has been found, perform the basic data processing operations (rescaling the actual dataset and removing the bad ones)

In [85]:
###### Reset data in order to adjust the scaling factor
# Rebuild the merged table from the raw data (deep copy through a pickle round trip),
# so that re-running this cell never applies the rescaling more than once
nData_unsorted = pickle.loads(pickle.dumps(pd.concat([nData_raw[0],nData_raw[1]],ignore_index=True)))#
nData_sorted = nData_unsorted.sort_values(by=['H (T)'],ignore_index=True)

##### Rescale data at 0T, as it has a higher intensity than the rest
rescaling_factor = 0.635# empirical value that rescales the first spectrum to the other ones
zero_field = np.around(nData_sorted['H (T)'],2)==0.# rows whose field rounds to zero
for spectrum in nData_sorted.loc[zero_field,'ENS_data']:
    # Each element is the very DataFrame object stored in nData_sorted, hence the
    # column assignment below rescales the stored spectrum in place
    spectrum['I'] = spectrum['I']*rescaling_factor
    # NOTE(review): the 'dI' column (presumably the uncertainty on I) is left
    # unscaled -- confirm that this is intended

##### Remove data 0.86T, since it is bad
delRowIdx = nData_sorted[np.around(nData_sorted['H (T)'],2)==0.86].index
nData = nData_sorted.drop(delRowIdx).reset_index(drop=True)# renumber rows from 0 after the removal

##### Plot a couple of ENS spectra to check consistency between spectra before fitting
plt.cla()# clear the current axes before plotting
for idx in range(2):# the first two spectra of the processed dataset
    plt.plot(nData.ENS_data[idx].hh0,nData.ENS_data[idx].I)
#     plt.xlim([peak_center-plt_interval,peak_center+plt_interval])
plt.xlabel('hh0')
plt.ylabel('I')

##### Plot 3D color map of neutron spectra
as spectrum intensity vs position in reciprocal space and magnetic field

to check that the field dependence of the data is consistent

In [130]:
# One 3D line per spectrum: intensity I versus hh0, drawn at the constant value
# of magnetic field at which the spectrum was measured
fig = plt.figure()
ax = plt.axes(projection="3d")
# Iterate over the fields and spectra in parallel, which does not depend on the
# index labels of nData
for field, spectrum in zip(nData['H (T)'],nData.ENS_data):
    H = np.full(len(spectrum),field)# constant-field coordinate, one value per data point
    ax.plot3D(spectrum.hh0,H,spectrum.I)
# Label the axes with the names of the plotted columns
ax.set_xlabel('hh0')
ax.set_ylabel('H (T)')
ax.set_zlabel('I')

#### Fit data

##### Center of peak to be studied in the following

In [49]:
# Position, in reciprocal space, of the center of the unsplit peak
peak_center = -8.0
# NOTE(review): presumably the half-width of the window around peak_center used when plotting -- confirm
plt_interval = 0.1

##### Critical magnetic field 

In [91]:
# Critical magnetic field at zero temperature, in Tesla units,
# in the absence of demagnetizing factor;
# see data taken on needles of TmVO4-LS5200 in July 2017
Hc_0 = 0.51