# Transient Absorption TK4
Import libraries

In [None]:
import numpy as np
import pandas as pd
import scipy.optimize as opt
from scipy import stats

import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so use whichever spelling this installation has.
# If neither is available the default style is left in place.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
%matplotlib notebook

## Import data
First import and plot the ascorbic acid, water and methylene blue files:

In [None]:
# Load the ascorbic acid spectrum: tab-separated text, skipping the first 17
# lines and the last line. skipfooter is not supported by pandas' default C
# parser, so the Python engine is requested explicitly instead of relying on
# the automatic fallback (which emits a ParserWarning).
ascorbic2 = pd.read_csv(
    'Files/GROUP A PM Ascorbic SPECTRUM.txt',
    delimiter='\t',
    skiprows=17,
    skipfooter=1,
    engine='python',
    names=['wavelength', 'absorbance'],
)

In [None]:
# Load the water blank spectrum: tab-separated text, skipping the first 17
# lines and the last line. skipfooter is not supported by pandas' default C
# parser, so the Python engine is requested explicitly instead of relying on
# the automatic fallback (which emits a ParserWarning).
water = pd.read_csv(
    'Files/GROUP A PM WATER BLANK .txt',
    delimiter='\t',
    skiprows=17,
    skipfooter=1,
    engine='python',
    names=['wavelength', 'absorbance'],
)

In [None]:
# Load the dye spectrum: tab-separated text, skipping the first 17 lines and
# the last line. skipfooter is not supported by pandas' default C parser, so
# the Python engine is requested explicitly instead of relying on the
# automatic fallback (which emits a ParserWarning).
methylblue = pd.read_csv(
    'Files/Methyl blue.txt',
    delimiter='\t',
    skiprows=17,
    skipfooter=1,
    engine='python',
    names=['wavelength', 'absorbance'],
)

In [None]:
# Overlay the three absorbance spectra on a single figure, one labelled trace
# per sample, in the order water, ascorbic acid, methylene blue.
fig = plt.figure()

for spectrum, trace_label in (
    (water, 'water'),
    (ascorbic2, 'ascorbic acid'),
    (methylblue, 'methylene blue'),
):
    plt.plot(spectrum['wavelength'], spectrum['absorbance'], label=trace_label)

plt.legend()
plt.show()

*In what regions do ascorbic acid and methylene blue absorb? What wavelength should be monitored for the kinetics?*

---
## Import decay curves

Import the decay curves (done in triplicate) and check the file metadata. Over what wavelength range was the absorption measured?

In [None]:
# Find every file in the Files/ folder whose name begins with "GroupA".
# glob replaces the `!ls` shell escape so the cell is plain Python and also
# runs where no `ls` command exists. The list is sorted because the cells
# below select files by position: check that the order displayed here is
# A1, A2, A3, B1, ... E3 before continuing.
from glob import glob

allfiles = sorted(glob('Files/GroupA*'))
allfiles

In [None]:
def import_file(i):
    """Read the decay-curve file found at position ``i`` of ``allfiles``.

    The file is parsed as tab-separated text with its first 5 lines skipped.
    Only the columns at positions 1 and 2 are kept; they are returned in a
    dataframe as ``time`` and ``absorbance``.
    """
    path = allfiles[i]
    column_names = ['time', 'absorbance']
    return pd.read_csv(path, sep='\t', skiprows=5, usecols=[1, 2], names=column_names)

In [None]:
# Load the fifteen decay curves (samples A-E, three replicates each) from
# positions 0-14 of allfiles, in that order.
(A1, A2, A3,
 B1, B2, B3,
 C1, C2, C3,
 D1, D2, D3,
 E1, E2, E3) = [import_file(position) for position in range(15)]

In [None]:
# Display the first imported decay curve as a table (time and absorbance columns).
A1

Plot the first three decay curves (the A sample in triplicate). Then repeat (copy and paste the cell below) for the remaining samples B-E. Label them appropriately.

In [None]:
# Overlay the three raw A-sample decay curves (absorbance against time) on one
# figure. Copy this cell and swap in B1-B3, C1-C3, ... for the other samples.
fig = plt.figure()
plt.plot(A1['time'], A1['absorbance'], label='A1')
plt.plot(A2['time'], A2['absorbance'], label='A2')
plt.plot(A3['time'], A3['absorbance'], label='A3')

plt.xlabel('Elapsed time /s')
plt.ylabel('Absorbance')
plt.legend()
plt.show()

---
# Data cleanup
Each time series file contains a recording of absorbance next to a time stamp. These should be cleaned up before analysing further. For each decay curve:
1. Determine the time at [maximum](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.max.html) absorbance (*t*) and what position (index) in the data it is (*i*). This needs to be determined automatically for each file, so you cannot explicitly give these values. 
2. Delete the data recorded before the maximum, so that only the time range in which the absorbance decays smoothly remains, then reset the index of the pandas dataframe so that the rows are numbered from 0 again. *(Look up [reset_index](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.reset_index.html) and the `drop` keyword)*
3. Subtract the time value of the first data point from all of the time-data so that the decay starts at *t* = 0. Check this is working correctly before applying to all the files.


In [None]:
def reset_t(f):
    """Return a copy of a decay curve that starts at its absorbance maximum.

    Parameters
    ----------
    f : pandas.DataFrame
        Decay curve with ``time`` and ``absorbance`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows from the maximum absorbance onwards, re-indexed from 0, with
        the time of the maximum subtracted so the decay starts at t = 0.
        ``f`` itself is not modified.

    Raises
    ------
    ValueError
        If ``f`` has no rows or every absorbance value is NaN.
    """
    # Work with positions rather than index labels so the function also
    # behaves correctly on frames whose index does not start at 0.
    i = int(np.nanargmax(f['absorbance'].to_numpy()))  # position of maximum absorbance
    t = f['time'].iloc[i]                              # time at maximum absorbance

    ### create new data starting from top of decay curve (removing unwanted initial data)
    reset_f = f.iloc[i:].copy()               # makes a copy of the data rather than overwriting
    reset_f = reset_f.reset_index(drop=True)  # renumber rows from 0, discarding the old index
    reset_f['time'] = reset_f['time'] - t     # subtract time value to start decay at t=0

    return reset_f

In [None]:
### Cleanup data
# Trim each raw A replicate with reset_t, keep the three results together as
# a list, and also bind them to individual names.
Aseries = [reset_t(raw_curve) for raw_curve in (A1, A2, A3)]
a1, a2, a3 = Aseries

Plot the A series - do they all start from *t* = 0? Repeat for the other series. You can verify by plotting each.

In [None]:
def plot_f(f):
    """Plot absorbance against elapsed time for every curve in ``f``.

    Parameters
    ----------
    f : sequence of pandas.DataFrame
        Decay curves, each with ``time`` and ``absorbance`` columns. Any
        number of curves is accepted; they are labelled '1', '2', '3', ...
        in the order given.
    """
    plt.figure()
    for number, curve in enumerate(f, start=1):
        plt.plot(curve['time'], curve['absorbance'], label=str(number))

    plt.xlabel('Elapsed time /s')
    plt.ylabel('Absorbance')
    plt.legend()
    plt.show()

In [None]:
# Plot the three cleaned-up A-series curves on one figure to check that each starts at t = 0.
plot_f(Aseries)

In [None]:
# Trim each raw B replicate with reset_t, keep the three results together as
# a list, and also bind them to individual names.
Bseries = [reset_t(raw_curve) for raw_curve in (B1, B2, B3)]
b1, b2, b3 = Bseries

In [None]:
# Trim each raw C replicate with reset_t, keep the three results together as
# a list, and also bind them to individual names.
Cseries = [reset_t(raw_curve) for raw_curve in (C1, C2, C3)]
c1, c2, c3 = Cseries

In [None]:
# Trim each raw D replicate with reset_t, keep the three results together as
# a list, and also bind them to individual names.
Dseries = [reset_t(raw_curve) for raw_curve in (D1, D2, D3)]
d1, d2, d3 = Dseries

In [None]:
# Trim each raw E replicate with reset_t, keep the three results together as
# a list, and also bind them to individual names.
Eseries = [reset_t(raw_curve) for raw_curve in (E1, E2, E3)]
e1, e2, e3 = Eseries

### Pseudo first-order fit

Model the data using the pseudo first-order equation $A = A_0 e^{-kt} + B$, where $B$ is an offset which accounts for the absorbance not reaching zero, or dipping below zero. Plot your decay curves and the fitted models as $(A - B)/A_0$ vs time. Plot each series on its own graph.

In [None]:
def first_order(t, A0, b, k):
    """Pseudo first-order decay model, A = A0 * exp(-k * t) + b.

    Parameters
    ----------
    t : float or array-like
        Time value(s) at which to evaluate the model.
    A0 : float
        Amplitude of the decay.
    b : float
        Constant offset added to the decay.
    k : float
        Rate constant.

    Returns
    -------
    Model absorbance, with the same shape as ``t``.
    """
    return A0 * np.exp(-k * t) + b

In [None]:
def normalise(f, optP):
    """Return the fitted model curve with the offset removed, scaled by A0.

    ``optP`` holds the fitted parameters with A0 first and the offset second;
    all of them are passed on to ``first_order``, which is evaluated at
    ``f['time']``.
    """
    amplitude, offset = optP[0], optP[1]
    model_curve = first_order(f['time'], *optP)
    return (model_curve - offset) / amplitude

In [None]:
### For each series:
# One empty list per series (A-E); the fitted rate constants are appended to
# these later. Re-run this cell to empty them before repeating the fits.
A, B, C, D, E = ([] for _ in range(5))

In [None]:
def _is_same_object(frame, series):
    """Return True if ``frame`` is one of the objects held in ``series``."""
    return any(frame is member for member in series)


def append_k(f, k):
    """Append the rate constant to its appropriate list.

    ``f`` is matched against the members of ``Aseries`` ... ``Eseries`` by
    object identity, and ``k`` is appended to the corresponding list
    (``A`` ... ``E``). Comparing printed dataframes instead would be
    unreliable: two curves with the same printed form (long frames are
    printed truncated) would both match the first series checked.

    If ``f`` is not a member of any series a message is printed and nothing
    is stored.
    """
    # The series are checked one at a time so that only the series defined so
    # far need to exist when an earlier one matches.
    if _is_same_object(f, Aseries):
        A.append(k)

    elif _is_same_object(f, Bseries):
        B.append(k)

    elif _is_same_object(f, Cseries):
        C.append(k)

    elif _is_same_object(f, Dseries):
        D.append(k)

    elif _is_same_object(f, Eseries):
        E.append(k)

    else:
        print("File doesn't match?")

In [None]:
def plot_fits(f_list):
    """Fit, report and plot the pseudo first-order decay of each curve.

    For every dataframe in ``f_list`` the ``first_order`` model is fitted to
    absorbance against time, the fitted rate constant is stored with
    ``append_k``, and each fitted parameter is printed with an uncertainty of
    twice the square root of its diagonal entry in the covariance matrix.
    The measured points and the fitted model are both drawn as (A-B)/A0
    against time, matching the axis label.

    Parameters
    ----------
    f_list : sequence of pandas.DataFrame
        Decay curves with ``time`` and ``absorbance`` columns.

    Notes
    -----
    Every call appends to the rate-constant lists, so fitting the same series
    twice leaves duplicate entries in its list.
    """
    colours = ['navy', 'b', 'g', 'lime', 'orange', 'r']
    plt.figure()
    plt.xlabel('Time /s')
    plt.ylabel('(A-B)/A0')

    for counter, f in enumerate(f_list, start=1):
        ### Do the pseudo first order fit for each file:
        optP, pcov = opt.curve_fit(first_order, f['time'], f['absorbance'])
        A0, b, k = optP

        ### Append k to the appropriate list that was empty before
        append_k(f, k)

        print('A0 =', A0, '+/-', 2*pcov[0][0]**0.5)
        print('b =', b, '+/-', 2*pcov[1][1]**0.5)
        print('k =', k, '+/-', 2*pcov[2][2]**0.5, 's-1')
        print('------------------------------------')

        # Colours are consumed in (data, fit) pairs; wrap around so that more
        # than three curves reuse colours instead of raising IndexError.
        data_colour = colours[(2*counter - 2) % len(colours)]
        fit_colour = colours[(2*counter - 1) % len(colours)]

        ### Plotting data and fit, both normalised as (A-B)/A0
        plt.plot(f['time'], (f['absorbance'] - b)/A0, '.', c=data_colour, label=('exp '+str(counter)))
        # Optional: plot the un-normalised fit instead
        # plt.plot(f['time'], first_order(f['time'], *optP), "-", c=fit_colour, label=('fit '+str(counter)))
        plt.plot(f['time'], normalise(f, optP), "-", c=fit_colour, label=('normalised fit '+str(counter)))

        ### Optional: plot ln(fit) vs t
        # plt.plot(f['time'], np.log(normalise(f, optP)), "-", c=fit_colour, label=(str(counter)))
        # plt.ylabel('ln((A-B)/A0)')

    plt.legend()
    plt.show()

In [None]:
# Fit and plot the A series. Each run appends three more rate constants to A,
# so empty the rate-constant lists before running this cell again.
plot_fits(Aseries)

In [None]:
# Fit and plot the B series. Each run appends three more rate constants to B,
# so empty the rate-constant lists before running this cell again.
plot_fits(Bseries)

In [None]:
# Fit and plot the C series. Each run appends three more rate constants to C,
# so empty the rate-constant lists before running this cell again.
plot_fits(Cseries)

In [None]:
# Fit and plot the D series. Each run appends three more rate constants to D,
# so empty the rate-constant lists before running this cell again.
plot_fits(Dseries)

In [None]:
# Fit and plot the E series. Each run appends three more rate constants to E,
# so empty the rate-constant lists before running this cell again.
plot_fits(Eseries)

Plot the natural logarithm of $(A - B)/A_0$ vs time for each decay curve. (This code can be activated in the `plot_fits` function above.) Is it a straight line?

---

### Determine $k_2$
Using the rate constants for each reaction, make a plot of $k$ as a function of [ascorbic acid]. Determine the second-order rate coefficient, $k_2$, as the slope of this plot. What are the units of $k_2$?

In [None]:
### Tabulate data
# One row per series (A-E), one column per replicate fit, with the ascorbic
# acid concentration of each series placed in the first column.
all_data = pd.DataFrame([A, B, C, D, E], columns=['1', '2', '3'])
all_data.insert(0, "[ascorbic acid]", [0.01, 0.02, 0.03, 0.04, 0.05])
all_data

In [None]:
### Plot rate constants vs [ascorbic acid]
# The first replicate creates the axes (default colour); the other two are
# drawn onto the same axes in their own colours.
concentration_column = '[ascorbic acid]'
ax = all_data.plot.scatter(x=concentration_column, y='1', label='1')
for replicate, colour in (('2', 'orange'), ('3', 'r')):
    all_data.plot.scatter(x=concentration_column, y=replicate, label=replicate, c=colour, ax=ax)
plt.ylabel('k');

In [None]:
def line(x, m, b):
    """Straight-line model: slope ``m`` times ``x`` plus intercept ``b``."""
    return b + m * x

In [None]:
### Calculate average k from triplicate data
# k2 holds the three replicate columns; the row-wise mean gives one average
# rate constant per series.
replicate_columns = ['1', '2', '3']
k2 = all_data[replicate_columns]
k2.mean(axis=1)

In [None]:
# Fit a straight line to the mean rate constant of each series against the
# ascorbic acid concentration and report the slope with an uncertainty of
# twice the square root of its diagonal entry in the covariance matrix.
x = all_data["[ascorbic acid]"]
mean_k = k2.mean(axis=1)
optP, pcov = opt.curve_fit(line, x, mean_k)

slope = optP[0]
slope_uncertainty = 2*pcov[0][0]**0.5
print('slope is', slope, '+/-', slope_uncertainty)