# Click "Edit App" to see the code
# Is Calc A better than Calc B ?
Python code to analyse the data provided in the writing workshop

In [None]:
# python packages
import pandas as pd # Dataframes and reading CSV files
import numpy as np # Numerical libraries
import matplotlib.pyplot as plt # Plotting library
from lmfit import Model # Least squares fitting library

Defining the parameters for the final figure, and a function for the tick marks.

In [None]:
# Matplotlib rcParams shared by all figures of this notebook.
# Every text size is expressed relative to a single base font size.
fontsize = 28
figureParameters = {
    # size of the figure
    'figure.figsize': (12, 8),
    # text sizes
    'axes.labelsize': fontsize,
    'axes.titlesize': fontsize,
    'legend.fontsize': fontsize*0.7,
    'xtick.labelsize': fontsize*0.8,
    'ytick.labelsize': fontsize*0.8,
    # tick marks point inside the frame
    'xtick.direction': "in",
    'ytick.direction': "in",
    # frame thickness and spacing between title and frame
    'axes.linewidth': 3,
    'axes.titlepad': 25,
}

from matplotlib.ticker import (MultipleLocator, 
                               FormatStrFormatter, 
                               AutoMinorLocator)
def prettyTicks(ax, xspacing, yspacing):
    """Apply the notebook's tick style to the axes ``ax``.

    Tick marks are drawn on both sides of each axis, minor ticks are
    positioned automatically, and fixed lengths/widths are used for the
    major and minor ticks.

    Parameters
    ----------
    ax
        Axes object whose x and y axis are modified in place.
    xspacing, yspacing
        Distance between consecutive major ticks on the x and y axis.
        When a spacing is not greater than 0 the major locator of that
        axis is left untouched; calling with 0 for both is a convenient
        way to look at the default range before choosing a spacing.
    """
    for axis, spacing in ((ax.xaxis, xspacing), (ax.yaxis, yspacing)):
        # ticks on both sides of the frame
        axis.set_ticks_position('both')
        # fixed major spacing only when explicitly requested
        if spacing > 0:
            axis.set_major_locator(MultipleLocator(spacing))
        axis.set_minor_locator(AutoMinorLocator())

    # minor ticks are half as long as the major ones
    for which, length in (('minor', 6), ('major', 12)):
        ax.tick_params(which=which, length=length, width=3, color='black')

Reading the input data

In [None]:
# Load the workshop data set and give its four columns short names:
# the temperature, the two calculations and the experimental values.
csvPath = "../miscData/writingWorshopData.csv"
columnNames = ("T", "Calc A" , "Calc B", "Exp")

data = pd.read_csv(csvPath)
data.columns = columnNames
print(data)

The experimental and calculated data should vary linearly with temperature, so we fit a straight line to each data set (using only the points above the temperature `T0`); this also lets us plot the fitted lines.

In [None]:
def line(x, m, q):
    """Straight line used as the fitting model.

    Parameters
    ----------
    x : independent variable
    m : slope
    q : intercept

    Returns
    -------
    ``m*x + q``
    """
    scaled = m*x
    return scaled + q

# Turn the straight-line function into an lmfit model;
# slope and intercept both start from 1.
fitLine = Model(line)
params = fitLine.make_params(m=1,q=1)

# Labels of the data sets, in the order of the data columns after "T"
nameOdDataSet = ["Calc A" , "Calc B", "Exp"]
numberOfDataSets = 3

# Only the rows with a temperature above T0 take part in the fit
T0 = 280
aboveT0 = data["T"] > T0
fitTemperatures = data[aboveT0]["T"]

# One fit result per data set, stored in the same order as nameOdDataSet.
# Column 0 is the temperature, so data set number k sits in column k+1.
results = [
    fitLine.fit(data.iloc[:, column][aboveT0], params=params, x=fitTemperatures)
    for column in range(1, numberOfDataSets + 1)
]


## Plot of the original data

In [None]:
# Switch matplotlib to the figure style defined for this notebook
plt.rcParams.update(figureParameters)

# Single axes holding the data points of every set and their fitted lines
fig, ax = plt.subplots()
prettyTicks(ax,10,2)
ax.set(xlabel="Temperature (K)", ylabel="Hydration free Energy (kJ/mol)")

# One marker shape per data set
marks = ['D','s','o']
temperature = data["T"]
for i, name in enumerate(nameOdDataSet[:numberOfDataSets]):
    # measured / computed points
    ax.scatter(temperature, data[name], label=name, s=100, marker=marks[i])

    # fitted straight line, evaluated over the full temperature column;
    # the empty label keeps the line out of the legend
    best = results[i].best_values
    y = line(temperature, best["m"], best["q"])
    ax.plot(temperature, y, label="", lw=3)

# Add the legend and render the figure
ax.legend(loc="upper left")
plt.show()

## Least-squares fitting results

In [None]:
# Report the fitted parameters and the goodness of fit of every data set.
for i in range(0,numberOfDataSets):
    fit = results[i]
    print("    Dataset : %s " % (nameOdDataSet[i]))
    for name, par in fit.params.items():
        # lmfit leaves stderr as None when the uncertainty could not be
        # estimated; formatting None with %.3f would raise a TypeError.
        if par.stderr is None:
            print("          %s : value = %8.3f +/- n/a " % (name, par.value))
        else:
            print("          %s : value = %8.3f +/- %.3f " % (name, par.value, par.stderr))
    print("Chi squared : %.3f " % fit.chisqr)
    # R squared compares the residuals with the spread of the points that were
    # actually fitted. The fit may use only a subset of the rows, so the
    # reference variance is taken from the data stored in the fit result and
    # not from the full data column.
    fittedValues = np.asarray(fit.data, dtype=float)
    Rsquared = 1 - fit.residual.var() / np.var(fittedValues)
    print("  R squared : %.3f " % Rsquared)
    print("-------------------------------------------")

## Mean absolute error

In [None]:
# Deviation of each calculation from the experiment, against temperature
fig, ax = plt.subplots()
prettyTicks(ax,10,0.5)
ax.set(xlabel="Temperature (K)", ylabel="Relative energy (kJ/mol)")

# The last data set is the experimental reference itself, so it is skipped
reference = data["Exp"]
for i in range(numberOfDataSets - 1):
    name = nameOdDataSet[i]
    deviation = data[name] - reference
    ax.scatter(data["T"], deviation, label=name, s=100, marker=marks[i])
    # connect the points; the empty label keeps the line out of the legend
    ax.plot(data["T"], deviation, label="")

# Dashed horizontal line at zero deviation
ax.axhline(y=0, linestyle="dashed", color='black')

ax.legend()
plt.show()

In [None]:
# Mean absolute error of each calculation with respect to the experiment
absoluteErrorA = np.absolute(data["Calc A"] - data["Exp"])
absoluteErrorB = np.absolute(data["Calc B"] - data["Exp"])
maeA = np.mean(absoluteErrorA)
maeB = np.mean(absoluteErrorB)

print("Mean absolute error for Calc A : %5.3f" % maeA )
print("Mean absolute error for Calc B : %5.3f" % maeB )