In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import glob

In [2]:
# Configuration: folder scanned for input workbooks and folder receiving the PNGs.
input_folder = 'test/'
output_folder = 'test/'

# Excel keeps a '~$<name>.xlsx' owner/lock file next to any workbook that is
# currently open. Those files match '*.xlsx' but are not readable workbooks,
# so they are left out. Sorting makes the processing order reproducible.
xlsx_files = sorted(
    path for path in glob.glob(os.path.join(input_folder, '*.xlsx'))
    if not os.path.basename(path).startswith('~$')
)
mpl.rc('font', family='Arial')

# Create the destination up front so savefig cannot fail on a missing folder;
# skipped when there is nothing to process so no empty folder is left behind.
if xlsx_files:
    os.makedirs(output_folder, exist_ok=True)


def _plot_mass_scatter(cat_a, cat_bc, cat_d, y_column, y_label, y_limits, output_path):
    """Save a scatter plot of ``y_column`` against mass for the three category groups.

    Parameters
    ----------
    cat_a, cat_bc, cat_d : pandas.DataFrame
        Rows to draw as the 'A', 'B&C' and 'D' series. Each frame must provide
        a 'mass' column and the column named by ``y_column``.
    y_column : str
        Name of the column plotted on the y axis.
    y_label : str
        Text of the y-axis label.
    y_limits : tuple of (float, float)
        Lower and upper y-axis limits.
    output_path : str
        Destination image path; written at 900 dpi with a tight bounding box.
    """
    mass_limits = (150, 1150)

    fig, ax = plt.subplots(figsize=[7.5, 8])
    try:
        series_styles = (
            (cat_a, 'o', 'black', 'A'),
            (cat_bc, 'x', 'gray', 'B&C'),
            (cat_d, '3', 'silver', 'D'),
        )
        for frame, marker, color, label in series_styles:
            ax.scatter(frame['mass'], frame[y_column], marker=marker, color=color, label=label)

        ax.set_xlabel('Mass (Da)', fontsize=22, labelpad=7.5)
        ax.set_ylabel(y_label, fontsize=22, labelpad=0)

        ax.set_xlim(*mass_limits)
        ax.set_ylim(*y_limits)
        ax.set_xticks([200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100])
        # Minor tick positions every 5 Da / 0.005 units across the fixed limits.
        ax.set_xticks(np.arange(mass_limits[0], mass_limits[1], 5), minor=True)
        ax.set_yticks(np.arange(y_limits[0], y_limits[1], 0.005), minor=True)
        ax.grid(True, which='major', linewidth=.2, color='silver')

        ax.tick_params(axis='both', which='major', labelsize=15, pad=12.5)
        # length=0 hides the tick marks themselves (major and minor); labels stay.
        ax.tick_params(axis='both', which='both', length=0, pad=12.5)

        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_linewidth(1.75)

        ax.legend(title='Category', prop={'size': 12.5}, loc='lower left')

        fig.savefig(output_path, dpi=900, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving raised, so a
        # long batch does not accumulate open figures in the kernel.
        plt.close(fig)


# Loop through each file in the folder
for input_file_path in xlsx_files:
    # Input File:
    df = pd.read_excel(input_file_path)

    # Prepare the dataframe. The full column template is kept so the frame's
    # schema stays stable; columns that are not filled below remain empty.
    input_data = pd.DataFrame(columns = ['m/z','mass','category','formula','C','H','F','Cl','Br','N','P','S',
                                         'KMD','z*','exp_KMD','O/C','F/C','abund','abund_norm','NOSC'])
    input_data['m/z'] = df['exper_mz']
    input_data['mass'] = df['lib_mass']
    input_data['category'] = df['category']
    input_data['formula'] = df['formula']
    input_data['KMD'] = df['KMD_CF2']
    input_data['z*'] = df['z*_CF2']
    # Mass defect: signed distance of the library mass from its nearest integer.
    input_data['mass_defect'] = df['lib_mass'] - df['lib_mass'].round()

    # Optional columns, currently disabled (not needed for the two plots below):
    # input_data['C'] = df['C']
    # input_data['H'] = df['H']
    # input_data['F'] = df['F']
    # input_data['Cl'] = df['Cl']
    # input_data['Br'] = df['Br']
    # input_data['N'] = df['N']
    # input_data['P'] = df['P']
    # input_data['S'] = df['S']
    # input_data['exp_KMD'] = (df['exper_mz'].round() - df['exper_mz']*50/49.99681).round(4)
    # input_data['O/C'] = df['O'] / df['C']
    # input_data['F/C'] = df['F'] / df['C']
    # input_data['abund'] = df['abund']
    # input_data['abund_norm'] = df['abund'] / df['abund'].max()
    # input_data['NOSC'] = 4 - ((4*df['C'] - df['F'] + df['H'] - 2*df['O'] + 4*df['S'] - 3*df['N'] - df['Cl'] + 5*df['P']) / df['C'])

    # Split by category. A rows are plotted as-is; B/C and D rows are
    # de-duplicated on m/z so each measured peak is drawn once.
    Cat_A = input_data[input_data['category'] == 'A']
    Cat_BC = input_data[input_data['category'].isin(['B', 'C'])].drop_duplicates(subset=['m/z'])
    Cat_D = input_data[input_data['category'] == 'D'].drop_duplicates(subset=['m/z'])

    # Output names: '<input name without "_series">_KMD.png' / '_MassDefects.png'.
    input_file_name = os.path.splitext(os.path.basename(input_file_path))[0]
    output_stem = input_file_name.replace('_series', '')

    # Plot mass vs. KMD_CF2:
    output_file_name = output_stem + '_KMD.png'
    output_file_path = os.path.join(output_folder, output_file_name)
    _plot_mass_scatter(Cat_A, Cat_BC, Cat_D,
                       y_column='KMD', y_label='KMD', y_limits=(-0.4, 0.1),
                       output_path=output_file_path)

    # Plot mass vs. mass defect:
    output_file_name = output_stem + '_MassDefects.png'
    output_file_path = os.path.join(output_folder, output_file_name)
    _plot_mass_scatter(Cat_A, Cat_BC, Cat_D,
                       y_column='mass_defect', y_label='Mass Defect', y_limits=(-0.2, 0.4),
                       output_path=output_file_path)