In [1]:
import os
import pandas as pd
import numpy as np

In [20]:
# Read the two input tables from the working directory:
# the theoretical data and the reference data it is compared against.
THEORETICAL_CSV = 'NMR_N_AVG.csv'
REFERENCE_CSV = 'N_RCCS.csv'
theoretical_data = pd.read_csv(THEORETICAL_CSV)
reference_data = pd.read_csv(REFERENCE_CSV)

In [21]:
# Join the theoretical rows to the reference rows: 'RES' on the theoretical side
# is matched against whatever the first column of the reference table is called.
# The default (inner) join keeps only rows with a match on both sides.
reference_key = reference_data.columns[0]
merged_data = theoretical_data.merge(reference_data, left_on='RES', right_on=reference_key)


In [22]:
# Calculate the chemical-shift difference (CS): theoretical 'SHIFT' minus the
# reference value. The reference value is taken positionally as the last column
# of the merged frame (presumably the shift column contributed by reference_data
# -- TODO confirm and switch to an explicit column name).
#
# Because this cell appends 'CS' as the new last column, a plain `.iloc[:, -1]`
# would pick up the previous 'CS' on a re-run and compute SHIFT - CS instead.
# Excluding any existing 'CS' column from the lookup keeps the cell idempotent;
# on a first run (no 'CS' yet) the result is unchanged.
reference_values = merged_data.drop(columns='CS', errors='ignore').iloc[:, -1]
merged_data['CS'] = merged_data['SHIFT'] - reference_values



In [23]:
# Coerce the 'NUM' column to numeric; entries that cannot be parsed become NaN.
merged_data['NUM'] = pd.to_numeric(merged_data['NUM'], errors='coerce')

# Order the rows by 'NUM' and attach the squared CS in one chained step.
merged_data_sorted = (
    merged_data
    .sort_values(by='NUM')
    .assign(Squared_Diff=lambda frame: frame['CS'] ** 2)
)

# RMSE = square root of the mean of the squared CS values.
mean_squared_cs = np.mean(merged_data_sorted['Squared_Diff'])
rmse = np.sqrt(mean_squared_cs)


In [24]:
# Show the per-row CS table (sorted by 'NUM') followed by the overall RMSE.
report_columns = ['NUM', 'RES', 'CS']
print(merged_data_sorted[report_columns])
print(f'RMSE: {rmse:.4f}')

    NUM RES      CS
0     1   G  1.2374
6     2   S -0.0826
22    3   M  3.2222
26    4   T  1.0818
7     6   S  3.1075
..  ...  ..     ...
21   88   S  3.7011
69   89   V  4.3565
66   90   K -3.5875
41   91   R -2.0457
36   92   T  2.0104

[73 rows x 3 columns]
RMSE: 3.3722


In [19]:
# Write two CSV files (row index omitted in both):
#   1. the full sorted table with every column;
#   2. a compact table holding only 'NUM' and 'CS', with CS rounded to 4 decimals.
# NOTE(review): the output names contain "CB" while the inputs read earlier in
# this notebook are the "N" files ('NMR_N_AVG.csv', 'N_RCCS.csv') -- confirm the
# names are intended and are not overwriting results from a different analysis.
# NOTE(review): this cell's execution count (19) is lower than the cells before
# it, so the files on disk may predate the values shown above -- re-run to confirm.
merged_data_sorted.to_csv('dCS_CB_NMR_sorted.csv', index=False)
cs_by_num = merged_data_sorted.loc[:, ['NUM', 'CS']].round({'CS': 4})
cs_by_num.to_csv('dCS_CB_NMR_calc.csv', index=False)
