In [None]:
from pathlib import Path
import pandas as pd


# Keyword arguments forwarded unchanged to pandas.read_csv for every file.
kw = dict(
    skiprows=4,              # skip the first four lines of each file
    na_values='NaN',
    sep=',',
    skipinitialspace=True,   # ignore spaces that follow the delimiter
    index_col=9,             # the column at position 9 becomes the index
    encoding='latin1',
)


# Read every matching CSV under `data/` into `dfs`. Each key is the
# lower-cased path text that follows "data_core", cut at the first dot.
dfs = {}
p = Path('data')
for fname in sorted(p.glob('Data_CoreC*.csv')):
    print(fname)
    lowered = str(fname).lower()
    after_prefix = lowered.split('data_core')[-1]
    idx = after_prefix.split('.')[0]
    df = pd.read_csv(fname, **kw)
    dfs[idx] = df

In [None]:
import numpy as np

# Pool the '%C' and 'TOC' columns of every loaded frame into two flat lists,
# visiting the frames in the order they are stored in `dfs`.
x = [value for frame in dfs.values() for value in frame['%C'].values]
y = [value for frame in dfs.values() for value in frame['TOC'].values]

# Stack the two lists as columns of one frame, then discard any row in which
# either value is missing.
paired = np.array([x, y]).T
carbon = pd.DataFrame(paired, columns=['%C', 'TOC']).dropna()

carbon.head()

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score


# Remove the single row holding the largest TOC value before fitting
# (presumably an outlier -- TODO confirm the reason for excluding it).
# NOTE: this rebinds `carbon`, so re-running this cell on its own removes one
# more row each time; re-run the cell that builds `carbon` first.
max_toc_label = carbon['TOC'].idxmax()
carbon = carbon.drop(index=max_toc_label)

# Ordinary least-squares fit of TOC (response) against %C (single predictor).
reg = linear_model.LinearRegression()

y = carbon['TOC'].values
x = carbon[['%C']].values  # 2-D (n_samples, 1) array, as scikit-learn expects

reg.fit(x, y)
y_pred = reg.predict(x)

# In-sample scores, evaluated on the same rows the model was fitted to.
# Previously `R2` was bound to the mean squared error while the actual
# coefficient of determination was stored only in `var`; each name now holds
# the quantity it says it does.
mse = mean_squared_error(y, y_pred)
R2 = r2_score(y, y_pred)
var = R2  # alias kept because the figure annotation reads `var`

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn

# Marker styling shared by every per-core scatter series.
# NOTE(review): this rebinds `kw`, which the loading cell also uses for its
# read_csv options -- consider a distinct name for each.
kw = {
    'markersize': 8,
    'alpha': 0.95,
    'markeredgecolor': 'w',
}

# Per-core marker shape and face colour, keyed by the same ids used in `dfs`.
glyph = {
    'co1': {'marker': 'd', 'markerfacecolor': 'SaddleBrown'},
    'co2': {'marker': 'o', 'markerfacecolor': 'MediumSeaGreen'},
    'co4': {'marker': '^', 'markerfacecolor': 'OrangeRed'},
    'co5': {'marker': 'p', 'markerfacecolor': 'DarkSlateBlue'},
}

# Style used for any core id that has no entry in `glyph`, so that an extra
# data file no longer aborts the figure with a KeyError.
fallback_glyph = {'marker': 's', 'markerfacecolor': 'grey'}


fig, ax = plt.subplots(figsize=(7, 7))

# Grey scatter of the pooled data plus a regression line; robust=True asks
# seaborn for a robust regression fit (this requires statsmodels).
ax = seaborn.regplot(
    x='%C', y='TOC',
    data=carbon,
    marker='d',
    color='grey',
    robust=True,
    ax=ax,
)

# Replace the column-name labels that regplot applies. This was previously
# repeated on every pass of the loop below; once is enough.
ax.set_xlabel('TOC_EA')
ax.set_ylabel('TOC_RE')

# Overlay each core's points on top of the regression layer.
for k, df in dfs.items():
    ax.plot(
        df['%C'], df['TOC'],
        linestyle='none',
        label=k.upper(),
        **glyph.get(k, fallback_glyph),
        **kw,
        zorder=999
    )

leg = ax.legend(frameon=False)

# Annotate with the R² value held in `var`, which is computed outside this cell.
# NOTE(review): confirm that value describes the same fit as the robust line
# drawn above.
ax.text(
    0.65, 0.90,
    f'R² = {var:0.2f}',
    transform=ax.transAxes);