In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, ttest_rel, ttest_1samp
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
import trompy as tp

In [None]:
# Project-relative data folder, built with "/" so it resolves on every OS
# (a literal "..\\data" is only a relative path on Windows).
DATAFOLDER = Path("..") / "data"

# Figures are written to the manuscript folder when it exists on this machine;
# anywhere else they fall back to the project's own figures folder.
_MANUSCRIPT_FIGSFOLDER = Path("C:\\Users\\jmc010\\Dropbox\\Publications in Progress\\IPP Giulia paper\\figs")
FIGSFOLDER = _MANUSCRIPT_FIGSFOLDER if _MANUSCRIPT_FIGSFOLDER.exists() else Path("..") / "figures"

# Set to True to write each figure to FIGSFOLDER as a PDF.
save_figs = False

def tweak_df(df):
    """Return only the rows of ``df`` whose ``session`` column equals 's4'.

    The input frame is not modified; the filtered rows keep their original index.
    """
    session_filter = "session == 's4'"
    filtered = df.query(session_filter)
    return filtered

# Read the "metafile Exp 1" sheet and keep only the rows selected by tweak_df.
metafile_path = DATAFOLDER / "metafile_ipp.xlsx"
df = pd.read_excel(metafile_path, sheet_name="metafile Exp 1").pipe(tweak_df)

### Panel A

In [None]:
def get_licks_per_solution(row):
    """Read one session's med file and return its data as ``(casein, malt)``.

    ``row`` must provide ``medfile``, ``bottleL`` and ``bottleR``. The file
    ``DATAFOLDER / row.medfile`` is read with ``tp.medfilereader``, extracting
    variables "b" and "e"; the first is taken as the left bottle and the second
    as the right bottle (presumably lick timestamps -- callers use their length
    and histogram them). The bottle labels decide which side is which solution.

    Raises:
        ValueError: if the labels are not one "malt" and one "cas" bottle.
    """
    left, right = tp.medfilereader(DATAFOLDER / row.medfile, vars_to_extract=["b", "e"], remove_var_header=True)

    # Maltodextrin on the left, casein on the right.
    if "malt" in row.bottleL and "cas" in row.bottleR:
        return right, left

    # Maltodextrin on the right, casein on the left.
    if "malt" in row.bottleR and "cas" in row.bottleL:
        return left, right

    raise ValueError("Bottles not found")

In [None]:
# Total casein and maltodextrin lick counts, one entry per session, for each diet group.
PR_cas, PR_malt = [], []
for _idx, session_row in df.query("dietgroup == 'PR'").iterrows():
    casein_data, malt_data = get_licks_per_solution(session_row)
    PR_cas.append(len(casein_data))
    PR_malt.append(len(malt_data))

NR_cas, NR_malt = [], []
for _idx, session_row in df.query("dietgroup == 'NR'").iterrows():
    casein_data, malt_data = get_licks_per_solution(session_row)
    NR_cas.append(len(casein_data))
    NR_malt.append(len(malt_data))

In [None]:
# Bar fill colours; also reused by later cells (colors[0] for NR bars, colors[2] for PR bars).
colors = ["xkcd:silver", "white", "xkcd:electric blue", "xkcd:powder blue"]

panel_a_layout = {"left": 0.35, "bottom": 0.1, "top": 0.95, "right": 0.95}
f, ax = plt.subplots(figsize=(2, 2), gridspec_kw=panel_a_layout)

# One inner list per diet group, each holding [casein, malt] lick counts.
licks_by_group = [[NR_cas, NR_malt], [PR_cas, PR_malt],]
tp.barscatter(licks_by_group,
              ax=ax,
              paired=True,
              barfacecolor_option="individual",
              barfacecolor=colors,
              scattersize=40
            );

ax.set_yticks([0, 2000, 4000, 6000])
ax.set_ylabel("Licks")

if save_figs:
    f.savefig(FIGSFOLDER / "1A_licks_per_solution.pdf", dpi=300)


In [None]:
### Stats
# Long-format table: all casein rows (NR then PR) followed by all malt rows in the
# same order, so each subject id appears once per solution.
n_cas_rows = len(NR_cas) + len(PR_cas)
n_malt_rows = len(NR_malt) + len(PR_malt)
diet_per_subject = ["NR"] * len(NR_cas) + ["PR"] * len(PR_cas)

data = {
    'subject': np.tile(np.arange(1, n_cas_rows + 1), 2),
    'diet': np.tile(diet_per_subject, 2),
    'solution': np.array(["cas"] * n_cas_rows + ["malt"] * n_malt_rows),
    'licks': np.concatenate([NR_cas, PR_cas, NR_malt, PR_malt]),
}
df_stats = pd.DataFrame(data)

# Two-way mixed ANOVA: solution within subject, diet between subjects.
aov = pg.mixed_anova(data=df_stats, dv='licks', subject='subject', within='solution', between='diet')
print(aov)

In [None]:
## Posthocs

ncomps = 4 #because NR vs PR for each solution and cas vs malt for each diet group

# Each entry: (label, test function, sample a, sample b).
# Solution comparisons are within-animal (paired); diet comparisons are between groups.
posthoc_comparisons = [
    ("NR cas vs NR malt", ttest_rel, NR_cas, NR_malt),
    ("PR cas vs PR malt", ttest_rel, PR_cas, PR_malt),
    ("NR cas vs PR cas", ttest_ind, NR_cas, PR_cas),
    ("NR malt vs PR malt", ttest_ind, NR_malt, PR_malt),
]

for label, test_func, sample_a, sample_b in posthoc_comparisons:
    _, p = test_func(sample_a, sample_b)
    # Corrected p-value first, uncorrected p-value in parentheses.
    print("{} = {:.3f} ({:.3f} before correction)".format(label, tp.sidakcorr(p, ncomps=ncomps), p))



### Panel B

In [None]:
# Seed NumPy's global generator so the scatter jitter is identical on every run.
np.random.seed(41)

def jitter(x, n, spread=0.1):
    """Return ``n`` values: ``x`` plus Gaussian noise with mean 0 and SD ``spread``.

    Draws from NumPy's global random state.
    """
    noise = np.random.normal(loc=0, scale=spread, size=n)
    return x + noise

# Protein preference per session: casein licks as a fraction of all licks.
PR_cas_counts, PR_malt_counts = np.array(PR_cas), np.array(PR_malt)
PR_pref = PR_cas_counts / (PR_cas_counts + PR_malt_counts)
NR_cas_counts, NR_malt_counts = np.array(NR_cas), np.array(NR_malt)
NR_pref = NR_cas_counts / (NR_cas_counts + NR_malt_counts)

panel_b_layout = {"left": 0.4, "bottom": 0.1, "top": 0.95, "right": 0.95}
f, ax = plt.subplots(figsize=(1.4, 2), gridspec_kw=panel_b_layout)

# (x position, preferences, bar colour) for each diet group.
pref_groups = [(0, NR_pref, colors[0]), (1, PR_pref, colors[2])]

# Bars show the group mean; all bars are added before any points, NR before PR.
for xpos, prefs, bar_color in pref_groups:
    ax.bar( xpos, np.mean(prefs), color=bar_color)

# Individual sessions, jittered horizontally around the bar centre.
for xpos, prefs, _bar_color in pref_groups:
    ax.scatter(jitter(xpos, len(prefs)), prefs, color="white", edgecolor="grey")

ax.set_ylim(0, 1.1)
ax.set_xlim(-0.55, 1.55)
ax.set_yticks([0, 0.5, 1])
ax.set_xticks([0, 1], labels=["NR", "PR"])

for side in ("top", "right"):
    ax.spines[side].set_visible(False)

ax.tick_params(axis='x', which='both', length=0)

ax.set_ylabel("Protein preference")

if save_figs:
    f.savefig(FIGSFOLDER / "1B_protein_preference.pdf", dpi=300)

In [None]:
# alternative version: preference is centred on 0.5 so bars rise or fall from the dashed line
np.random.seed(41)

# jitter() is reused from the Panel B cell above instead of being defined a second time.

# The centred values get their own names. Overwriting PR_pref / NR_pref here would leave
# shifted values behind for later code that expects preferences on the 0-1 scale.
PR_pref_centred = np.array(PR_cas) / (np.array(PR_cas) + np.array(PR_malt)) - 0.5
NR_pref_centred = np.array(NR_cas) / (np.array(NR_cas) + np.array(NR_malt)) - 0.5

f, ax = plt.subplots(figsize=(1.4, 2),
                     gridspec_kw={"left": 0.4, "bottom": 0.1, "top": 0.95, "right": 0.95})

ax.bar( 0, np.mean(NR_pref_centred), color=colors[0])
ax.bar( 1, np.mean(PR_pref_centred), color=colors[2])

# xlen=0.3
# ax.plot([0-xlen, 0+xlen], [np.mean(NR_pref_centred), np.mean(NR_pref_centred)], color="black", lw=1.5)
# ax.plot([1-xlen, 1+xlen], [np.mean(PR_pref_centred), np.mean(PR_pref_centred)], color=colors[2], lw=1.5)

ax.scatter(jitter(0, len(NR_pref_centred)), NR_pref_centred, color="white", edgecolor="grey")
ax.scatter(jitter(1, len(PR_pref_centred)), PR_pref_centred, color="white", edgecolor="grey")

ax.set_ylim(-0.5, 0.6)
ax.set_xlim(-0.55, 1.55)
# Tick labels are shifted back by +0.5 so the axis still reads as a 0-1 preference.
ax.set_yticks([-0.5,0, 0.5], labels=["0", "0.5", "1"])
ax.set_xticks([0, 1], labels=["NR", "PR"])

# Dashed line at indifference (preference 0.5 on the labelled scale).
ax.axhline(0, color="black", lw=0.75, ls="--", zorder=0, alpha=0.5)

ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["bottom"].set_visible(False)

ax.tick_params(axis='x', which='both', length=0)

ax.set_ylabel("Protein preference")

# NOTE(review): same filename as the first Panel B version, so with save_figs=True
# this figure replaces that one -- confirm this is the version meant for the paper.
if save_figs:
    f.savefig(FIGSFOLDER / "1B_protein_preference.pdf", dpi=300)

In [None]:
## Statistics
# One-sample t-tests of each group's protein preference against indifference (0.5).
# The preference is recomputed from the lick counts on the 0-1 scale so the test does
# not depend on whether PR_pref / NR_pref were re-centred for plotting in an earlier cell.
NR_pref_unit = np.array(NR_cas) / (np.array(NR_cas) + np.array(NR_malt))
PR_pref_unit = np.array(PR_cas) / (np.array(PR_cas) + np.array(PR_malt))

t, p = ttest_1samp(NR_pref_unit, 0.5)
print("NR preference: t={:.3f}, p={:.3f}".format(t, p))

t, p = ttest_1samp(PR_pref_unit, 0.5)
print("PR preference: t={:.3f}, p={:.3f}".format(t, p))



### Panels C and D

In [None]:
def make_protein_pref_heatmap(casein_licks, malt_licks, bins):
    """Return the running protein preference at the end of each time bin.

    Both lick arrays are histogrammed with ``bins`` and accumulated, and the
    preference for a bin is cumulative casein / (cumulative casein + cumulative malt).

    Args:
        casein_licks: values to histogram for casein (e.g. lick times).
        malt_licks: values to histogram for maltodextrin.
        bins: bin edges passed to ``np.histogram``.

    Returns:
        list of floats, one per bin. Bins reached before any lick of either
        kind has occurred are NaN (the preference is undefined there).
    """
    casein_cumsum = np.cumsum(np.histogram(casein_licks, bins=bins)[0])
    malt_cumsum = np.cumsum(np.histogram(malt_licks, bins=bins)[0])
    total_cumsum = casein_cumsum + malt_cumsum

    # Divide only where at least one lick has been made; the pre-filled NaN is kept
    # elsewhere, which avoids the 0/0 RuntimeWarning of a plain division.
    preference_cumsum = np.full(total_cumsum.shape, np.nan)
    np.divide(casein_cumsum, total_cumsum, out=preference_cumsum, where=total_cumsum > 0)

    return list(preference_cumsum)

def get_heatmap_data_from_row(row, **kwargs):
    """Return the cumulative protein-preference trace for one session row.

    Licks are read with ``get_licks_per_solution(row)`` and passed to
    ``make_protein_pref_heatmap``.

    Keyword Args:
        bins: bin edges for the histogram; defaults to ``np.linspace(0,3600, 13)``
            (12 equal bins from 0 to 3600).
    """
    bins = kwargs["bins"] if "bins" in kwargs else np.linspace(0,3600, 13)

    casein_licks, malt_licks = get_licks_per_solution(row)

    return make_protein_pref_heatmap(casein_licks, malt_licks, bins)

# get_heatmap_data_from_row(df.iloc[0], bins=np.linspace(0,3600, 61))

In [None]:
# 12 equal bins spanning 0-3600.
bins = np.linspace(0, 3600, 13)

# One preference trace per session, collected separately for each diet group.
PR_data = []
for _idx, session_row in df.query("dietgroup == 'PR'").iterrows():
    PR_data.append(get_heatmap_data_from_row(session_row, bins=bins))

NR_data = []
for _idx, session_row in df.query("dietgroup == 'NR'").iterrows():
    NR_data.append(get_heatmap_data_from_row(session_row, bins=bins))


In [None]:
def sort_by_last_value(data):
    """Return ``data`` as a 2-D array with rows ordered by ascending last-column value."""
    rows = np.array(data)
    row_order = np.argsort(rows[:, -1])
    return rows[row_order]

# Order each group's sessions by their final cumulative preference for plotting.
PR_data_to_plot, NR_data_to_plot = (sort_by_last_value(group) for group in (PR_data, NR_data))

def make_heatmap_fig(data, ax, cbar_ax=False, show_x_axis=True, **kwargs):
    """Draw a protein-preference heatmap (one row per session) on ``ax``.

    Args:
        data: 2-D array-like of preferences, rows = sessions, columns = time bins.
        ax: axes to draw the heatmap on.
        cbar_ax: axes to draw the colourbar on; any falsy value (default) draws
            the heatmap without a colourbar.
        show_x_axis: if True, label the x-axis in minutes with seven evenly
            spaced ticks labelled 0-60; otherwise remove the x ticks.
        **kwargs: accepted but currently unused.
    """
    data = np.asarray(data)

    # Colour scale is fixed to the full 0-1 preference range.
    heatmap_style = {"cmap": "coolwarm", "vmin": 0, "vmax": 1}

    if cbar_ax:
        sns.heatmap(data, ax=ax, cbar_ax=cbar_ax, **heatmap_style)

        # Style the colourbar axes that was passed in (not whatever global
        # axes happens to be called ax3 in the calling cell).
        cbar_ax.set_ylabel('Protein pref.', rotation=270, labelpad=15, fontsize=10)
        cbar_ax.set_yticks([0, 0.5, 1], labels=["0.0", "0.5", "1.0"], fontsize=8)
        cbar_ax.tick_params(axis='y', which='both', length=0)
    else:
        sns.heatmap(data, ax=ax, cbar=False, **heatmap_style)

    # Flip the y-axis so the first row of ``data`` is drawn at the bottom.
    ax.invert_yaxis()
    ax.set_yticks([])

    if show_x_axis:
        # Seven ticks spread across all columns, labelled as minutes.
        ticks = np.linspace(0, data.shape[1], 7)
        ax.set_xticks(ticks,
                    labels=[0, 10, 20, 30, 40, 50, 60],
                    rotation=0)
        ax.set_xlabel("Time (min)")
    else:
        ax.set_xticks([])

# Three axes in a row: NR heatmap, PR heatmap, and a narrow axes for the colourbar.
heatmap_layout = {"width_ratios": [1, 1, 0.1],
                  "left": 0.15, "bottom": 0.1, "top": 0.95, "right": 0.8}
f, heatmap_axes = plt.subplots(ncols=3, figsize=(4.8,1), gridspec_kw=heatmap_layout)
ax1, ax2, ax3 = heatmap_axes

make_heatmap_fig(NR_data_to_plot, ax1, cbar_ax=False, show_x_axis=False)
make_heatmap_fig(PR_data_to_plot, ax2, cbar_ax=ax3, show_x_axis=False)

if save_figs:
    f.savefig(FIGSFOLDER / "1C_heatmaps.pdf", dpi=300)

### Panels E and F

In [None]:
def get_cumsum_licks(licks, bins=np.linspace(0, 3600, 61)):
    """Return the cumulative count of ``licks`` at the end of each bin.

    ``bins`` are the histogram edges; the default is 60 equal bins from 0 to 3600.
    """
    counts, _edges = np.histogram(licks, bins=bins)
    return counts.cumsum()

# 60 equal bins spanning 0-3600; the same edges are used for the histograms here
# and for the x-axis of the cumulative-lick figure.
bins = np.linspace(0, 3600, 61)

# Cumulative lick curves, one row per session. ``bins`` is passed explicitly so the
# curves cannot drift out of step with the plotted x-axis if the binning is changed.
PR_cas, PR_malt = [], []
for _idx, session_row in df.query("dietgroup == 'PR'").iterrows():
    cas_licks, malt_licks = get_licks_per_solution(session_row)

    PR_cas.append(get_cumsum_licks(cas_licks, bins=bins))
    PR_malt.append(get_cumsum_licks(malt_licks, bins=bins))

NR_cas, NR_malt = [], []
for _idx, session_row in df.query("dietgroup == 'NR'").iterrows():
    cas_licks, malt_licks = get_licks_per_solution(session_row)

    NR_cas.append(get_cumsum_licks(cas_licks, bins=bins))
    NR_malt.append(get_cumsum_licks(malt_licks, bins=bins))

# Stack into (sessions x bins) arrays. From here on PR_cas etc. hold cumulative
# curves rather than the per-session lick totals used for the earlier panels.
PR_cas = np.array(PR_cas)
PR_malt = np.array(PR_malt)
NR_cas = np.array(NR_cas)
NR_malt = np.array(NR_malt)

In [None]:
def make_cumsum_fig(cas_licks, malt_licks, bins, colors, ax):
    """Plot mean cumulative licks with a shaded error band for casein and maltodextrin.

    Args:
        cas_licks, malt_licks: 2-D arrays of cumulative licks, one row per session.
        bins: bin edges; every edge except the last is used as the x coordinate.
        colors: ``colors[0]`` fills both bands, ``colors[1]`` is the casein line
            and ``colors[2]`` the maltodextrin line.
        ax: axes to draw on.
    """
    x_vals = bins[:-1]

    # Band half-width: SD across sessions / sqrt(number of sessions).
    # NOTE(review): np.std defaults to ddof=0 (population SD); confirm this is the intended SEM.
    half_widths = [np.std(licks, axis=0) / np.sqrt(licks.shape[0])
                   for licks in (cas_licks, malt_licks)]

    traces = zip((cas_licks, malt_licks), half_widths, (colors[1], colors[2]), ("Casein", "Maltodextrin"))
    for licks, half_width, line_color, label in traces:
        mean_licks = np.mean(licks, axis=0)
        ax.fill_between(x_vals, mean_licks - half_width, mean_licks + half_width, color=colors[0], alpha=0.5)
        ax.plot(x_vals, mean_licks, color=line_color, label=label)
    
    

# Same three-column layout as the heatmap figure; the third axes is switched off below.
cumsum_layout = {"width_ratios": [1, 1, 0.1],
                 "left": 0.15, "bottom": 0.21, "top": 0.95, "right": 0.8}
f, cumsum_axes = plt.subplots(ncols=3, figsize=(4.8,2), sharey=True, gridspec_kw=cumsum_layout)
ax1, ax2, ax3 = cumsum_axes

# [error band, casein line, maltodextrin line] for each diet group.
colors_NR = ["xkcd:silver", "black", "xkcd:grey"]
colors_PR = ["xkcd:silver", "xkcd:electric blue", "xkcd:powder blue"]

make_cumsum_fig(NR_cas, NR_malt, bins, colors_NR, ax1)
make_cumsum_fig(PR_cas, PR_malt, bins, colors_PR, ax2)

for ax in [ax1, ax2]:
    ax.set_xlim(-200, 3600)
    # x data run 0-3600; ticks are relabelled 0-60 to read as minutes.
    ax.set_xticks([0, 1800, 3600], labels=[0, 30, 60])
    ax.set_xlabel("Time (min)")

    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

ax1.set_ylabel("Cumulative licks")

ax3.axis("off")

if save_figs:
    f.savefig(FIGSFOLDER / "1D_cumulative_licks.pdf", dpi=300)


In [None]:
# Linear regression stats
# TODO: the mixed-effects model for the cumulative-lick curves is not written yet.
# The unfinished `df_stats = pd.Data` statement that ended this cell raised
# AttributeError and stopped "Run All", so it is kept only as a comment below.
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

# df_stats = pd.DataFrame(...)  # TODO: build the long-format table for mixedlm

In [None]:
# analysis on grouped data
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Pool the PR group's cumulative casein and malt values into two flat columns.
pooled_columns = {
    "casein": tp.flatten_list(PR_cas),
    "malt": tp.flatten_list(PR_malt),
}
df_stats = pd.DataFrame(data=pooled_columns)

In [None]:
def get_intake_per_solution(row):
    """Return ``(casein_intake, malt_intake)`` for one session row.

    ``row.bottleL`` / ``row.bottleR`` say which solution was on each side and
    ``row.intakeL`` / ``row.intakeR`` hold the matching intake values.

    Raises:
        ValueError: if the labels are not one "cas" and one "malt" bottle.
    """
    if "cas" in row.bottleL and "malt" in row.bottleR:
        casein_intake, malt_intake = row.intakeL, row.intakeR
    elif "cas" in row.bottleR and "malt" in row.bottleL:
        casein_intake, malt_intake = row.intakeR, row.intakeL
    else:
        raise ValueError("Bottles not found")

    return casein_intake, malt_intake
    
# Casein and maltodextrin intake, one value per session, for each diet group.
# From here on PR_cas / PR_malt / NR_cas / NR_malt hold intake instead of lick data.
PR_cas, PR_malt = [], []
for _idx, session_row in df.query("dietgroup == 'PR'").iterrows():
    cas_ml, malt_ml = get_intake_per_solution(session_row)

    PR_cas.append(cas_ml)
    PR_malt.append(malt_ml)

# NOTE(review): only the NR group is filtered with dropna(), which removes rows with
# a missing value in ANY column. Confirm that PR has no missing intake values, or
# apply the same filter to both groups.
NR_cas, NR_malt = [], []
for _idx, session_row in df.dropna().query("dietgroup == 'NR'").iterrows():
    cas_ml, malt_ml = get_intake_per_solution(session_row)

    NR_cas.append(cas_ml)
    NR_malt.append(malt_ml)

In [None]:
# Supplementary panel A: intake per solution for each diet group
# (same layout as the lick-count bar plot, but the data are intake values).
f, ax = plt.subplots(figsize=(2, 2),
                     gridspec_kw={"left": 0.35, "bottom": 0.1, "top": 0.95, "right": 0.95})

tp.barscatter([[NR_cas, NR_malt], [PR_cas, PR_malt],],
              ax=ax,
              paired=True,
              barfacecolor_option="individual",
              barfacecolor=colors,
              scattersize=40
            );

ax.set_yticks([0, 10, 20])
# The plotted values are intake volumes, not lick counts.
ax.set_ylabel("Intake (mL)")

# NOTE(review): the filename still says "licks"; it is left unchanged so existing
# links to the file keep working -- rename if the manuscript allows.
if save_figs:
    f.savefig(FIGSFOLDER / "S1A_licks_per_solution.pdf", dpi=300)

In [None]:
### Stats
# Long-format table: all casein rows (NR then PR) followed by all malt rows in the
# same order, so each subject id appears once per solution. The dependent variable
# is intake, so the column is named accordingly.
n_cas_rows = len(NR_cas) + len(PR_cas)
n_malt_rows = len(NR_malt) + len(PR_malt)
diet_per_subject = ["NR"] * len(NR_cas) + ["PR"] * len(PR_cas)

data = {
    'subject': np.tile(np.arange(1, n_cas_rows + 1), 2),
    'diet': np.tile(diet_per_subject, 2),
    'solution': np.array(["cas"] * n_cas_rows + ["malt"] * n_malt_rows),
    'intake': np.concatenate([NR_cas, PR_cas, NR_malt, PR_malt]),
}

df_stats = pd.DataFrame(data)

# Two-way mixed ANOVA: solution within subject, diet between subjects.
aov = pg.mixed_anova(data=df_stats, dv='intake', subject='subject', within='solution', between='diet')

print(aov)

In [None]:
## Posthocs

ncomps = 4 #because NR vs PR for each solution and cas vs malt for each diet group

# Each entry: (label, test function, sample a, sample b).
# Solution comparisons are within-animal (paired); diet comparisons are between groups.
posthoc_comparisons = [
    ("NR cas vs NR malt", ttest_rel, NR_cas, NR_malt),
    ("PR cas vs PR malt", ttest_rel, PR_cas, PR_malt),
    ("NR cas vs PR cas", ttest_ind, NR_cas, PR_cas),
    ("NR malt vs PR malt", ttest_ind, NR_malt, PR_malt),
]

for label, test_func, sample_a, sample_b in posthoc_comparisons:
    _, p = test_func(sample_a, sample_b)
    # Corrected p-value first, uncorrected p-value in parentheses.
    print("{} = {:.3f} ({:.3f} before correction)".format(label, tp.sidakcorr(p, ncomps=ncomps), p))



In [None]:
# Re-seed so this figure's jitter is reproducible regardless of earlier random draws.
np.random.seed(41)

# jitter() is reused from the Panel B cell earlier in the notebook instead of
# being defined again with an identical body.

def calc_pref(cas, malt):
    """Return casein preference ``cas / (cas + malt)`` with explicit zero handling.

    Returns 1 when ``malt`` is 0 and 0 when only ``cas`` is 0.
    NOTE(review): when both are 0 the result is 1, not NaN -- confirm this is intended.
    """
    if malt == 0:
        return 1
    return 0 if cas == 0 else cas / (cas + malt)

# Protein preference per session from intake, with zero intakes handled by calc_pref.
PR_pref = np.array([calc_pref(cas_ml, malt_ml) for cas_ml, malt_ml in zip(PR_cas, PR_malt)])
NR_pref = np.array([calc_pref(cas_ml, malt_ml) for cas_ml, malt_ml in zip(NR_cas, NR_malt)])

s1b_layout = {"left": 0.4, "bottom": 0.1, "top": 0.95, "right": 0.95}
f, ax = plt.subplots(figsize=(1.4, 2), gridspec_kw=s1b_layout)

# (x position, preferences, bar colour) for each diet group.
pref_groups = [(0, NR_pref, colors[0]), (1, PR_pref, colors[2])]

# Bars show the group mean; all bars are added before any points, NR before PR.
for xpos, prefs, bar_color in pref_groups:
    ax.bar( xpos, np.mean(prefs), color=bar_color)

# Individual sessions, jittered horizontally around the bar centre.
for xpos, prefs, _bar_color in pref_groups:
    ax.scatter(jitter(xpos, len(prefs)), prefs, color="white", edgecolor="grey")

ax.set_ylim(0, 1.1)
ax.set_xlim(-0.55, 1.55)
ax.set_yticks([0, 0.5, 1])

ax.set_xticks([0, 1], labels=["NR", "PR"])

for side in ("top", "right"):
    ax.spines[side].set_visible(False)

ax.tick_params(axis='x', which='both', length=0)

ax.set_ylabel("Protein preference")

if save_figs:
    f.savefig(FIGSFOLDER / "S1B_protein_preference.pdf", dpi=300)

In [None]:
# to correlate licks with intake
# One entry per session (both diet groups), using only rows with no missing values.
cas_licks, cas_intake, malt_licks, malt_intake = [], [], [], []

for _idx, session_row in df.dropna().iterrows():
    cas_lick_data, malt_lick_data = get_licks_per_solution(session_row)
    cas_ml, malt_ml = get_intake_per_solution(session_row)

    cas_licks.append(len(cas_lick_data))
    malt_licks.append(len(malt_lick_data))
    cas_intake.append(cas_ml)
    malt_intake.append(malt_ml)

# Convert to arrays so they can be boolean-masked when plotting.
cas_licks, malt_licks = np.array(cas_licks), np.array(malt_licks)
cas_intake, malt_intake = np.array(cas_intake), np.array(malt_intake)


In [None]:
# Quick inspection: shape of the casein-intake array built in the previous cell.
cas_intake.shape

In [None]:
from scipy.stats import linregress

def make_corr_plot(x, y, ax=None, color="black", limit_range=False, lick_threshold=4000):
    """Scatter ``y`` against ``x`` and overlay a least-squares regression line.

    Args:
        x: 1-D array-like of x values (intake in the calling cell).
        y: 1-D array-like of y values (lick counts in the calling cell).
        ax: axes to draw on; a new 2x2 inch figure is created when None.
        color: colour of the points used for the regression.
        limit_range: if True, points with ``y > lick_threshold`` are left out of
            the regression and drawn as hollow grey markers instead.
        lick_threshold: cut-off applied to ``y`` when ``limit_range`` is True.

    The slope is written on the axes and printed. Returns None.
    """
    if ax is None:
        f, ax = plt.subplots(figsize=(2, 2))

    x = np.asarray(x)
    y = np.asarray(y)

    # Empty unless limit_range is set, so the plotting below needs no special case.
    x_excluded, y_excluded = x[:0], y[:0]

    if limit_range:
        # Both masks are taken from the unfiltered y, so x and y stay aligned.
        above = y > lick_threshold
        within = y <= lick_threshold
        x_excluded, y_excluded = x[above], y[above]
        x, y = x[within], y[within]

    # Points used for the fit are drawn once; excluded points are hollow.
    ax.scatter(x, y, color=color, alpha=0.5)
    if limit_range:
        ax.scatter(x_excluded, y_excluded, facecolors='white', edgecolors='grey', alpha=0.5)

    slope, intercept, r_value, p_value, std_err = linregress(x, y)

    # Draw the fitted line across the current x-limits of the axes.
    x_vals = np.array(ax.get_xlim())
    regression_line = slope * x_vals + intercept

    ax.plot(x_vals, regression_line, color="black", alpha=0.5, linestyle="--")

    ax.text(0.5, 0.2, f"{slope:.0f} licks/mL",
            transform=ax.transAxes, ha="left")

    print(slope)

# Two panels sharing the y-axis: casein on the left, maltodextrin on the right.
f, corr_axes = plt.subplots(ncols=2, sharey=True,
                            gridspec_kw={"wspace": 0.5},
                            figsize=(5, 2)
                            )
ax1, ax2 = corr_axes

make_corr_plot(cas_intake, cas_licks, ax=ax1, color="black", limit_range=True)
make_corr_plot(malt_intake, malt_licks, ax=ax2, color="black", limit_range=True)

for axis in [ax1, ax2]:
    for side in ("top", "right"):
        axis.spines[side].set_visible(False)
    axis.set_xticks([0, 10, 20])
    axis.set_yticks([0, 2000, 4000])
    axis.set_xlabel("Intake (mL)")

ax1.set_ylabel("Licks")

if save_figs:
    f.savefig(FIGSFOLDER / "S1C_licks_vs_intake.pdf", dpi=300)


