In [1]:
import os

import pandas as pd
import numpy as np
from scipy import stats

import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's "whitegrid" style for the figures drawn in this notebook.
sns.set_theme(style="whitegrid")

In [16]:
# Run configuration: the batch of replicate files to read, how many replicate
# indices to try, the score column to analyse, and the upper MAF bound shown
# on the figure.
in_prefix = "ten_ceu_ceu_t0_p90"

num_reps = 100
metric = "iqs"

max_maf = 0.01


def maf_percent_tag(maf):
    """Return the file-name tag for a MAF cut-off, e.g. 0.01 -> "m1".

    The cut-off is written as a whole percentage.  The product is rounded
    rather than truncated because binary floating point puts values such as
    0.29 * 100 just below the intended integer (28.999...), which ``int()``
    would turn into the wrong label ("m28").
    """
    return "m" + str(round(maf * 100))


# Output image name: <prefix>.<metric>.m<percent>.png
out_file = ".".join([in_prefix, metric, maf_percent_tag(max_maf), "png"])

In [17]:
# Replicate files are read from <base_dir>/<in_prefix>/.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/Users/szhan/Projects/tsimpute/analysis/genealogy_only/"

# Per-replicate frames are gathered in a list and concatenated once after the
# loop; concatenating inside the loop re-copies all earlier rows every time.
rep_frames = []
scores = []  # mean of `metric` for each replicate file that was found

for i in range(1, num_reps + 1):
    # Replicate files are named like "i1.csv_1.csv".
    in_file = os.path.join(base_dir, in_prefix, f"i{i}.csv_{i}.csv")

    if not os.path.exists(in_file):
        # A missing replicate is reported and skipped rather than treated as
        # an error.
        print(f"Not found {in_file}")
        continue

    # The first 20 lines of each file are skipped (presumably a metadata
    # preamble -- TODO confirm); rows missing either column are dropped.
    df = pd.read_csv(in_file, skiprows=20)
    df = df[["maf", metric]].dropna(axis=0)
    scores.append(np.mean(df[metric]))
    rep_frames.append(df)

# Left as None when no replicate file was found.
results = pd.concat(rep_frames) if rep_frames else None

In [None]:
# Fit a Gaussian KDE to the (MAF, metric) pairs and evaluate it back at those
# same points; `x` is the resulting density for each row of `results`.
values = np.vstack((results["maf"], results[metric]))  # row 0: MAF, row 1: metric
kernel = stats.gaussian_kde(values)
x = kernel.evaluate(values)

In [None]:
# Scatter of the metric against MAF, each point coloured by the density in
# `x`, written to `out_file`.
fig, ax = plt.subplots(figsize=(10, 10))

ax.set_title(f"MAF 0.00 to {max_maf}", size="xx-large")

# Only the 0..max_maf range is displayed; the data themselves are not filtered.
ax.set_xlim([0, max_maf])

ax.set_ylabel(metric.upper(), size="xx-large")
ax.set_xlabel("MAF", size="xx-large")

# `x_jitter` is intentionally not passed: seaborn documents it as
# non-functional for scatterplot, so it never affected the figure, and where
# the parameter is no longer accepted it would fall through to matplotlib as
# an unknown keyword (verify against the installed seaborn version).
g = sns.scatterplot(
    data=results,
    x="maf",
    y=metric,
    c=x,
    cmap="viridis",
    ax=ax,
)

# `fig` is the figure that owns `ax`, i.e. the one just drawn on.
fig.savefig(out_file)