In [None]:
import os
import glob
from pathlib import Path
import datetime

import pandas as pd
import matplotlib.pyplot as plt


In [None]:
def load_paper_dates(root=".."):
    """Build a table with one row per ``summary.md`` found below ``root``.

    Parameters
    ----------
    root : str or os.PathLike, default ".."
        Directory that is searched recursively for files named ``summary.md``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (file creation timestamp as a naive local datetime)
        and ``paper_title`` (name of the directory holding the file), sorted
        by ``date`` with a fresh 0..n-1 index. The ``date`` column has a
        datetime dtype even when no file was found.
    """
    records = {"date": [], "paper_title": []}
    pattern = os.path.join(root, "**", "summary.md")
    for found in glob.glob(pattern, recursive=True):
        summary_path = Path(found)
        info = summary_path.stat()
        # On POSIX st_ctime is the last metadata change, not the creation time.
        # Use the real birth time where the platform exposes it and fall back
        # to st_ctime otherwise (which keeps the previous behaviour there).
        created_ts = getattr(info, "st_birthtime", info.st_ctime)
        records["paper_title"].append(summary_path.parent.name)
        records["date"].append(datetime.datetime.fromtimestamp(created_ts))

    papers = pd.DataFrame(records)
    # An empty scan would otherwise leave an object column, which the weekly
    # pd.Grouper(key="date", ...) step cannot group on.
    papers["date"] = pd.to_datetime(papers["date"])
    # Break timestamp ties by title so the row order does not depend on the
    # order in which glob happens to return the files.
    return papers.sort_values(["date", "paper_title"]).reset_index(drop=True)


df = load_paper_dates("..")
df.head()


In [None]:
# Count papers per calendar week. 'W-SUN' is the pandas weekly frequency
# anchored on Sunday; each group key is used below as the end of a 7-day span.
one_week = datetime.timedelta(days=7)
week_labels = []
paper_counts = []
for week_end, papers_in_week in df.groupby(pd.Grouper(key='date', freq='W-SUN')):
    # Always log the week span; for non-empty weeks also log the first and
    # last paper timestamps that fell into it.
    log_fields = [week_end - one_week, week_end]
    if len(papers_in_week) > 0:
        log_fields.append(papers_in_week.iloc[0]["date"])
        log_fields.append(papers_in_week.iloc[-1]["date"])
    print(*log_fields)
    # Weeks without any paper are kept with a count of 0.
    week_labels.append(week_end)
    paper_counts.append(len(papers_in_week))

df_week = pd.DataFrame({"date": week_labels, "num_of_papers": paper_counts})
df_week = df_week.sort_values("date").reset_index(drop=True)
df_week.head()


In [None]:
# Derive the series that the plots below read from the weekly counts.
weekly_counts = df_week["num_of_papers"]
per_day = weekly_counts / 7.                               # weekly count spread over 7 days
per_day_ma = per_day.rolling(3, min_periods=1).mean()      # 3-row moving average; shorter window at the start
per_day_ma_diff1 = per_day_ma.diff(1)                      # row-to-row change of the moving average

df_week["num_of_papers(cm)"] = weekly_counts.cumsum()      # running total of papers
df_week["paper/day"] = per_day
df_week["paper/day(ma)"] = per_day_ma
df_week["paper/day(ma)(diff1)"] = per_day_ma_diff1
df_week["paper/day(ma)(diff2)"] = per_day_ma_diff1.diff(1)  # change of the change
df_week.head()


In [None]:
# Cumulative number of papers (left axis) together with the weekly
# paper/day rate (right axis), saved as a PNG under img/.

# savefig does not create missing directories, so make sure the target
# exists before writing (keeps a fresh checkout runnable end to end).
os.makedirs("img", exist_ok=True)

fig, axs = plt.subplots(figsize=(12, 7))
twn = axs.twinx()
axs.plot(df_week["date"], df_week["num_of_papers(cm)"], marker="o", color="darkred", label="num_of_papers(cm)")
twn.plot(df_week["date"], df_week["paper/day"], marker="o", color="black", label="paper/day")
# Horizontal reference lines on the rate axis at 1.0 and 0.7 paper/day.
twn.axhline(y=1.0, color="black", linestyle="dotted")
twn.axhline(y=0.7, color="gray", linestyle="dotted", alpha=0.5)
# Title spelling fixed ("Comulative" -> "Cumulative").
axs.set_title("Cumulative sum of num of papers", fontsize=12*1.5)
# Label every axis so the figure can be read on its own.
axs.set_xlabel("date")
axs.set_ylabel("num_of_papers(cm)")
twn.set_ylabel("paper/day")
fig.legend(bbox_to_anchor=(0, -0.01), loc='upper left', borderaxespad=0, fontsize=12, ncol=3)
fig.tight_layout()
# The file name intentionally keeps its original spelling so that existing
# references to this image keep resolving.
fig.savefig("img/Comulative_sum_of_num_of_papers.png", bbox_inches='tight')


In [None]:
# Weekly paper/day rate and its moving average on a single axis,
# saved as a PNG under img/.

# savefig does not create missing directories, so make sure the target
# exists before writing (keeps a fresh checkout runnable end to end).
os.makedirs("img", exist_ok=True)

fig, axs = plt.subplots(figsize=(12, 7))
axs.plot(df_week["date"], df_week["paper/day"], marker="o", color="black", label="paper/day")
axs.plot(df_week["date"], df_week["paper/day(ma)"], marker="o", color="gray", label="paper/day(ma)")
# Horizontal reference lines at 1.0 and 0.7 paper/day.
axs.axhline(y=1.0, color="black", linestyle="dotted")
axs.axhline(y=0.7, color="gray", linestyle="dotted", alpha=0.5)
axs.set_title("Transition of paper/day", fontsize=12*1.5)
# Label the axes so the figure can be read on its own.
axs.set_xlabel("date")
axs.set_ylabel("paper/day")
# Fixed y-range: values above 1.25 paper/day fall outside the visible area.
axs.set_ylim([-0.1, 1.25])
fig.legend(bbox_to_anchor=(0, -0.01), loc='upper left', borderaxespad=0, fontsize=12, ncol=3)
fig.tight_layout()
fig.savefig("img/Transition_of_paper_day1.png", bbox_inches='tight')


In [None]:
# Moving average of paper/day (left axis) with its first and second
# differences (right axis), saved as a PNG under img/.

# savefig does not create missing directories, so make sure the target
# exists before writing (keeps a fresh checkout runnable end to end).
os.makedirs("img", exist_ok=True)

fig, axs = plt.subplots(figsize=(12, 7))
twn = axs.twinx()
axs.plot(df_week["date"], df_week["paper/day(ma)"], marker="o", color="black", label="paper/day(ma)")
# Horizontal reference lines at 1.0 and 0.7 paper/day on the left axis.
axs.axhline(y=1.0, color="black", linestyle="dotted")
axs.axhline(y=0.7, color="gray", linestyle="dotted", alpha=0.5)
twn.plot(df_week["date"], df_week["paper/day(ma)(diff1)"], marker="o", color="deepskyblue", alpha=0.75, label="paper/day(ma)(diff1)")
twn.plot(df_week["date"], df_week["paper/day(ma)(diff2)"], marker="o", color="skyblue", linestyle="dashed", alpha=0.75, label="paper/day(ma)(diff2)")
# Zero line on the right axis separates increases from decreases.
twn.axhline(y=0.0, color="deepskyblue", linestyle="dotted", alpha=0.5)
axs.set_title("Transition of paper/day", fontsize=12*1.5)
# Label every axis so the two scales cannot be confused.
axs.set_xlabel("date")
axs.set_ylabel("paper/day(ma)")
twn.set_ylabel("paper/day(ma) diff")
# Fixed y-ranges: values outside them fall outside the visible area.
axs.set_ylim([-0.05, 1.05])
twn.set_ylim([-1.05, 1.05])
fig.legend(bbox_to_anchor=(0, -0.01), loc='upper left', borderaxespad=0, fontsize=12, ncol=3)
fig.tight_layout()
fig.savefig("img/Transition_of_paper_day2.png", bbox_inches='tight')


In [None]:
# Write the per-paper table to df.csv in the working directory, omitting the row index.
df.to_csv(path_or_buf="df.csv", index=False)


In [None]:
# Write the weekly table to df_week.csv in the working directory, omitting the row index.
df_week.to_csv(path_or_buf="df_week.csv", index=False)
