In [3]:
from sklearn.linear_model import LinearRegression
from matplotlib.dates import DateFormatter
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import datetime as dt
import pandas as pd
import numpy as np
import os

%matplotlib inline
mpl.rcParams["axes.labelsize"] = 12
mpl.rcParams["axes.titlesize"] = 12
mpl.rcParams["xtick.labelsize"] = 12
mpl.rcParams["ytick.labelsize"] = 12
mpl.rcParams["lines.linewidth"] = 2
mpl.rcParams["legend.fontsize"] = 8
mpl.rcParams["xtick.direction"] = "in"
mpl.rcParams["ytick.direction"] = "in"
mpl.rcParams["font.family"] = "serif"
mpl.rcParams["image.cmap"] = "viridis"

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
ALL_DIR = os.path.join(ROOT_DIR, "data", "All.csv")


In [1]:
def line_plot_count(data, title, xlabel="", ylabel=""):

    fig, ax = plt.subplots(figsize=(25, 2.5))
    fig.canvas.draw()

    ax.locator_params(axis="x", nbins=12)
    beginning = dt.date(2022, 1, 1)
    end = dt.date(2023,1,1)

    ax.plot(
        data.index.values,
        data["URL"],
        color="black",
        linewidth=1,
    )

    ax.set(
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        xbound=(beginning, end),
    )

    date_form = DateFormatter("%b-%y")
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax.xaxis.set_major_formatter(date_form)
    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(start, end, end / 5))

    ticksx = [tick for tick in ax.xaxis.get_majorticklabels()]
    ticksx[0].set_horizontalalignment("left")

    ticksy = ["{i:.0f}".format(i=item) for item in ax.get_yticks()]
    ticksy[0] = "0"
    ax.set_yticklabels(ticksy)

    return fig

In [2]:
sources = ["AP", "Fox", "CNN", "ABC", "CBS", "NYT", "Mirror", "Reuters", "Express", "Guardian", "DailyMail", "All"]
for source in sources:
    data_df = pd.read_csv(os.path.join(ROOT_DIR, f"results_5", source + "_Results.csv"), parse_dates=["Date"], index_col=["Date"])
    davg_df = data_df.drop(columns=["Title", "Text", "URL", "Topic"]).resample("D").mean()
    dcount_df = data_df.resample("D").apply({"URL": "count"})

    fig = line_plot_count(dcount_df, f"Daily Article Count - {source}")
    fig.savefig(os.path.join(ROOT_DIR, "figures","article_count", source + f"_Daily_Count.svg"), format="svg", dpi=1000, bbox_inches="tight")


NameError: name 'pd' is not defined