In [None]:
import pandas as pd
import glob

In [None]:
# function to process all individual csv annotations into one dataframe
def return_df_from_files(casualty_directory):
    """Combine every CSV annotation file in a directory into one DataFrame.

    Parameters
    ----------
    casualty_directory : str
        Directory holding the individual CSV annotation files. A trailing
        path separator is optional.

    Returns
    -------
    pandas.DataFrame
        The rows of all ``*.csv`` files in the directory, with repeated
        ``(article_id, sentence)`` pairs reduced to their first occurrence.
        The row index is whatever ``pd.concat`` produces (labels may repeat
        across files).

    Raises
    ------
    ValueError
        If no ``*.csv`` file is found in ``casualty_directory``.
    """
    import os  # local import so the function does not depend on another cell

    # os.path.join works with or without a trailing separator; sorting makes
    # the read order (and therefore which duplicate counts as "first") stable,
    # because glob.glob returns matches in arbitrary order.
    csv_paths = sorted(glob.glob(os.path.join(casualty_directory, '*.csv')))
    if not csv_paths:
        raise ValueError(f"No CSV files found in {casualty_directory!r}")

    df = pd.concat([pd.read_csv(path) for path in csv_paths])

    # make sure we do not have any duplicate sentences for same articles
    return df.drop_duplicates(['article_id', 'sentence'], keep='first')
    
#combined = return_df_from_files('./fatality_counts/')

In [None]:
# otherwise we can load from saved annotations

# Previously saved annotation summaries: one file for articles, one for the livefeed.
annotations_articles = pd.read_csv('./fatality_counts/summary/annotations_articles_20231007-20231202.csv')
annotations_livefeeds = pd.read_csv('./fatality_counts/summary/annotations_livefeed_20231007-20231202.csv')

# Stack both sources, parse the dates, then keep only rows whose date lies
# strictly between the two bounds (both bounds are exclusive).
combined = (
    pd.concat([annotations_articles, annotations_livefeeds])
    .assign(article_date=lambda frame: pd.to_datetime(frame['article_date']))
    .loc[
        lambda frame: (frame['article_date'] > '2023-10-06')
        & (frame['article_date'] < '2023-12-02')
    ]
)

print(combined.shape)

In [None]:
# Count rows (non-null article_id) per day and per category, then spread the
# categories into one column each, keeping article_date as a regular column.
summary = (
    combined
    .groupby(['article_date', 'category'])
    .agg({'article_id': 'count'})
    .reset_index()
    .pivot(index='article_date', columns='category', values='article_id')
    .reset_index()
)

# A day without rows for a category comes out of the pivot as NaN, and a
# category that never occurs has no column at all; normalise both cases to 0.
for column in ['both', 'israel', 'palestine']:
    if column not in summary:
        summary[column] = 0
    # Assign the filled column back: fillna(inplace=True) on a selected column
    # is a chained assignment and does not reliably update `summary`
    # (FutureWarning in pandas 2.x, no effect under Copy-on-Write).
    summary[column] = summary[column].fillna(0)

# Rows categorised as 'both' are added to each side's daily total.
summary['israel'] = summary['israel'] + summary['both']
summary['palestine'] = summary['palestine'] + summary['both']

summary.head(5)

In [None]:
# Sum the daily totals into consecutive 7-day bins keyed on article_date,
# then keep only the bins that start before 2023-12-03.
summary_grouped = (
    summary
    .groupby(pd.Grouper(key='article_date', freq='7D'))
    .agg({'israel': 'sum', 'palestine': 'sum'})
    .reset_index()
    .loc[lambda weekly: weekly['article_date'] < '2023-12-03']
)

summary_grouped

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Single-axes figure holding both daily series.
fig, ax = plt.subplots(figsize=(8, 6))

# Work on a copy so the plotting cell never touches `summary` itself.
summary_plot = summary.copy()

# The dates are handed to matplotlib as strings, so the x axis is categorical:
# one position per row of summary_plot, in row order.
dates = summary_plot['article_date'].astype(str).values

ax.plot(dates, summary_plot['palestine'], label='Palestine', color='yellow')
ax.plot(dates, summary_plot['israel'], label='Israel', color='blue')

ax.set(
    title="Mentions of Palestinian and Israeli Deaths in BBC (incl. livefeed data)",
    xlabel="Date",
    ylabel="Mentions of Deaths (per day)",
)

ax.set_facecolor("lightgray")
# Tick styling is applied in this order on purpose: rotate the labels first,
# then thin the major ticks to every 7th position, then add the legend.
plt.xticks(rotation=45)
ax.xaxis.set_major_locator(ticker.MultipleLocator(7))
ax.legend(ncols=2, framealpha=0)

# Write the figure to disk as a high-resolution PNG with tight margins.
plt.savefig('../outputs/bbc_mentions_20231007_to_20231202.png', bbox_inches='tight', dpi=300)