In [None]:
import pandas as pd
import glob

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes the
# load order deterministic, which matters because the later de-duplication keeps
# the *first* occurrence of each duplicated row.
all_casualty_files = sorted(glob.glob('./fatality_counts/*.csv'))

In [None]:
# Read every casualty CSV into its own DataFrame.
all_pd = [pd.read_csv(file) for file in all_casualty_files]

# pd.concat raises an opaque "No objects to concatenate" on an empty list, so
# fail early with a message that points at the actual problem.
if not all_pd:
    raise ValueError("No casualty CSV files were found to load; check the input directory.")

# ignore_index=True gives the stacked frame a single unique 0..n-1 index instead
# of repeating each file's own row labels.
combined = pd.concat(all_pd, ignore_index=True)

In [None]:
# Collapse rows that repeat the same (article_title, sentence) pair, keeping the
# first occurrence of each.
dedupe_keys = ['article_title', 'sentence']
combined = combined.drop_duplicates(subset=dedupe_keys, keep='first')

In [None]:
# Count non-null article_id values for every (article_date, category) pair ...
daily_counts = (
    combined
    .groupby(['article_date', 'category'])
    .agg({'article_id': 'count'})
    .reset_index()
)

# ... then reshape to one row per date with one column per category.
# Date/category combinations that never occur come out of the pivot as NaN.
summary = (
    daily_counts
    .pivot(index='article_date', columns='category', values='article_id')
    .reset_index()
)

In [None]:
# The pivot only creates a column for categories that actually occur in the data,
# so make sure all three exist before doing arithmetic on them.
count_columns = ['both', 'israel', 'palestine']
for column in count_columns:
    if column not in summary:
        summary[column] = 0

# The pivot leaves NaN wherever a date has no rows for a category. All three
# columns must be filled: NaN + both is NaN, which would silently drop the 'both'
# mentions on any date lacking an 'israel'-only or 'palestine'-only row.
# Assigning the result back (rather than calling fillna(inplace=True) on a column
# selection) also keeps this working under pandas Copy-on-Write.
summary[count_columns] = summary[count_columns].fillna(0)

# A mention categorised as 'both' counts toward each side's total.
# NOTE: this overwrites 'israel'/'palestine' in place, so re-running this cell
# without rebuilding `summary` adds 'both' a second time.
summary['israel'] = summary['israel'] + summary['both']
summary['palestine'] = summary['palestine'] + summary['both']

In [None]:
# Display the per-date table of category counts for inspection.
summary

In [None]:
# Overall totals across all dates, displayed as an (israel, palestine) pair.
israel_total = summary['israel'].sum()
palestine_total = summary['palestine'].sum()
israel_total, palestine_total

In [None]:
# Parse the date column into real datetimes; the time-based pd.Grouper used for
# the 7-day aggregation needs a datetime-like key.
parsed_dates = pd.to_datetime(summary['article_date'])
summary['article_date'] = parsed_dates

In [None]:
# Bucket the daily counts into consecutive 7-day windows and total each side.
weekly_bins = pd.Grouper(key='article_date', freq='7D')
weekly_totals = (
    summary
    .groupby(weekly_bins)
    .agg({'israel': 'sum', 'palestine': 'sum'})
    .reset_index()
)

# Keep only the windows that start before the cutoff date.
# NOTE(review): presumably this drops a trailing, incomplete window — confirm.
starts_before_cutoff = weekly_totals['article_date'] < '2023-11-18'
summary_grouped = weekly_totals[starts_before_cutoff]

summary_grouped

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Line chart of daily death mentions, one line per side.
fig, ax = plt.subplots(figsize=(8, 6))

# Work on a copy so plotting never touches the summary table itself.
summary_plot = summary.copy()

# Dates are plotted as strings, i.e. as evenly spaced categories on the x axis.
dates = summary_plot['article_date'].astype(str).values

ax.plot(dates, summary_plot.palestine, label = 'Palestine', color = 'yellow')
ax.plot(dates, summary_plot.israel, label = 'Israel', color = 'blue')

ax.set_title("Mentions of Palestinian and Israeli Deaths in BBC")
ax.set_xlabel("Date")
ax.set_ylabel("Mentions of Deaths (per day)")

ax.set_facecolor("lightgray")

# Label every second date to thin out the axis.
ax.xaxis.set_major_locator(ticker.MultipleLocator(2))

# `ncol` is accepted by both old and new matplotlib; the `ncols` spelling only
# exists from matplotlib 3.6 onwards.
ax.legend(ncol=2, framealpha=0)

# Rotate and right-align the date labels so neighbouring ticks do not overlap.
fig.autofmt_xdate()