In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
df = pd.read_csv('../incidents/incidents.csv')
df.head()


# LINE CHART

Let's draw a line chart of all incidents per months (ignoring country and categories)!

In [None]:
def sort_by_date(df, date_column_name="DATE"):
    df[date_column_name] = pd.to_datetime(df[date_column_name])
    df.sort_values(by=date_column_name, inplace=True)
    return df


def fill_missing_dates(df, start_date, end_date):
    date_indices = pd.date_range(start=start_date, end=end_date, freq="MS")
    df = df.reindex(date_indices, fill_value=0)
    return df


def get_incidents_per_month(df):
    df = sort_by_date(df, date_column_name="DATE")
    incidents_per_months = df.groupby("DATE", as_index=False).count()
    incidents_per_months.rename(columns={"INCIDENT_ID":"num_of_incidents"}, inplace=True)
    incidents_per_months = incidents_per_months[["DATE", "num_of_incidents"]]
    incidents_per_months.index = incidents_per_months["DATE"]
    
    filled_incs_per_months = fill_missing_dates(incidents_per_months, 
                                                incidents_per_months.DATE.iloc[0],
                                                incidents_per_months.DATE.iloc[-1])
    filled_incs_per_months.index = filled_incs_per_months.index.format(formatter=lambda x: x.strftime("%Y-%m"))
    filled_incs_per_months = filled_incs_per_months.drop(columns = ["DATE"])
    return filled_incs_per_months


def draw_line_chart_of_monthly_incidents(df):
    incidents_per_months = get_incidents_per_month(df)
    
    plt.figure()
    plt.plot(incidents_per_months.index, incidents_per_months["num_of_incidents"], label="Number of incidents")
    plt.legend()
    plt.title('Number of incidents per months')
    plt.xticks(incidents_per_months.index, rotation=90)
    plt.show()
    

df_to_line_chart = df
draw_line_chart_of_monthly_incidents(df_to_line_chart)

# STACKED CHART

Let's draw a stacked bar chart for all incidents in a month in a country!

In [None]:
def group_df_by_date_and_countries(df):
    df = sort_by_date(df)
    grouped_df = df.groupby(["DATE", "COUNTRY"]).size().unstack()
    return grouped_df
    
    
def draw_stacked_chart_of_incidents_by_countires(df):
    df = sort_by_date(df)
    grouped_df = df.groupby(["DATE", "COUNTRY"]).size().unstack()    
    grouped_df.fillna(0, inplace=True)
    grouped_df = fill_missing_dates(grouped_df, 
                                   grouped_df.index[0],
                                   grouped_df.index[-1])
    grouped_df.index = grouped_df.index.format(formatter=lambda x: x.strftime('%Y-%m'))
    grouped_df.plot(kind="bar", stacked=True)
    plt.title("Number of incidents by countries")
    plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    return grouped_df


df_to_stack_chart = df
draw_stacked_chart_of_incidents_by_countires(df_to_stack_chart)

# LINE CHART v2

Let's draw simple separate line charts for each country with all incidents per months! We have 5 countries available, so we need to elegantly generate 5 line plots, one for each country!

In [None]:
def draw_line_chart_of_incidents_by_countries(df):
    grouped_df = group_df_by_date_and_countries(df)
    grouped_df.fillna(0, inplace=True)
    grouped_df = fill_missing_dates(grouped_df, 
                                   grouped_df.index[0],
                                   grouped_df.index[-1])
    grouped_df.index = grouped_df.index.format(formatter=lambda x: x.strftime('%Y-%m'))
    
    for country in grouped_df:
        plt.plot(grouped_df.index, grouped_df[country], label="number of incidents")
        plt.xticks(grouped_df.index, rotation=90)
        plt.title(f"Number of incidents in {country}")
        plt.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
        plt.show()
        

df_to_line_chart_by_countries = df
draw_line_chart_of_incidents_by_countries(df)