# Second method of validation for the news media (finding a meaningful relation)

In [None]:
import pandas as pd
import datetime
import plotly.graph_objects as go
import plotly.express as px
from math import log10

In [None]:
# Maps a 'City Or County' value from the sampled-shootings CSV to the short
# label used for that incident (get_shootings_dates() performs the lookup and
# get_intensity_data() matches the label against the 'Location' column).
# NOTE(review): 'SanBernadino' is spelled differently from its key; presumably
# it has to match the labels stored in the intensity data, so confirm before
# "correcting" it.
abbrv = {
    'San Bernardino': 'SanBernadino',
    'Houston': 'Houston',
    'Washington Navy Yard': 'DC',
    'Boulder': 'Boulder',
    'Odessa': 'Odessa',
    'Pittsburgh': 'Pittsburgh',
    'Las Vegas': 'Vegas',
    'Plano': 'Plano',
    'Bogue Chitto': 'Bogue',
    'Virginia Beach': 'VirginiaBeach',
}

In [None]:
def incident_date_to_timedate(d):
    """Convert an incident date in ``M/D/YYYY`` form into a ``datetime.date``.

    Args:
        d: The raw date value. It is converted with ``str()`` and anything
            after the first space (for example a trailing time of day) is
            ignored, so ``'12/2/2015'`` and ``'12/2/2015 0:00'`` both work.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If the date part is not a valid ``month/day/4-digit-year``
            date. This includes two-digit years and extra or missing fields,
            which are rejected instead of being silently misread.
    """
    date_text = str(d).split(' ')[0]
    # strptime validates the whole field layout, unlike manual split/int parsing.
    return datetime.datetime.strptime(date_text, '%m/%d/%Y').date()

def get_shootings_dates():
    """Load the sampled shootings and return their labels and dates.

    Reads '../../v2-sampled_shootings.csv' and, for every row, maps the
    'City Or County' value through ``abbrv`` and converts 'Incident Date'
    with ``incident_date_to_timedate``.

    Returns:
        A DataFrame with one row per CSV row and two columns: 'label' and
        'date'.

    Raises:
        KeyError: If a 'City Or County' value has no entry in ``abbrv``.
    """
    sampled = pd.read_csv('../../v2-sampled_shootings.csv')

    # Convert row by row so label lookup and date parsing stay paired.
    pairs = [
        (abbrv[city], incident_date_to_timedate(raw_date))
        for city, raw_date in zip(sampled['City Or County'], sampled['Incident Date'])
    ]

    return pd.DataFrame.from_dict({
        'label': [label for label, _ in pairs],
        'date': [day for _, day in pairs],
    })

def get_intensity_data(label):
    """Return the article counts recorded for one location label.

    Reads '../../results/intensity_data.csv' on every call, keeps the rows
    whose 'Location' equals ``label``, and returns their 'Days Passed' and
    '# Articles' columns sorted by 'Days Passed'.

    Args:
        label: Value to match against the 'Location' column.

    Returns:
        A DataFrame sorted by 'Days Passed'. Because ``reset_index()`` is
        called without ``drop``, the previous row index is kept as an extra
        'index' column.
    """
    intensity = pd.read_csv('../../results/intensity_data.csv')
    at_location = intensity['Location'] == label
    selected = intensity.loc[at_location, ['Days Passed', '# Articles']]
    return selected.sort_values(['Days Passed']).reset_index()

In [None]:
def _read_incidents(csv_path):
    """Read one incident CSV and parse 'Incident Date' ("Month D, YYYY") to datetimes."""
    frame = pd.read_csv(csv_path)
    frame['Incident Date'] = pd.to_datetime(frame['Incident Date'], format="%B %d, %Y")
    return frame


shooting_1 = _read_incidents('../../gun-violence-database-part-one.csv')
shooting_2 = _read_incidents('../../gun-violence-database-part-two.csv')
shooting_3 = _read_incidents('./recent_shooting_data/recent_shooting_data.csv')

# Latest incident date found in parts one and two of the database.
last_shooting_date = max(max(part['Incident Date']) for part in (shooting_1, shooting_2))

# From the recent-data file keep only rows strictly after that date, then
# stack all three sources into one frame.
newer_incidents = shooting_3.loc[shooting_3['Incident Date'] > last_shooting_date]
shootings_data = pd.concat([shooting_1, shooting_2, newer_incidents]).reset_index()

# Parallel lists: entry k of each list describes the same incident.
shootings = {
    'dates': [],
    'locations': [],
    'casualties': [],
}

for idx, row in shootings_data.iterrows():
    casualties = int(row['# Killed']) + int(row['# Injured'])
    # Only incidents with at least 4 people killed or injured are recorded.
    if casualties >= 4:
        date = row['Incident Date']
        location = row['City Or County']

        shootings['dates'].append(date)
        shootings['locations'].append(f'{location}')
        shootings['casualties'].append(casualties)

In [None]:
# Figure: news-coverage curves of the sampled shootings drawn over vertical
# lines marking every incident collected in `shootings`.
colors = px.colors.qualitative.D3

fig = go.Figure()

# Casualty range over all recorded incidents (not referenced again in this cell).
cmin, cmax = min(shootings['casualties']), max(shootings['casualties'])

# One thin vertical line per incident. The grey level is 255 - 5*casualties,
# clamped at 0, so incidents with more casualties are drawn darker.
for loc, date, cas in zip(shootings['locations'], shootings['dates'], shootings['casualties']):
    grey = max(255 - 5*cas, 0)
    fig.add_trace(go.Scatter(
        x=[date, date], y=[-1, 1800], name=f'{cas} + {loc}',
        mode='lines',
        line=dict(color=f'rgb({grey}, {grey}, {grey})', width=.5),
        showlegend=False,
    ))


# One coverage curve per sampled shooting: x is the incident date shifted by
# 'Days Passed', y is the number of articles.
for i, incident in get_shootings_dates().iterrows():
    intensity = get_intensity_data(incident['label'])

    x = [incident['date'] + datetime.timedelta(days=int(days)) for days in intensity['Days Passed']]
    y = intensity['# Articles']

    fig.add_trace(go.Scatter(
        x=x, y=y, name=incident['label'],
        mode='lines+markers',
        # Wrap around the palette so having more incidents than colours
        # reuses colours instead of raising IndexError.
        # Assumes `i` is the default integer row label from iterrows().
        line=dict(color=colors[i % len(colors)], width=2),
        marker=dict(size=5),
    ))


width, height = 1500, 850

fig.update_layout(
    width=width, height=height,
    xaxis = dict(title='Date', mirror=True,),
    # On a log axis plotly expects `range` in log10 units (see plotly axes docs).
    yaxis = dict(type='log', title='Coverage Frequency (# of articles)', mirror=True, range=(-0.3, 3.7)),
    legend_title_text='',
    legend=dict(orientation='h',x=0.1, y=1., bgcolor='rgba(0,0,0,0)'),
    template='simple_white'
)


fig.show()

fig.write_image('./coverage_frequency_number_of_casualties.png', width=width, height=height, scale=3)

In [None]:
# Political events grouped by year. Each year holds two lists of
# (date, label) pairs: 'elect_day' for elections and 'elect_debates' for
# debates. Dates are written as "Month D, YYYY" strings.
elections = {
    2013: {
        'elect_day': [
            ('June 25, 2013', 'Senate Election'),
            ('October 16, 2013', 'Senate Election'),
            ('April 9, 2013', 'Congress Election'),
            ('May 7, 2013', 'Congress Election'),
            ('June 4, 2013', 'Congress Election'),
            ('December 10, 2013', 'Congress Election'),
            ('December 17, 2013', 'Congress Election'),
            ('November 16, 2013', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2014: {
        'elect_day': [
            ('November 4, 2014', 'Senate Election'),
            ('December 6, 2014', 'Senate Election'),
            ('November 4, 2014', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2015: {
        'elect_day': [
            ('May 5, 2015', 'Congress Election'),
            ('May 12, 2015', 'Congress Election'),
            ('September 10, 2015', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2016: {
        'elect_day': [
            ('November 8, 2016', 'Senate Election'),
            ('December 10, 2016', 'Senate Election'),
            ('November 8, 2016', 'Congress Election'),
            ('November 8, 2016', 'Presidential Election'),
        ],
        'elect_debates': [
            ('September 26, 2016', 'Presidential Debate'),
            ('October 9, 2016', 'Presidential Debate'),
            ('October 19, 2016', 'Presidential Debate'),
            ('October 4, 2016', 'Vice Presidential Debate'),
        ],
    },
    2017: {
        'elect_day': [
            ('December 17, 2017', 'Senate Election'),
            ('April 11, 2017', 'Congress Election'),
            ('May 25, 2017', 'Congress Election'),
            ('June 6, 2017', 'Congress Election'),
            ('June 20, 2017', 'Congress Election'),
            ('November 7, 2017', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2018: {
        'elect_day': [
            ('November 6, 2018', 'Senate Election'),
            ('November 27, 2018', 'Senate Election'),
            ('November 6, 2018', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2019: {
        'elect_day': [
            ('May 21, 2019', 'Congress Election'),
            ('September 10, 2019', 'Congress Election'),
        ],
        'elect_debates': [],
    },
    2020: {
        'elect_day': [
            ('November 3, 2020', 'Senate Election'),
            ('November 3, 2020', 'Congress Election'),
            ('November 3, 2020', 'Presidential Election'),
        ],
        'elect_debates': [
            ('September 29, 2020', 'Presidential Debate'),
            ('October 22, 2020', 'Presidential Debate'),
            ('October 7, 2020', 'Vice Presidential Debate'),
        ],
    },
    2021: {
        'elect_day': [
            ('January 5, 2021', 'Senate Election'),
            ('March 20, 2021', 'Congress Election'),
            ('April 24, 2021', 'Congress Election'),
            ('June 1, 2021', 'Congress Election'),
            ('July 27, 2021', 'Congress Election'),
        ],
        'elect_debates': [],
    },
}

In [None]:
# Figure: news-coverage curves of the sampled shootings drawn over vertical
# lines marking the elections and debates listed in `elections`.
colors = px.colors.qualitative.D3

fig = go.Figure()

# Line colour for each kind of political event. Events whose label is not
# listed here get no marker line.
event_colors = {
    'Senate Election': 'yellow',
    'Congress Election': 'lightcyan',
    'Presidential Election': 'Pink',
    'Presidential Debate': 'silver',
    'Vice Presidential Debate': 'lightgray',
}

for election_infos in elections.values():
    for data in election_infos.values():
        for date_text, label in data:
            # Parse before the label check so a malformed date always raises.
            day_time = datetime.datetime.strptime(date_text, '%B %d, %Y')

            if label not in event_colors:
                continue

            fig.add_trace(go.Scatter(
                x=[day_time, day_time], y=[-1, 1800], name=label,
                mode='lines+markers',
                line=dict(color=event_colors[label], width=1.5),
                marker=dict(size=5),
                showlegend=False,
            ))


# One coverage curve per sampled shooting: x is the incident date shifted by
# 'Days Passed', y is the number of articles.
for i, incident in get_shootings_dates().iterrows():
    intensity = get_intensity_data(incident['label'])

    x = [incident['date'] + datetime.timedelta(days=int(days)) for days in intensity['Days Passed']]
    y = intensity['# Articles']

    fig.add_trace(go.Scatter(
        x=x, y=y, name=incident['label'],
        mode='lines+markers',
        # Wrap around the palette so having more incidents than colours
        # reuses colours instead of raising IndexError.
        # Assumes `i` is the default integer row label from iterrows().
        line=dict(color=colors[i % len(colors)], width=2),
        marker=dict(size=5),
    ))


width, height = 1500, 650

fig.update_layout(
    width=width, height=height,
    xaxis = dict(title='Date', mirror=True,),
    # On a log axis plotly expects `range` in log10 units (see plotly axes docs).
    yaxis = dict(type='log', title='Coverage Frequency (# of articles)', mirror=True, range=(-0.3, 3.7)),
    legend_title_text='',
    legend=dict(orientation='h',x=0.1, y=1., bgcolor='rgba(0,0,0,0)'),
    template='simple_white'
)


fig.show()

fig.write_image('./coverage_frequency_elections.png', width=width, height=height, scale=3)