In [1]:
from io import StringIO # wraps literal HTML so pd.read_html treats it as a file-like object

import pandas as pd # library for data analysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents


In [2]:
# Fetch the raw HTML of the Wikipedia article that lists the incidents.
wikiurl="https://en.wikipedia.org/wiki/List_of_school_shootings_in_the_United_States_(2000%E2%80%93present)#References"
# NOTE(review): table_class is not referenced by any visible cell; kept in
# case code outside this view still reads it.
table_class="wikitable sortable jquery-tablesorter"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response=requests.get(wikiurl, timeout=30)
print(response.status_code)
# Stop here with the HTTP error rather than letting later cells parse an
# error page and fail with a misleading message.
response.raise_for_status()

200


In [3]:
# Parse the HTML and collect one incident table per decade.
# Each table is looked up by tag name, the "wikitable" class and an id equal
# to the decade label.
soup = BeautifulSoup(response.text, 'html.parser')
indiatables = []
for x in ["2000s", "2010s", "2020s"]:
    decade_table = soup.find('table', {'class': "wikitable", 'id': x})
    if decade_table is None:
        # find() returns None when nothing matches; fail here with a clear
        # message instead of passing None on to the table-parsing cell.
        raise ValueError(
            f"No <table class='wikitable' id='{x}'> found in the page; "
            "the article layout may have changed."
        )
    indiatables.append(decade_table)

In [4]:
# Turn each decade's HTML table into a cleaned DataFrame, then combine them.
frames = []
for table_tag in indiatables:
    # read_html returns a list of DataFrames; the tag holds a single table.
    # StringIO is used because passing literal HTML strings is deprecated.
    decade_df = pd.read_html(StringIO(str(table_tag)))[0]
    # Strip bracketed footnote/citation markers such as "[12]".
    # The class excludes "]" so each marker is removed on its own; the former
    # pattern excluded ")" and, being greedy, erased all text between the
    # first "[" and the last "]" of a cell.
    decade_df = decade_df.replace(r'\[[^\]]*\]', '', regex=True)
    # The parsed header is two-level; keep only the top level.
    decade_df.columns = decade_df.columns.droplevel(1)
    # Drop rows that contain exactly two distinct non-null values across all
    # columns.
    # NOTE(review): presumably these are in-body separator/repeated-header
    # rows of the wiki table; the threshold of 2 is kept from the original and
    # should be confirmed against the page.
    keep_row = decade_df.nunique(axis=1) != 2
    decade_df = decade_df.loc[keep_row].astype(
        {'Deaths': int, 'Injuries': int, 'Total': int}
    )
    # Truncated description for compact display.
    decade_df = decade_df.assign(
        short_description=decade_df['Description'].str[:100] + '...'
    )
    frames.append(decade_df)

# Concatenate all decades into one DataFrame with a fresh 0..n-1 index.
df = pd.concat(frames, ignore_index=True)

# Keep only incidents with at least one casualty. dropna() additionally
# removes rows with any missing field (behaviour kept from the original).
df = df[df['Total'] != 0].dropna().reset_index(drop=True)

# save the dataframe to a csv file
df.to_csv('school_shootings.csv', index=False, encoding='utf-8')

In [171]:
# Display the incident DataFrame (rich notebook output).
df

Unnamed: 0,Date,Location,Deaths,Injuries,Total,Description,short_description
0,"February 29, 2000","Flint, Michigan",1,0,1,Killing of Kayla Rolland: At Buell Elementary ...,Killing of Kayla Rolland: At Buell Elementary ...
1,"May 26, 2000","Lake Worth, Florida",1,0,1,"13-year-old honor student, Nathaniel Brazill w...","13-year-old honor student, Nathaniel Brazill w..."
2,"June 28, 2000","Seattle, Washington",2,0,2,58-year-old Director of the Division of Pathol...,58-year-old Director of the Division of Pathol...
3,"August 28, 2000","Fayetteville, Arkansas",2,0,2,"36-year-old James Easton Kelly, a PhD candidat...","36-year-old James Easton Kelly, a PhD candidat..."
4,"September 26, 2000","New Orleans, Louisiana",0,2,2,13 year-olds Darrel Johnson and Alfred Anderso...,13 year-olds Darrel Johnson and Alfred Anderso...
...,...,...,...,...,...,...,...
400,"March 22, 2023","Denver, Colorado",1,2,3,"17-year-old Austin Lyle, a student at East Hig...","17-year-old Austin Lyle, a student at East Hig..."
401,"March 27, 2023","Nashville, Tennessee",7,1,8,2023 Covenant School shooting: A shooting occu...,2023 Covenant School shooting: A shooting occu...
402,"March 30, 2023","Winston-Salem, North Carolina",0,1,1,Shots were fired on the Forsyth Technical Comm...,Shots were fired on the Forsyth Technical Comm...
403,"April 10, 2023","Louisville, Kentucky",1,1,2,Two people were shot outside a building on the...,Two people were shot outside a building on the...


In [7]:
import pandas as pd
import plotly.graph_objs as go

# Build an animated scatter plot of the incidents in `df`: one marker per
# incident, revealed in chronological order, with a running count of deaths
# and injuries shown as an annotation.

Y_AXIS_MAX = 60  # upper bound of the fixed y-axis range
HOVER_COLUMNS = ['Location', 'Deaths', 'Injuries', 'wrapped', 'Date']
HOVER_TEMPLATE = (
    "<b>%{x|%b %d, %Y}</b><br>"
    "<b>Location:</b> %{customdata[0]}<br>"
    "<b>Number Killed:</b> %{customdata[1]}<br>"
    "<b>Number Injured:</b> %{customdata[2]}<br>"
    "<b>Description:</b> %{customdata[3]}<extra></extra>"
)
MARKER_STYLE = dict(
    color='Red',
    size=10,
    line=dict(color='DarkSlateGrey', width=1),
)


def _incident_trace(data, upto):
    """Return a Scatter trace for the first `upto` rows of `data`.

    x, y, text and customdata are all sliced to the same rows so that every
    point carries its own hover information.
    """
    shown = data.iloc[:upto]
    return go.Scatter(
        x=shown['Date'],
        y=shown['Total'],
        mode='markers',
        marker=MARKER_STYLE,
        text=shown['wrapped'],
        customdata=shown[HOVER_COLUMNS].values.tolist(),
        hovertemplate=HOVER_TEMPLATE,
    )


def _frame_layout(data, pos, x_range):
    """Return the layout (dated title and running totals) for row `pos`."""
    date_label = data['Date'].iloc[pos].strftime('%Y-%m-%d')
    dead = data['cumulative_deaths'].iloc[pos]
    injured = data['cumulative_injured'].iloc[pos]
    return go.Layout(
        title=f"School Shootings this Century - {date_label}",
        annotations=[dict(
            text="Dead: " + str(dead) + "<br>" + "Injured: " + str(injured),
            x=0.05,
            y=0.95,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=20, color="black"),
        )],
        xaxis=dict(range=x_range),
        yaxis=dict(range=[0, Y_AXIS_MAX]),
        yaxis_title="Number of Deaths + Injuries",
    )


if df.empty:
    raise ValueError("df contains no incidents to plot")

# Sort chronologically BEFORE computing running totals, and reset the index so
# that label-based and positional access refer to the same row. A stable sort
# keeps the original order of incidents that share a date.
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date', kind='mergesort').reset_index(drop=True)
df['cumulative_deaths'] = df['Deaths'].cumsum()
df['cumulative_injured'] = df['Injuries'].cumsum()
# Wrap long descriptions and use <br> so the hover box breaks lines.
df['wrapped'] = df['Description'].str.wrap(60).str.replace('\n', '<br>', regex=False)

# The x-axis range is the same in every frame, so compute it once.
x_range = [df['Date'].min(), df['Date'].max()]

# Initial state: only the first incident is visible.
fig = go.Figure(
    data=[_incident_trace(df, 1)],
    layout=_frame_layout(df, 0, x_range),
)

# One animation frame per subsequent incident; frame i shows incidents 0..i.
frames = [
    go.Frame(
        data=[_incident_trace(df, i + 1)],
        layout=_frame_layout(df, i, x_range),
    )
    for i in range(1, len(df))
]
fig.frames = frames

fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Number of Deaths + Injuries',
    hovermode='closest',
    plot_bgcolor='white',
    updatemenus=[dict(
        type='buttons',
        showactive=True,
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        buttons=[
            dict(
                label='Play',
                method='animate',
                args=[None, dict(frame=dict(duration=100, redraw=True), fromcurrent=True, transition=dict(duration=0))]
            ),
            dict(
                label='Pause',
                method='animate',
                args=[[None], dict(frame=dict(duration=0, redraw=False), mode='immediate', transition=dict(duration=0))]
            ),
        ]
    )]
)

# Show the interactive plot
fig.show(renderer="browser")
fig.write_html("incidents.html")
