# Website
To view the website, please run all the cells below, and then click on the link at the bottom of the page.

In [1]:
from jupyter_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import plotly.graph_objs as go
from jupyterlab_dash import AppViewer
import numpy as np
from scipy import stats
from math import radians, cos, sin, asin, sqrt
from statistics import mean
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,cross_val_score, cross_val_predict
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier
from sklearn.metrics import confusion_matrix


In [2]:
website_dataset = pd.read_csv('website_dataset.csv')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


#### Data cleaning and preprocessing

In [3]:
website_dataset['Created Date'] = pd.to_datetime(website_dataset['Created Date'],errors='coerce')
website_dataset['Closed Date'] = pd.to_datetime(website_dataset['Closed Date'],errors='coerce')
website_dataset['Resolution Action Updated Date'] = pd.to_datetime(website_dataset['Resolution Action Updated Date'],errors='coerce')
website_dataset['Due Date'] = pd.to_datetime(website_dataset['Due Date'],errors='coerce')

website_dataset.drop('Agency', axis=1, inplace=True)
website_dataset.drop('Cross Street 1', axis=1, inplace=True)
website_dataset.drop('Cross Street 2', axis=1, inplace=True)
website_dataset.drop('Intersection Street 1', axis=1, inplace=True)
website_dataset.drop('Intersection Street 2', axis=1, inplace=True)
website_dataset.drop('BBL', axis=1, inplace=True)
website_dataset.drop('X Coordinate (State Plane)', axis=1, inplace=True)
website_dataset.drop('Y Coordinate (State Plane)', axis=1, inplace=True)
website_dataset.drop('Bridge Highway Segment', axis=1, inplace=True)
website_dataset.drop('Location', axis=1, inplace=True)

website_dataset.loc[website_dataset['Road Ramp'] == 'Road', 'Location Type'] = 'Road'
website_dataset.loc[website_dataset['Road Ramp'] == 'Ramp', 'Location Type'] = 'Ramp'

website_dataset.loc[(website_dataset['Facility Type'] == 'School') & (website_dataset['Location Type'].isnull()), 'Location Type'] = 'School'
website_dataset.loc[(website_dataset['Facility Type'] == 'School District') & (website_dataset['Location Type'].isnull()), 'Location Type'] = 'School'

website_dataset.drop('Road Ramp', axis=1, inplace=True)
website_dataset.drop('Facility Type', axis=1, inplace=True)

website_dataset = website_dataset[website_dataset['Latitude'].notna()]
website_dataset = website_dataset[website_dataset['Longitude'].notna()]

# replace the nan values with the status update date
website_dataset.loc[(website_dataset['Status'] == 'Closed') & (website_dataset['Closed Date'].isnull()), 'Closed Date'] = website_dataset.loc[(website_dataset['Status'] == 'Closed') & (website_dataset['Closed Date'].isnull()), 'Resolution Action Updated Date'].values

website_dataset = website_dataset.loc[(website_dataset['Status'] != 'Closed') | (website_dataset['Closed Date'].notnull())]


website_dataset = website_dataset.loc[(website_dataset['Created Date'].dt.year > 2009) & (website_dataset['Created Date'].dt.year < 2022)]
website_dataset = website_dataset.loc[website_dataset['Closed Date'].isnull() |((website_dataset['Closed Date'].dt.year > 2009) & (website_dataset['Closed Date'].dt.year < 2022))]
website_dataset = website_dataset.loc[website_dataset['Resolution Action Updated Date'].isnull() | ((website_dataset['Resolution Action Updated Date'].dt.year > 2009) & (website_dataset['Resolution Action Updated Date'].dt.year < 2022))]
website_dataset = website_dataset.loc[(website_dataset['Due Date'].dt.year > 2009) | (website_dataset['Due Date'].isnull())]

website_dataset = website_dataset.loc[(website_dataset['Longitude']> -74.5) & (website_dataset['Longitude'] < -73)]
website_dataset = website_dataset.loc[(website_dataset['Latitude']> 40) & (website_dataset['Latitude'] < 41.5)]

website_dataset = website_dataset[website_dataset['Borough'].isin(['BRONX','BROOKLYN','MANHATTAN','QUEENS','STATEN ISLAND'])]


website_dataset.loc[(website_dataset['Status'] == 'Pending'), 'Status'] = 'Unresolved'
website_dataset.loc[(website_dataset['Status'] == 'Assigned'), 'Status'] = 'Unresolved'
website_dataset.loc[(website_dataset['Status'] == 'In Progress'), 'Status'] = 'Unresolved'
website_dataset.loc[(website_dataset['Status'] == 'Open'), 'Status'] = 'Unresolved'
website_dataset.loc[(website_dataset['Status'] == 'Unassigned'), 'Status'] = 'Unresolved'

In [4]:
#Build AppViewer 
viewer = AppViewer()

# Build App
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
#app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

colors = {
    'background': '#fadadd',
    'text': '#FF4040'
}


start = website_dataset[website_dataset['Created Date'].dt.year == 2021]        
fig = px.scatter_mapbox(start, lat="Latitude", lon="Longitude", hover_name="Complaint Type",color="Complaint Type",
                        hover_data=["City", "Descriptor"],color_continuous_scale=px.colors.cyclical.IceFire,
                        zoom=9, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})


# preprocess the data
filter_borough_data = website_dataset[website_dataset['Borough'].isin(['MANHATTAN','BRONX','QUEENS','BROOKLYN','STATEN ISLAND'])]
labels, counts = np.unique(filter_borough_data["Borough"], return_counts=True)
labels_hours, counts_hours = np.unique(website_dataset["Created Date"].dt.hour, return_counts=True)

df_boroughs = pd.DataFrame({
    "Borough": labels,
    "Counts": counts
})

df_hours = pd.DataFrame({
    "Hour of day": labels_hours,
    "Counts": counts_hours
})
  

fig_hist_hours = px.bar(df_hours,x='Hour of day', y='Counts', color_discrete_sequence =['tomato']*len(df_boroughs))
fig_hist_hours.update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background']
)

fig_hist_borough =  px.bar(df_boroughs,x="Borough", y="Counts",  color_discrete_sequence =['tomato']*len(df_hours))
fig_hist_borough.update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background']
)

dataset_not2020 = website_dataset[website_dataset['Created Date'].dt.year != 2020]
dataset_2020 = website_dataset[website_dataset['Created Date'].dt.year == 2020]
labels_not, counts_not = np.unique(dataset_not2020["Created Date"].dt.month, return_counts=True)
labels_2020, counts_2020 = np.unique(dataset_2020["Created Date"].dt.month, return_counts=True)

df_not2020 = pd.DataFrame({
    "Month": labels_not,
    "Counts": counts_not
})

df_2020 = pd.DataFrame({
    "Month": labels_2020,
    "Counts": counts_2020
})

fig_not2020 = px.bar(df_not2020,x="Month", y="Counts",  color_discrete_sequence =['tomato']*len(df_hours), title= 'Complaints in years up to 2020')
fig_2020 = px.bar(df_2020,x="Month", y="Counts",  color_discrete_sequence =['tomato']*len(df_hours), title= 'Complaints in 2020')


complaint_options = [{'label': i, 'value': i} for i in website_dataset['Complaint Type'].unique()]
complaint_options = [{'label': 'All complaints', 'value': 'All complaints'}] + complaint_options
# Create server variable with Flask server object for use with gunicorn
server = app.server

app.layout = html.Div([
    
    html.H1(children='New York City Complaints', 
            style={'textAlign': 'center',
                    'color': colors['text']}
    ),
    
    html.Div(children='New York City is one of the most fascinating cities in the world and it attracts a steady influx of many people from all over the world. However all cities have their flaws, and New York City is not an exception. This website explores the complaints that New Yorkers have filed through the 311 service system since 2010, to try to understand what issues and struggles New Yorkers face in their day-to-day lives.', style={
                    'color': '#000000', 'padding': '20px'
                }),
    

    html.Div([
        dcc.Dropdown(
            id='category',
            options=complaint_options,
            #multi=True,
            value = 'All complaints'
        ),

    ], style={'padding': '10px 10px 10px 10px ','width': '99%','textAlign': 'center'}),
    

    # the map graph
    html.Div([
        dcc.Graph(id='map',figure=fig)
    ],style={'width': '100%', 'padding': '20px','textAlign': 'center'}),
    
    
    html.Div(dcc.Slider(
    id='crossfilter-year--slider',
    min=website_dataset['Created Date'].dt.year.min(),
    max=website_dataset['Created Date'].dt.year.max(),
    value=website_dataset['Created Date'].dt.year.max(),
    marks={str(year): str(year) for year in website_dataset['Created Date'].dt.year.unique()},
    step=None), 
        style={'width': '99%', 'padding': '20px 20px 20px 20px'}),
    
    html.H4(children='How is this complaint type distributed among the boroughs?', style={
                'color': colors['text'],
                'display': 'inline-block',
                'textAlign': 'center',
                'width': '50%',
                'padding': '20'
            }),
    
    html.H4(children='What is the hourly pattern for this complaint type?', style={
            'color': colors['text'],
            'display': 'inline-block',
            'textAlign': 'center',
            'width': '50%',
            'padding': '20'
        }),
    
    
    
    html.Div([
        dcc.Graph(
            id='across-boroughs',figure=fig_hist_borough),
        
    ], style={'width': '50%', 'display': 'inline-block', 'padding': '20px 0px 0px 0px'}),
     
     html.Div([
        dcc.Graph(
            id='across-hours',figure=fig_hist_hours)
     ], style={'width': '50%', 'display': 'inline-block','padding': '20px 0px 0px 0px'}),



    
    html.Div([
    html.H1(children='How has the Covid-19 pandemic changed complaints?', 
            style={'textAlign': 'center',
                    'color': colors['text'],'padding': '20px'}
    ),
    
    html.Div(children='The corona pandemic forced everybody in New York City into lockdown which started in March 2020. This disrupted everybodys daily lives, with most people not being able to go to work and family and friends unable to see each other. Such a significant change in people\'s lives will certainly also have an affect on the complaints that are filed by New Yorkers. Let\'s examine how the behaviour of New Yorkers has changed during the pandemic.', style={
                'color': '#000000', 'padding': '20px'
            }),
    
    html.Div(html.Img(src=app.get_asset_url('complaints_per-year.png'), 
                      style={'height':'60%', 'width':'60%'}),
    style={'textAlign': 'center'}),
        
    html.Div(children='The pandemic didn\'t seem to increase or dercrease the amount of complaints that people where making. In 2020 the number of complaints lay at nearly the exact same amount as in 2019. However, we will have to investigate some more to find out if the complaining habits of New Yorkers have changed during the pandemic. Let\'s compare the monthly trends of 2020 and the previous years (starting from 2010).' , style={
            'color': '#000000', 'padding': '20px'
        }),
    
#     html.Div(html.Img(src=app.get_asset_url('monthly_complaints.png'), style={'height':'50%', 'width':'50%'}),
#             html.Img(src=app.get_asset_url('monthly_complaints_2020.png'), style={'height':'50%', 'width':'50%'}), 
#         style={'backgroundColor': '#FFFFFF','width': '100%','display': 'inline-block'})


#     ])
        
        
    html.Div([
        html.Img(src=app.get_asset_url('monthly_complaints.png'),style={'width': '100%','height':'100%','display': 'inline-block'})
    ], style={'display': 'inline-block','width': '50%','height':'50%'}),
    
    html.Div([
        html.Img(src=app.get_asset_url('monthly_complaints_2020.png'),style={'display': 'inline-block','width': '100%', 'height':'100%'})
    
    ],style={ 'display': 'inline-block','width': '50%', 'height':'50%'}),
        
        
    html.Div(children='We can see that two complaint types that have increased a lot compared to previous years include residential noise and heating/hot water problems. Since people have been in lockdown, it makes sense that people will complain more about issues in the residential category, and not so much about things outside of the home. Indeed street condition complaints also seem to have decreased.', style={
        'color': '#000000', 'padding': '20px',
    }),
        
    html.Div(children='The Covid-19 pandemic has also seen a lot of guidelines such as face coverings and social distancing and also a lot of mandatory closure of businesses. This has lead to New Yorkers also reporting on such violations to 311. Indeed the two categories:  NonCompliance with Phased Reopening and Non-Emergency Police Matter have been increasingly reported. The first category concerns complaints about businesses and restaurants that have not closed down when they should have and the second category concerns the violation of social distancing and not wearing a mask. It is interesting to see in which parts of New York such complaints are most common.', style={
        'color': '#000000', 'padding': '20px',
    }),
        
    html.Div([
        html.Img(src=app.get_asset_url('borough_corona_guidelines.png'),style={'width': '100%','height':'100%','display': 'inline-block'})
    ], style={'display': 'inline-block','width': '50%','height':'50%'}),
    
    html.Div([
        html.Img(src=app.get_asset_url('borough_phased_reopening.png'),style={'display': 'inline-block','width': '100%', 'height':'100%'})
    
    ],style={ 'display': 'inline-block','width': '50%', 'height':'50%'}),
    
    html.Div(children='The above two graphs show that Queens has been the worst in following the lockdown restrictions, while Staten Island has the most violations of corona guidelines.', style={
        'color': '#000000', 'padding': '20px','textAlign': 'center'
    }),
        
    html.Div(children='Now we invite you to explore the data yourself and see if you can find any complaint categories where the monthly pattern in 2020 has changed a lot compared to the previous decade.', style={
        'color': '#000000', 'padding': '20px','textAlign': 'center'
    }),   
        
     html.Div([
        dcc.Dropdown(
            id='category_monthly',
            options=complaint_options,
        ),

    ], style={'padding': '10px 10px 10px 10px ','width': '99%','textAlign': 'center'}),
    
    html.Div([
        dcc.Graph(
            id='monthly_all_years',figure=fig_not2020),
        
    ], style={'width': '50%', 'display': 'inline-block', 'padding': '20px 0px 0px 0px'}),
     
     html.Div([
        dcc.Graph(
            id='monthly_2020',figure=fig_2020)
     ], style={'width': '50%', 'display': 'inline-block','padding': '20px 0px 0px 0px'}),





        
        
], style={'backgroundColor': '#FFFFFF', 'width': '100%'})



],

    
    #     html.Div([
#         dcc.Graph(id='x-time-series'),
#         dcc.Graph(id='y-time-series'),
#     ], style={'display': 'inline-block', 'width': '49%'})
    
    style={'backgroundColor': colors['background']}
)





@app.callback(
    dash.dependencies.Output('map', 'figure'),
    [dash.dependencies.Input('crossfilter-year--slider', 'value'),
     dash.dependencies.Input('category', 'value')])
def update_map(year_value,cat):

    updated = website_dataset[website_dataset['Created Date'].dt.year == year_value]
    if cat != 'All complaints':
        updated = updated[updated['Complaint Type'] == cat]

    #fig = px.scatter(x=dff[dff['Indicator Name'] == xaxis_column_name]['Value'],
    #        y=dff[dff['Indicator Name'] == yaxis_column_name]['Value'],
    #        hover_name=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name']
    #        )
    
    fig = px.scatter_mapbox(updated, lat="Latitude", lon="Longitude", hover_name="Complaint Type",color="Complaint Type",
                        hover_data=["City", "Descriptor"],color_continuous_scale=px.colors.cyclical.IceFire,
                        zoom = 9, height=300)
    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

    #fig.update_traces(customdata=dff[dff['Indicator Name'] == yaxis_column_name]['Country Name'])

    #fig.update_layout(margin={'l': 40, 'b': 40, 't': 10, 'r': 0}, hovermode='closest')

    return fig

@app.callback(
    dash.dependencies.Output('across-boroughs', 'figure'),
    [dash.dependencies.Input('crossfilter-year--slider', 'value'),
    dash.dependencies.Input('category', 'value')])
def update_borough_graph(year_value, cat):
    

    new_borough_data = filter_borough_data[filter_borough_data['Created Date'].dt.year == year_value]
    if cat != 'All complaints':
        new_borough_data = new_borough_data[new_borough_data['Complaint Type'] == cat]
    
    labels, counts = np.unique(new_borough_data["Borough"], return_counts=True)
    
    df_boroughs = pd.DataFrame({
        "Borough": labels,
        "Counts": counts
    })
    
    fig_hist_borough =  px.bar(df_boroughs,x="Borough", y="Counts",color_discrete_sequence =['tomato']*len(df_boroughs))
    
    fig_hist_borough.update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background']
    )
    return fig_hist_borough


@app.callback(
    dash.dependencies.Output('across-hours', 'figure'),
    [dash.dependencies.Input('crossfilter-year--slider', 'value'),
    dash.dependencies.Input('category', 'value')])
def update_hour_graph(year_value, cat):

    new_hour_data = website_dataset[website_dataset['Created Date'].dt.year == year_value]
    if cat != 'All complaints':
        new_hour_data = new_hour_data[new_hour_data['Complaint Type'] == cat]
    
    labels, counts = np.unique(new_hour_data["Created Date"].dt.hour, return_counts=True)
    
    df_hours = pd.DataFrame({
        "Hour of day": labels,
        "Counts": counts
    })
    
    fig_hist_hour =  px.bar(df_hours,x="Hour of day", y="Counts",color_discrete_sequence =['tomato']*len(df_boroughs))

    fig_hist_hour.update_layout(
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background']
    )

    return fig_hist_hour

@app.callback(
    dash.dependencies.Output('monthly_all_years', 'figure'),
    [dash.dependencies.Input('category_monthly', 'value')])
def update_monthly_graph(cat):
    

    new_data = dataset_not2020 
    if cat != 'All complaints':
        new_data = dataset_not2020[dataset_not2020['Complaint Type'] == cat]
    
    labels, counts = np.unique(new_data["Created Date"].dt.month, return_counts=True)
    
    df_months = pd.DataFrame({
        "Month": labels,
        "Counts": counts
    })
    
    fig =  px.bar(df_months,x="Month", y="Counts",color_discrete_sequence =['tomato']*len(df_months), title='Complaints in years up to 2020')

    return fig


@app.callback(
    dash.dependencies.Output('monthly_2020', 'figure'),
    [dash.dependencies.Input('category_monthly', 'value')])
def update_monthly2020_graph(cat):

    new_data = dataset_2020 
    if cat != 'All complaints':
        new_data = dataset_2020[dataset_2020['Complaint Type'] == cat]
    
    labels, counts = np.unique(new_data["Created Date"].dt.month, return_counts=True)
    
    df_months = pd.DataFrame({
        "Month": labels,
        "Counts": counts
    })
    
    fig =  px.bar(df_months,x="Month", y="Counts",color_discrete_sequence =['tomato']*len(df_months), title = 'Complaints in 2020')

    return fig



viewer.show(app)
app.run_server(debug=True, use_reloader=False) 



Dash app running on http://127.0.0.1:8050/


Exception in thread Thread-57:
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/lib/python3.8/threading.py", line 1254, in run
    self.function(*self.args, **self.kwargs)
  File "/Users/christinabartozzi/jupyterlab-dash/jupyterlab_dash/__init__.py", line 63, in _perform_show
    raise IOError("""
OSError: 
Unable to communicate with the jupyterlab-dash JupyterLab extension.
Is this Python kernel running inside JupyterLab with the jupyterlab-dash
extension installed?

You can install the extension with:

$ jupyter labextension install jupyterlab-dash

