In [1]:
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

import calendar


In [2]:
# Initilaize an instance
app = Flask(__name__, template_folder="build", static_url_path='/build')

# Database Setup
app.config['SECRET_KEY'] = 'Thisissupposedtobesecret!'
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db/coned.sqlite"

In [3]:
# app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)

# reflect an existing database into a new model
Base = automap_base()

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [4]:
    df = pd.read_csv("final_leaks.csv")

    ## Convert the Date column to correct datetime format
    df['Date'] = pd.to_datetime(df['Date'], format = '%Y-%m-%d')

    ## get the data for the full year where date = 2013
    year_data = df[df['Date'].dt.year == 2013]

    ## get data for leaks per zipcode for that given year
    year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
    year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
    year_data_dict = year_zip_data.to_dict()

    # set variable for monthly data
    monthly_data = year_data[year_data['Date'].dt.month == 1]
    ## get monthly data of temperatures for given month
    ## test to see if it filters out by month. 1 = January
    ## monthly_temp_data = year_data[year_data['Date'].dt.month == 1]
    monthly_temp_data = monthly_data[['Date', 'TMAX']]
    monthly_temp_data.groupby('Date')['TMAX'].max()
    ## put grouped data into its on DF
    grouped_month_temp_data = pd.DataFrame(monthly_temp_data.groupby('Date')['TMAX'].max())
    ## rename columns, then reset eindex
    grouped_month_temp_data.rename(columns = {'TMAX': 'Temp'}, inplace=True)
    grouped_month_temp_data.reset_index(level=0, inplace=True)
    ## needed to change date to string because jsonify does not support datetime values
    grouped_month_temp_data['Date'] = grouped_month_temp_data['Date'].astype(str)
    ## made the keys for the days of the month start at 1 instead of 0
    grouped_month_temp_data.index = np.arange(1,len(grouped_month_temp_data)+1)
    ## converted DF to dictionary 
    month_data_dict = grouped_month_temp_data.to_dict()
    ## Accessed the data
    final_month_dict = month_data_dict['Temp']

    ## get monthly data of leak counts per day for given month
    monthly_leaks = pd.DataFrame(monthly_data.groupby('Date')['TMAX'].count())
    monthly_leaks.rename(columns = {'TMAX': 'Number_of_Leaks'}, inplace=True)
    monthly_leaks.reset_index(level=0, inplace=True)
    monthly_leaks['Date'] = monthly_leaks['Date'].astype(str)
    monthly_leaks.index = np.arange(1,len(monthly_leaks)+1)
    monthly_leaks_dict = monthly_leaks.to_dict()
    final_monthly_leaks_dict = monthly_leaks_dict['Number_of_Leaks']


    # Please the dictionaries from above into final dictionary to respond to API request
    final_dict = {
        "ZipData:" : year_data_dict,
        "TempData" : final_month_dict,
        "MonthlyLeaks" : final_monthly_leaks_dict
    }

In [5]:
grouped_month_temp_data

Unnamed: 0,Date,Temp
1,2013-01-01,40
2,2013-01-02,33
3,2013-01-03,32
4,2013-01-04,37
5,2013-01-05,42
6,2013-01-06,46
7,2013-01-07,45
8,2013-01-08,48
9,2013-01-09,49
10,2013-01-10,47


In [6]:
leaks_dict = {}
for i in range(6):
    wh_year = 2013+i
    year_data = df[df['Date'].dt.year == wh_year]
    
    ## get data for leaks per zipcode for that given year
    year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
    year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
#     print(i)
#     print(year_zip_data.head())
    year_zip_data.reset_index(inplace=True)
    json = year_zip_data.to_json(orient='records')
    leaks_dict[wh_year] = json


In [7]:
year_data = df[df['Date'].dt.year == wh_year]
year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
#     print(i)
#     print(year_zip_data.head())
leaks_dict[wh_year] = year_zip_data

year_zip_data.reset_index()

Unnamed: 0,ZIP_CODE,Total_Leaks
0,10001,1916
1,10002,9651
2,10003,4693
3,10004,195
4,10005,510
5,10006,348
6,10007,1549
7,10009,9165
8,10010,2014
9,10011,6129


In [8]:
leaks_dict[2014]

'[{"ZIP_CODE":10001,"Total_Leaks":5735},{"ZIP_CODE":10002,"Total_Leaks":26431},{"ZIP_CODE":10003,"Total_Leaks":12350},{"ZIP_CODE":10004,"Total_Leaks":876},{"ZIP_CODE":10005,"Total_Leaks":493},{"ZIP_CODE":10006,"Total_Leaks":260},{"ZIP_CODE":10007,"Total_Leaks":1625},{"ZIP_CODE":10009,"Total_Leaks":19602},{"ZIP_CODE":10010,"Total_Leaks":5158},{"ZIP_CODE":10011,"Total_Leaks":8611},{"ZIP_CODE":10012,"Total_Leaks":6655},{"ZIP_CODE":10013,"Total_Leaks":8124},{"ZIP_CODE":10014,"Total_Leaks":9277},{"ZIP_CODE":10016,"Total_Leaks":9198},{"ZIP_CODE":10017,"Total_Leaks":4015},{"ZIP_CODE":10018,"Total_Leaks":2180},{"ZIP_CODE":10019,"Total_Leaks":7590},{"ZIP_CODE":10020,"Total_Leaks":155},{"ZIP_CODE":10021,"Total_Leaks":13773},{"ZIP_CODE":10022,"Total_Leaks":5082},{"ZIP_CODE":10023,"Total_Leaks":7328},{"ZIP_CODE":10024,"Total_Leaks":9852},{"ZIP_CODE":10025,"Total_Leaks":16330},{"ZIP_CODE":10026,"Total_Leaks":13382},{"ZIP_CODE":10027,"Total_Leaks":18657},{"ZIP_CODE":10028,"Total_Leaks":6207},{"ZIP_C

In [9]:
df

Unnamed: 0.1,Unnamed: 0,Date,TMAX,TMIN,IM_INCIDENT_KEY,TOTAL_INCIDENT_DURATION,ACTION_TAKEN1_DESC,ACTION_TAKEN2_DESC,ZIP_CODE,BOROUGH_DESC,Incident_Time,Arrival_Date,Arrival_Time
0,0,2013-01-01,40,26,55675645,1108,44 - Hazardous materials leak control & contai...,64 - Shut down system,10455,Bronx,11:30:10 PM,01/01/2013,11:34:39 PM
1,1,2013-01-01,40,26,55675621,461,44 - Hazardous materials leak control & contai...,64 - Shut down system,10027,Manhattan,11:14:24 PM,01/01/2013,11:19:08 PM
2,2,2013-01-01,40,26,55675611,829,44 - Hazardous materials leak control & contai...,64 - Shut down system,11207,Brooklyn,11:08:08 PM,01/01/2013,11:10:30 PM
3,3,2013-01-01,40,26,55675547,1025,44 - Hazardous materials leak control & contai...,64 - Shut down system,11373,Queens,10:26:05 PM,01/01/2013,10:29:29 PM
4,4,2013-01-01,40,26,55675480,1054,44 - Hazardous materials leak control & contai...,64 - Shut down system,11360,Queens,09:33:56 PM,01/01/2013,09:39:20 PM
5,5,2013-01-01,40,26,55675481,1515,44 - Hazardous materials leak control & contai...,64 - Shut down system,10314,Staten Island,09:33:49 PM,01/01/2013,09:38:23 PM
6,6,2013-01-01,40,26,55675429,2707,44 - Hazardous materials leak control & contai...,64 - Shut down system,10019,Manhattan,09:10:36 PM,01/01/2013,09:14:47 PM
7,7,2013-01-01,40,26,55675384,899,44 - Hazardous materials leak control & contai...,64 - Shut down system,11221,Brooklyn,08:40:27 PM,01/01/2013,08:42:33 PM
8,8,2013-01-01,40,26,55675098,961,44 - Hazardous materials leak control & contai...,64 - Shut down system,10456,Bronx,06:50:03 PM,01/01/2013,06:54:08 PM
9,9,2013-01-01,40,26,55675057,1610,44 - Hazardous materials leak control & contai...,64 - Shut down system,10035,Manhattan,06:30:09 PM,01/01/2013,06:33:29 PM


In [10]:
months = pd.DataFrame(df['Date'].dt.month)
months.rename(columns = {"Date": 'Month'}, inplace=True)


In [11]:
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

In [12]:
df

Unnamed: 0.1,Unnamed: 0,Date,TMAX,TMIN,IM_INCIDENT_KEY,TOTAL_INCIDENT_DURATION,ACTION_TAKEN1_DESC,ACTION_TAKEN2_DESC,ZIP_CODE,BOROUGH_DESC,Incident_Time,Arrival_Date,Arrival_Time,Month,Year
0,0,2013-01-01,40,26,55675645,1108,44 - Hazardous materials leak control & contai...,64 - Shut down system,10455,Bronx,11:30:10 PM,01/01/2013,11:34:39 PM,1,2013
1,1,2013-01-01,40,26,55675621,461,44 - Hazardous materials leak control & contai...,64 - Shut down system,10027,Manhattan,11:14:24 PM,01/01/2013,11:19:08 PM,1,2013
2,2,2013-01-01,40,26,55675611,829,44 - Hazardous materials leak control & contai...,64 - Shut down system,11207,Brooklyn,11:08:08 PM,01/01/2013,11:10:30 PM,1,2013
3,3,2013-01-01,40,26,55675547,1025,44 - Hazardous materials leak control & contai...,64 - Shut down system,11373,Queens,10:26:05 PM,01/01/2013,10:29:29 PM,1,2013
4,4,2013-01-01,40,26,55675480,1054,44 - Hazardous materials leak control & contai...,64 - Shut down system,11360,Queens,09:33:56 PM,01/01/2013,09:39:20 PM,1,2013
5,5,2013-01-01,40,26,55675481,1515,44 - Hazardous materials leak control & contai...,64 - Shut down system,10314,Staten Island,09:33:49 PM,01/01/2013,09:38:23 PM,1,2013
6,6,2013-01-01,40,26,55675429,2707,44 - Hazardous materials leak control & contai...,64 - Shut down system,10019,Manhattan,09:10:36 PM,01/01/2013,09:14:47 PM,1,2013
7,7,2013-01-01,40,26,55675384,899,44 - Hazardous materials leak control & contai...,64 - Shut down system,11221,Brooklyn,08:40:27 PM,01/01/2013,08:42:33 PM,1,2013
8,8,2013-01-01,40,26,55675098,961,44 - Hazardous materials leak control & contai...,64 - Shut down system,10456,Bronx,06:50:03 PM,01/01/2013,06:54:08 PM,1,2013
9,9,2013-01-01,40,26,55675057,1610,44 - Hazardous materials leak control & contai...,64 - Shut down system,10035,Manhattan,06:30:09 PM,01/01/2013,06:33:29 PM,1,2013


In [13]:
df['Month'] = df['Month'].apply(lambda x: calendar.month_abbr[x])
grouped_month_day_df  = df.groupby(['Year','Month', 'Date'], sort=False).count()['TMAX']

In [14]:
grouped_month_day_df = pd.DataFrame(grouped_month_day_df)
grouped_month_day_df.rename(columns = {'TMAX': 'Leak_Count'}, inplace =  True)

In [15]:
grouped_month_day_indexed_df = grouped_month_day_df.reset_index()
grouped_month_day_indexed_df['Date'] = grouped_month_day_indexed_df['Date'].astype(str)
grouped_month_day_indexed_df.head()

Unnamed: 0,Year,Month,Date,Leak_Count
0,2013,Jan,2013-01-01,35
1,2013,Jan,2013-01-02,46
2,2013,Jan,2013-01-03,44
3,2013,Jan,2013-01-04,33
4,2013,Jan,2013-01-05,53


In [16]:
response = {}
for row in grouped_month_day_indexed_df.values:
    
    year = row[0]
    month = row[1]
    date = row[2]
    leak_c = row[3]
    
    
    if year not in response:
        response[year] = {}
    else:
        if month not in response[year]:
            response[year][month] = {}
        else:
            if date not in response[year][month]:
                response[year][month][date] = leak_c
            else:
                pass

response

{2013: {'Jan': {'2013-01-03': 44,
   '2013-01-04': 33,
   '2013-01-05': 53,
   '2013-01-06': 42,
   '2013-01-07': 55,
   '2013-01-08': 40,
   '2013-01-09': 35,
   '2013-01-10': 40,
   '2013-01-11': 50,
   '2013-01-12': 34,
   '2013-01-13': 33,
   '2013-01-14': 48,
   '2013-01-15': 43,
   '2013-01-16': 40,
   '2013-01-17': 40,
   '2013-01-18': 30,
   '2013-01-19': 45,
   '2013-01-20': 30,
   '2013-01-21': 40,
   '2013-01-22': 35,
   '2013-01-23': 43,
   '2013-01-24': 60,
   '2013-01-25': 51,
   '2013-01-26': 30,
   '2013-01-27': 40,
   '2013-01-28': 39,
   '2013-01-29': 38,
   '2013-01-30': 48,
   '2013-01-31': 27},
  'Feb': {'2013-02-02': 57,
   '2013-02-03': 45,
   '2013-02-04': 45,
   '2013-02-05': 40,
   '2013-02-06': 43,
   '2013-02-07': 41,
   '2013-02-08': 41,
   '2013-02-09': 46,
   '2013-02-10': 41,
   '2013-02-11': 47,
   '2013-02-12': 44,
   '2013-02-13': 59,
   '2013-02-14': 41,
   '2013-02-15': 42,
   '2013-02-16': 37,
   '2013-02-17': 28,
   '2013-02-18': 52,
   '2013-02-1

In [17]:
# df['Date'] = df['Date'].astype(str)
# mergred = grouped_month_day_indexed_df.merge(df, how='inner',on='Date')[['Year_x', 'Month_x', 'Date', 'Leak_Count', 'TMAX']]
# mergred.head()

In [18]:
# response = {}
# for row in mergred.values:
    
#     year = row[0]
#     month = row[1]
#     date = row[2]
#     leak_c = row[3]
#     TMAX = row[4]
    
    
#     if year not in response:
#         response[year] = {}
#     else:
#         if month not in response[year]:
#             response[year][month] = {}
#         else:
#             if date not in response[year][month]:
#                 response[year][month] = [{'day':date,
#                                          'num_leaks':leak_c,
#                                         'temp':TMAX}]
#             else:
#                 pass

# response

In [19]:
#