In [1]:
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

import calendar


In [2]:
# Initilaize an instance
app = Flask(__name__, template_folder="build", static_url_path='/build')

# Database Setup
app.config['SECRET_KEY'] = 'Thisissupposedtobesecret!'
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db/coned.sqlite"

In [3]:
# app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)

# reflect an existing database into a new model
Base = automap_base()

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [4]:
    df = pd.read_csv("final_leaks.csv")

    ## Convert the Date column to correct datetime format
    df['Date'] = pd.to_datetime(df['Date'], format = '%Y-%m-%d')

    ## get the data for the full year where date = 2013
    year_data = df[df['Date'].dt.year == 2013]

    ## get data for leaks per zipcode for that given year
    year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
    year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
    year_data_dict = year_zip_data.to_dict()

    # set variable for monthly data
    monthly_data = year_data[year_data['Date'].dt.month == 1]
    ## get monthly data of temperatures for given month
    ## test to see if it filters out by month. 1 = January
    ## monthly_temp_data = year_data[year_data['Date'].dt.month == 1]
    monthly_temp_data = monthly_data[['Date', 'TMAX']]
    monthly_temp_data.groupby('Date')['TMAX'].max()
    ## put grouped data into its on DF
    grouped_month_temp_data = pd.DataFrame(monthly_temp_data.groupby('Date')['TMAX'].max())
    ## rename columns, then reset eindex
    grouped_month_temp_data.rename(columns = {'TMAX': 'Temp'}, inplace=True)
    grouped_month_temp_data.reset_index(level=0, inplace=True)
    ## needed to change date to string because jsonify does not support datetime values
    grouped_month_temp_data['Date'] = grouped_month_temp_data['Date'].astype(str)
    ## made the keys for the days of the month start at 1 instead of 0
    grouped_month_temp_data.index = np.arange(1,len(grouped_month_temp_data)+1)
    ## converted DF to dictionary 
    month_data_dict = grouped_month_temp_data.to_dict()
    ## Accessed the data
    final_month_dict = month_data_dict['Temp']

    ## get monthly data of leak counts per day for given month
    monthly_leaks = pd.DataFrame(monthly_data.groupby('Date')['TMAX'].count())
    monthly_leaks.rename(columns = {'TMAX': 'Number_of_Leaks'}, inplace=True)
    monthly_leaks.reset_index(level=0, inplace=True)
    monthly_leaks['Date'] = monthly_leaks['Date'].astype(str)
    monthly_leaks.index = np.arange(1,len(monthly_leaks)+1)
    monthly_leaks_dict = monthly_leaks.to_dict()
    final_monthly_leaks_dict = monthly_leaks_dict['Number_of_Leaks']


    # Please the dictionaries from above into final dictionary to respond to API request
    final_dict = {
        "ZipData:" : year_data_dict,
        "TempData" : final_month_dict,
        "MonthlyLeaks" : final_monthly_leaks_dict
    }

In [5]:
grouped_month_temp_data

Unnamed: 0,Date,Temp
1,2013-01-01,40
2,2013-01-02,33
3,2013-01-03,32
4,2013-01-04,37
5,2013-01-05,42
6,2013-01-06,46
7,2013-01-07,45
8,2013-01-08,48
9,2013-01-09,49
10,2013-01-10,47


In [6]:
leaks_dict = {}
for i in range(6):
    wh_year = 2013+i
    year_data = df[df['Date'].dt.year == wh_year]
    
    ## get data for leaks per zipcode for that given year
    year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
    year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
#     print(i)
#     print(year_zip_data.head())
    year_zip_data.reset_index(inplace=True)
    json = year_zip_data.to_json(orient='records')
    leaks_dict[wh_year] = json


In [7]:
year_data = df[df['Date'].dt.year == wh_year]
year_zip_data = pd.DataFrame(year_data.groupby('ZIP_CODE').sum()['TMAX'])
year_zip_data.rename(columns = {'TMAX': 'Total_Leaks'}, inplace=True)
#     print(i)
#     print(year_zip_data.head())
leaks_dict[wh_year] = year_zip_data

year_zip_data.reset_index()

Unnamed: 0,ZIP_CODE,Total_Leaks
0,10001,1916
1,10002,9651
2,10003,4693
3,10004,195
4,10005,510
5,10006,348
6,10007,1549
7,10009,9165
8,10010,2014
9,10011,6129


In [None]:
leaks_dict[2014]

In [None]:
df

In [8]:
months = pd.DataFrame(df['Date'].dt.month)
months.rename(columns = {"Date": 'Month'}, inplace=True)


In [9]:
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

In [None]:
df

In [10]:
df['Month'] = df['Month'].apply(lambda x: calendar.month_abbr[x])
grouped_month_day_df  = df.groupby(['Year','Month', 'Date'], sort=False).count()['TMAX']

In [11]:
grouped_month_day_df = pd.DataFrame(grouped_month_day_df)
grouped_month_day_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TMAX
Year,Month,Date,Unnamed: 3_level_1
2013,Jan,2013-01-01,35
2013,Jan,2013-01-02,46
2013,Jan,2013-01-03,44
2013,Jan,2013-01-04,33
2013,Jan,2013-01-05,53


In [13]:
grouped_month_day_indexed_df = grouped_month_day_df.reset_index()
grouped_month_day_indexed_df['Date'] = grouped_month_day_indexed_df['Date'].astype(str)
grouped_month_day_indexed_df.head()

Unnamed: 0,Year,Month,Date,TMAX
0,2013,Jan,2013-01-01,35
1,2013,Jan,2013-01-02,46
2,2013,Jan,2013-01-03,44
3,2013,Jan,2013-01-04,33
4,2013,Jan,2013-01-05,53


In [15]:
response = {}
for row in grouped_month_day_indexed_df.values:
    
    year = row[0]
    month = row[1]
    date = row[2]
    tmax = row[3]
    
    
    if year not in response:
        response[year] = {}
    else:
        if month not in response[year]:
            response[year][month] = {}
        else:
            if date not in response[year][month]:
                response[year][month][date] = tmax
            else:
                pass

response

{2013: {'Jan': {'2013-01-03': 44,
   '2013-01-04': 33,
   '2013-01-05': 53,
   '2013-01-06': 42,
   '2013-01-07': 55,
   '2013-01-08': 40,
   '2013-01-09': 35,
   '2013-01-10': 40,
   '2013-01-11': 50,
   '2013-01-12': 34,
   '2013-01-13': 33,
   '2013-01-14': 48,
   '2013-01-15': 43,
   '2013-01-16': 40,
   '2013-01-17': 40,
   '2013-01-18': 30,
   '2013-01-19': 45,
   '2013-01-20': 30,
   '2013-01-21': 40,
   '2013-01-22': 35,
   '2013-01-23': 43,
   '2013-01-24': 60,
   '2013-01-25': 51,
   '2013-01-26': 30,
   '2013-01-27': 40,
   '2013-01-28': 39,
   '2013-01-29': 38,
   '2013-01-30': 48,
   '2013-01-31': 27},
  'Feb': {'2013-02-02': 57,
   '2013-02-03': 45,
   '2013-02-04': 45,
   '2013-02-05': 40,
   '2013-02-06': 43,
   '2013-02-07': 41,
   '2013-02-08': 41,
   '2013-02-09': 46,
   '2013-02-10': 41,
   '2013-02-11': 47,
   '2013-02-12': 44,
   '2013-02-13': 59,
   '2013-02-14': 41,
   '2013-02-15': 42,
   '2013-02-16': 37,
   '2013-02-17': 28,
   '2013-02-18': 52,
   '2013-02-1

In [None]:
grouped_month_day_df.to_json(orient='records')

In [None]:
# dic = {}

# for year in df['Year'].unique():
#     dic[year] = []
#     for month in df['Month'].unique():
#         dic[year][month] = month
#         if grouped_month_day_df.loc[df['Year'] == year and df.loc[df['Month'] == month]:
#                   dic[year][month][date] = grouped_month_day_df['TMAX']
                  
        