# Report generation

In [29]:
from datetime import datetime

import math
import pickle
from os import listdir
from os.path import isfile, join
import os
import csv

from flask import Flask, render_template, request
import tensorflow as tf
from tensorflow import keras
import copy
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from numpy import concatenate
import warnings
from tqdm import tqdm
import pandas

In [30]:
def get_station_names():
    mypath = '../datasets/bss/dublin/ml_models/'
    files = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    station_ids = [x.split('.')[0].split('_')[1] for x in files]

    output = {}

    for sid in station_ids:
        csv_file = csv.reader(open('../datasets/bss/dublin/original/dublin.csv', "r"), delimiter=",")
        for row in csv_file:
            if sid == row[0]:
                output[sid] = row[1]

    return output


def simple_predict(station_id, int_time, int_date, int_day):
    destination_directory = '../datasets/bss/dublin/simple_ml_models/'
    scaler_destination_directory = copy.deepcopy(destination_directory) + 'scalers/'

    model = tf.keras.models.load_model(destination_directory + 'station_' + str(station_id) + '.h5')

    file = open(scaler_destination_directory + 'station_' + str(station_id) + '.pkl', "rb")
    scaler = pickle.load(file)
    file.close()

    params = np.array([0, int_time, int_date, int_day])
    params = params.reshape(1, -1)
    params = scaler.transform(params)
    params = np.array([params])
    params = params.tolist()
    params[0][0].pop(0)
    params = np.array(params)

    answer = model.predict(params)
    full_row = concatenate((answer, params[0]), axis=1)
    inv_row = scaler.inverse_transform(full_row)
    
    return inv_row[0][0]

def get_station_capacities():
    df = pandas.read_csv('../datasets/bss/dublin/original/dublinbikes_20180701_20181001.csv',
                    usecols=['STATION ID', 'BIKE STANDS'])
    df = df.drop_duplicates()
    output = {}
    for index, row in df.iterrows():
        output[int(row['STATION ID'])] = int(row['BIKE STANDS'])
            
    return output


In [31]:
d = datetime(year=2019, month=12, day=3)

In [32]:
def generate_report(date=datetime(year=2019, month=8, day=1)):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    destination_directory = '../outputs/date_reports/'
    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)
    
    if os.path.exists(destination_directory + date.strftime("%Y-%m-%d") +  '.csv'):
        print("file already exists")
        return
    
    with open(destination_directory + date.strftime("%Y-%m-%d") +  '.csv', 'w', newline='') as csvfile:
        times = ["station"]
        for x in range(24):
            times.append(str(x) + ":00")
        
        writer = csv.DictWriter(csvfile, fieldnames=times)

        writer.writeheader()
        
        stations = get_station_names()
        
        for sid, name in tqdm(stations.items()):
            answers = {"station":sid}
            for x in range(24):
                answers[str(x) + ":00"] = (
                    round(
                        simple_predict(
                            sid,
                            (x * 12),
                            date.strftime('%j'),
                            date.strftime('%w')
                        )
                    )
                )
            writer.writerow(answers)
            

generate_report(d)

file already exists


In [35]:
def get_overunder_population(pop_dict):
    # Get the maximum capacity of each station as dict
    station_caps = get_station_capacities()
    
    overpopulation_list = {}
    underpopulation_list = {}
    for station_id, pop in pop_dict.items():

        if pop < 0:
            continue

        try:
            if int(station_caps[station_id] * danger_capacity) < pop:
                overpopulation_list[station_id] = [
                                                        int(pop - (station_caps[station_id] * optimal_capacity)),
                                                        pop/station_caps[station_id],
                                                        pop
                                                        ]

            if int(station_caps[station_id] * danger_capacity_inv) > pop:
                underpopulation_list[station_id] = [
                                                        int((station_caps[station_id] * optimal_capacity) - pop),
                                                        pop/station_caps[station_id],
                                                        pop
                                                        ]

        except KeyError as e:
#             print(e)
            continue

    overpopulation_list = dict(sorted(overpopulation_list.items(), key=lambda item: item[1][1], reverse=True))
    underpopulation_list = dict(sorted(underpopulation_list.items(), key=lambda item: item[1][1], reverse=False))
    
    return overpopulation_list, underpopulation_list
        
    

def generate_jobs(date=datetime(year=2019, month=8, day=1)):
    # Params for what counts as "bad" capacity
    optimal_capacity = 0.5
    danger_capacity = 0.7
    danger_capacity_inv = 0.2
    
    # Get the maximum capacity of each station as dict
    station_caps = get_station_capacities()
    
    # Open the corrisponding predict file
    destination_directory = '../outputs/date_reports/'
    destination_file = destination_directory + date.strftime("%Y-%m-%d") + '.csv'
    if not os.path.exists(destination_file):
        print("File" + destination_file + " does not exist")
        return
    df = pandas.read_csv(destination_file)
  
    # Get list of times
    col_names = list(df.columns)[1:]
    
    # Loop through times
    for time in col_names:
        df = pandas.read_csv(destination_file,
                             usecols=['station', time])
        
        pop_dict = {}
        for index, row in df.iterrows():
            pop_dict[row['station']] = row[time]
#         print(time)
#         print(pop_dict)
#         return
        
        overpopulation_list, underpopulation_list = get_overunder_population(pop_dict)
        
        print(station_caps)
        print()
        print(overpopulation_list)
        print()
        print(underpopulation_list)
        
        overpopulation_pop = 0
        for x, y in overpopulation_list.items():
            overpopulation_pop += y[0]
        
        underpopulation_pop = 0
        for x, y in underpopulation_list.items():
            underpopulation_pop += y[0]
            
        print(underpopulation_pop)
        print(overpopulation_pop)
#         print(list(overpopulation_list.keys())[0])
        jobs = []
        while underpopulation_pop > 0 and overpopulation_pop > 0:
            num_to_move = min(overpopulation_list[list(overpopulation_list.keys())[0]][0], underpopulation_list[list(underpopulation_list.keys())[0]][0])
#             print(num_to_move)
            job = (
                list(overpopulation_list.keys())[0],
                list(underpopulation_list.keys())[0],
                num_to_move
            )
            
            jobs.append(job)
            print(jobs)
            
            overpopulation_pop -= num_to_move
            underpopulation_pop -= num_to_move
            
            overpopulation_list[job[0]]
            
            
            return
            
        
        return
            
            
generate_jobs(d)

{1: 31, 2: 20, 3: 20, 4: 20, 5: 40, 6: 20, 7: 29, 8: 30, 9: 24, 10: 16, 11: 30, 12: 20, 13: 30, 14: 30, 15: 16, 16: 20, 17: 20, 18: 30, 19: 30, 21: 30, 22: 20, 23: 30, 24: 20, 25: 30, 26: 20, 27: 20, 28: 30, 29: 29, 30: 20, 31: 20, 32: 30, 33: 23, 34: 30, 35: 30, 36: 40, 37: 30, 38: 40, 39: 20, 40: 21, 41: 20, 42: 30, 43: 30, 44: 30, 45: 30, 46: 35, 47: 40, 48: 40, 49: 40, 50: 40, 51: 40, 52: 32, 53: 40, 54: 33, 55: 36, 56: 40, 57: 23, 58: 40, 59: 20, 60: 30, 61: 25, 62: 40, 63: 35, 64: 40, 65: 40, 66: 40, 67: 40, 68: 40, 69: 40, 70: 28, 71: 40, 72: 31, 73: 30, 74: 30, 75: 40, 76: 38, 77: 29, 78: 40, 79: 27, 80: 40, 81: 40, 82: 22, 83: 40, 84: 30, 85: 35, 86: 38, 87: 38, 88: 30, 89: 40, 90: 40, 91: 30, 92: 40, 93: 40, 94: 40, 95: 40, 96: 30, 97: 40, 98: 40, 99: 30, 100: 25, 101: 30, 102: 40, 103: 40, 104: 40, 105: 36, 106: 40, 107: 40, 108: 40, 110: 40, 111: 40, 112: 30, 114: 40, 113: 40, 115: 30}

{44: [11, 0.8666666666666667, 26], 45: [11, 0.8666666666666667, 26], 97: [11, 0.775, 31]