# FInDe

## Availables banks
- Cajamar
- Openbank
- Unicaja

## Fist step
Put your csv or xls files inside ```accounts/current/<bank_name>/file``` and ```accounts/savings/<bank_name>/file```

## Requisites

In [1]:
!pip install plotly



## Code

### Imports

In [2]:
import plotly as py
import plotly.graph_objs as go

import numpy as np
import datetime as dt
import os.path
import xlrd
import pandas as pd

py.offline.init_notebook_mode(connected=True)

### Constant and settings

In [3]:
CURRENT = "current"
SAVING = "saving"
ACCOUNTTYPES  = [CURRENT, SAVING]
SUPPORTED_BANKS = {
    "unicaja": "Unicaja",
    "cajamar": "Cajamar",
    "openbank": "Openbank"
}

### Process CSV and XLS

In [4]:
def processCSV(file, splitToken, bank): #TODO Move to pandas
    fecha = []
    saldo = []
    movimientos = []
    first = True
    
    if bank == 'cajamar':
        with open(file) as f:
            for line in f:
                if first is False:
                    tokens = line.split(splitToken)
                    fecha.append(dt.datetime.strptime(tokens[0], '%d/%m/%Y').date().isoformat())
                    saldo.append(float(tokens[len(tokens)-1][:-1].replace('.', '').replace(',', '.')))
                    movimientos.append(float(tokens[len(tokens)-2].replace('.','').replace(',','.').replace(' ', '')))
                first = False

    return (saldo, fecha, movimientos)

In [5]:
def processXLS(file, bank):   
    balance = []
    date = []
    movements = []
    
    if bank == 'unicaja':
        book = xlrd.open_workbook(file)
        first_sheet = book.sheet_by_index(0)
        date_int = first_sheet.col_values(0)[5:]
        balance = first_sheet.col_values(5)[5:]
        movements = first_sheet.col_values(3)[5:]
        date = [xlrd.xldate_as_datetime(date_int[i], book.datemode).date().isoformat() for i in xrange(0, len(date_int))]
    
    if bank == 'openbank':
        df = pd.read_html(file)
        df = df[0].dropna(axis=0, thresh=4)
        date_str = df.iloc[1:,1]
        date = [dt.datetime.strptime(x, '%d/%m/%Y').date().isoformat() for x in date_str]
        balance_str = df.iloc[1:,9]
        balance = [float((x[:-2] + ',' + x[-2:]).replace('.','').replace(',','.')) for x in balance_str]
        movements_str = df.iloc[1:,7]
        movements = [float((x[:-2] + ',' + x[-2:]).replace('.','').replace(',','.')) for x in movements_str]

    return (balance, date, movements)

### Load files

In [6]:
def sortDataFiles(bankdata):
    for accountType in ACCOUNTTYPES:
        for bankname in bankdata[accountType]:
            if "movements" in bankdata[accountType][bankname]:
                (bankdata[accountType][bankname]["date"], bankdata[accountType][bankname]["balance"], bankdata[accountType][bankname]["movements"]) = zip(*sorted(zip(bankdata[accountType][bankname]["date"], bankdata[accountType][bankname]["balance"], bankdata[accountType][bankname]["movements"])))
            else:
                (bankdata[accountType][bankname]["date"], bankdata[accountType][bankname]["balance"]) = zip(*sorted(zip(bankdata[accountType][bankname]["date"], bankdata[accountType][bankname]["balance"])))
            
    return bankdata

In [7]:
def loadDataFiles():    
    print("Data files found:")
    bankdata = {}
    for accountType in ACCOUNTTYPES:
        path = "accounts/{}/".format(accountType)
        directories = [f for f in os.listdir(path) if os.path.isdir(path+f)]
        bankdata[accountType] = {}
        for bankname in directories:
            bankdata[accountType][bankname] = {"date":[], "balance":[], "movements":[]}
            files = [os.path.join(path+bankname,f) for f in os.listdir(path+bankname)]
            #files.sort(key=lambda x: os.path.getmtime(x))
            for datafile in files:
                extension = os.path.splitext(datafile)[1]
                if extension == ".xls":
                    print("{} - {}".format(bankname, datafile))
                    (balance, date, movements) = processXLS(datafile, bankname)
                    bankdata[accountType][bankname]["balance"].extend(balance)
                    bankdata[accountType][bankname]["date"].extend(date)
                    bankdata[accountType][bankname]["movements"].extend(movements)
                if extension == ".csv":
                    print("{} - {}".format(bankname, datafile))
                    (balance, date, movements) = processCSV(datafile, '\t', bankname)
                    bankdata[accountType][bankname]["balance"].extend(balance)
                    bankdata[accountType][bankname]["date"].extend(date)
                    bankdata[accountType][bankname]["movements"].extend(movements)

    return sortDataFiles(bankdata)

In [8]:
def getIntervalDates(bankdata):
    #TODO
    '''
    1. Get min total date
    2. Get max total date
    '''
    return (0, 0)

In [9]:
def fillEmpty(bankdata):
    #TODO
    '''
    1. Search total last date
    2. For each bankdata fill each date from each last date to the total last date with the last value
    '''
    return bankdata

### Traces

In [10]:
def buildTraces(bankdata, min_trace = False, mean_trace = False, max_trace = False):
    data = []
    min_amount = 0
    max_amount = 0
    
    for accountType in ACCOUNTTYPES: # Account type
        for bank in bankdata[accountType]: # Bank name
            dates = np.array(bankdata[accountType][bank]['date'])
            balances = np.array(bankdata[accountType][bank]['balance'])
            
            bankName = '(' + accountType + ') ' + SUPPORTED_BANKS[bank]

            trace_main = go.Scatter(
                x = dates,
                y = balances,
                name = bankName + ': Saldo ' + str(format(balances[-1], ',.2f').replace(",", "X").replace(".", ",").replace("X", ".")) + '€',
                #line = dict(
                #    color = 'green'
                #),
                mode = 'lines'
            )
            data.append(trace_main)

            if max_trace:
                trace_max = go.Scatter(
                    x = dates,
                    y = [balances.max() for f in dates],
                    name = bankName + ': Saldo máximo',
                    #visible = 'legendonly',
                    #hoverinfo = 'name',
                    line = dict(
                        #color = 'cyan',
                        width = 4,
                        dash = 'dot'
                    )
                )
                data.append(trace_max)

            if mean_trace:
                trace_mean = go.Scatter(
                    x = dates,
                    y = [balances.mean() for f in dates],
                    name = bankName + ': Saldo medio',
                    #hoverinfo = 'none',
                    line = dict(
                        #color = 'magenta',
                        width = 4,
                        dash = 'dashdot'
                    )
                )
                data.append(trace_mean)

            if min_trace:
                trace_min = go.Scatter(
                    x = dates,
                    y = [balances.min() for f in dates],
                    name = bankName + ': Saldo mínimo',
                    line = dict(
                        #color = 'red',
                        width = 4,
                        dash = 'dot'
                    )
                )
                data.append(trace_min)
                
            # Extra
            if balances.max() > max_amount:
                max_amount = balances.max()
        
    return (data, min_amount, max_amount)

In [11]:
def buildPiggy(bankdata):
    traces = []
    
    for bank in bankdata[SAVING]: # Bank name
        balances = np.array(bankdata[SAVING][bank]['balance'])
        
        trace = go.Bar(
            x = ['Piggy'],
            y = [balances[-1]],
            name = SUPPORTED_BANKS[bank]
        )
        
        traces.append(trace)
        
    return traces

In [43]:
def buildProfit(bankdata): #TODO Fix
    data = []

    for bank in bankdata[CURRENT]:
        date = bankdata[CURRENT][bank]['date']
        movements = bankdata[CURRENT][bank]['movements']

        pDay = date[0][:7]
        pValue = 0
        months = []
        profit = []
        for cDay, cValue in zip(date, movements):
            month = dt.datetime.strptime(cDay, '%Y-%m-%d')
            print(month)
            pValue += float(cValue)
            if cDay[:7] != pDay:
                months.append(pDay)
                profit.append(pValue)
                pDay = cDay[:7]
                pValue = 0
        months.append(pDay)
        profit.append(pValue)
        
        trace = go.Bar(
            x = months,
            y = profit,
            name = "Profit for {}".format(bank)
        )
        data.append(trace)
        
    return data

### Plot

In [13]:
def plot_super_view(bankdata, minDate, maxDate):
    (data, min_amount, max_amount) = buildTraces(bankdata)
    piggyData = buildPiggy(bankdata)
    for piggyTrace in piggyData:
        newTrace = go.Bar(
            x = piggyTrace.x,
            y = piggyTrace.y,
            name = piggyTrace.name,
            xaxis = 'x2',
            yaxis = 'y2'
        )
        data.append(newTrace)
    layout = go.Layout(
        xaxis=dict(
            domain=[0, 0.9]
        ),
        yaxis=dict(
            range=[min_amount, max_amount]
        ),
        xaxis2=dict(
            domain=[0.9, 1]
        ),
        yaxis2=dict(
            anchor='x2',
            range=[min_amount, max_amount],
            showticklabels=False
        ),
        title = 'Super view ' + str(minDate) + ' / ' + str(maxDate),
        barmode='stack'
    )
    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig, filename='side-by-side-subplot')

In [14]:
def plot_general(bankdata, minDate, maxDate):
    (data, _, _) = buildTraces(bankdata)
    layout = go.Layout(title = 'Saldo ' + str(minDate) + ' / ' + str(maxDate),
                  xaxis = dict(title = 'Fecha'),
                  yaxis = dict(title = 'Saldo (€)'),
                  showlegend = True
    )

    fig = dict(data=data, layout=layout)
    py.offline.iplot(fig, filename='styled-line')

In [15]:
def plot_piggy(bankdata):
    data = buildPiggy(bankdata)
    layout = go.Layout(
        title = 'Saving',
        barmode='stack'
    )

    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig, filename='stacked-bar')

In [16]:
def plot_profit(bankdata):
    data = buildProfit(bankdata)
    layout = go.Layout(
        title = 'Monthly profit',
        barmode='group'
    )

    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig, filename='grouped-bar')

### Main

In [44]:
bankdata = loadDataFiles() # Load files
(minDate, maxDate) = getIntervalDates(bankdata) # Get dates interval
bankdata = fillEmpty(bankdata) # Fill missing data with the same value

plot_super_view(bankdata, minDate, maxDate)
plot_profit(bankdata)

Data files found:
openbank - accounts/current/openbank/nomina.xls
openbank - accounts/current/openbank/nomina2.xls
cajamar - accounts/current/cajamar/saldo.csv
openbank - accounts/saving/openbank/ahorro2.xls
openbank - accounts/saving/openbank/ahorro.xls
cajamar - accounts/saving/cajamar/saldo.csv


2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-21 00:00:00
2019-01-22 00:00:00
2019-01-22 00:00:00
2019-01-22 00:00:00
2019-01-22 00:00:00
2019-01-22 00:00:00
2019-01-22 00:00:00
2019-01-23 00:00:00
2019-01-23 00:00:00
2019-01-23 00:00:00
2019-01-23 00:00:00
2019-01-24 00:00:00
2019-01-24 00:00:00
2019-01-24 00:00:00
2019-01-24 00:00:00
2019-01-25 00:00:00
2019-01-25 00:00:00
2019-01-25 00:00:00
2019-01-25 00:00:00
2019-01-25 00:00:00
2019-01-25 00:00:00
2019-01-28 00:00:00
2019-01-28 00:00:00
2019-01-31 00:00:00
2019-01-31 00:00:00
2019-01-31 00:00:00
2019-01-31 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-01 00:00:00
2019-02-04 00:00:00
2019-02-04 00:00:00
2019-02-06 00:00:00
2019-02-06 00:00:00
2019-02-06 00:00:00
2019-02-06 00:00:00


2016-07-29 00:00:00
2016-07-30 00:00:00
2016-07-31 00:00:00
2016-08-02 00:00:00
2016-08-02 00:00:00
2016-08-03 00:00:00
2016-08-03 00:00:00
2016-08-04 00:00:00
2016-08-10 00:00:00
2016-08-19 00:00:00
2016-08-19 00:00:00
2016-08-22 00:00:00
2016-08-24 00:00:00
2016-08-25 00:00:00
2016-08-26 00:00:00
2016-08-31 00:00:00
2016-09-01 00:00:00
2016-09-02 00:00:00
2016-09-05 00:00:00
2016-09-07 00:00:00
2016-09-13 00:00:00
2016-09-13 00:00:00
2016-09-14 00:00:00
2016-09-16 00:00:00
2016-09-16 00:00:00
2016-09-20 00:00:00
2016-09-21 00:00:00
2016-09-22 00:00:00
2016-09-26 00:00:00
2016-09-26 00:00:00
2016-09-26 00:00:00
2016-09-27 00:00:00
2016-09-28 00:00:00
2016-09-29 00:00:00
2016-09-30 00:00:00
2016-10-02 00:00:00
2016-10-04 00:00:00
2016-10-06 00:00:00
2016-10-07 00:00:00
2016-10-08 00:00:00
2016-10-14 00:00:00
2016-10-20 00:00:00
2016-10-24 00:00:00
2016-10-25 00:00:00
2016-10-26 00:00:00
2016-10-26 00:00:00
2016-10-27 00:00:00
2016-10-27 00:00:00
2016-10-27 00:00:00
2016-10-28 00:00:00
