In [65]:
# %load csver.py

# %load app.py
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime

from IPython.display import display
import string

class CSVer:
    """Load one or more CSV files with pandas and query/plot their columns.

    Every loaded file is kept as a ``pandas.DataFrame`` in ``self.csvs``, in
    the same order as the names given to the constructor. Most methods take a
    ``csv_pos`` index selecting which of those frames to operate on.
    """

    DATE_INPUT_FORMAT = '%Y-%m-%d %H:%M:%S'
    DATE_OUTPUT_FORMAT = '%m/%Y'
    DATE_GRAPH_FORMAT = DATE_OUTPUT_FORMAT

    # Column names used by the item-plotting helpers. They were referenced by
    # the plotting methods but never defined; the values are the column names
    # the script section of this file already uses.
    ITEMS_COLUMN = 'Item'
    DATES_COLUMN = 'Data'
    VALUES_COLUMN = 'ValorUnitarioSolicitado'

    def __init__(self, csv_names, nrows=None):
        """Read every CSV in *csv_names* into ``self.csvs``.

        Args:
            csv_names: iterable of paths (or file-like objects) accepted by
                ``pandas.read_csv``.
            nrows: optional maximum number of data rows to read from each
                file; ``None`` reads each file completely.
        """
        # nrows used to be accepted but silently ignored; forward it.
        self.csvs = [pd.read_csv(name, nrows=nrows) for name in csv_names]

    def get_distinct_rows(self, column_name, csv_pos=0):
        """Return the unique values of *column_name* in the selected CSV."""
        return self.csvs[csv_pos][column_name].unique()

    def filter_column_by_value(self, column, value, csv_pos=0):
        """Return the rows of the selected CSV where *column* equals *value*."""
        frame = self.csvs[csv_pos]
        return frame[frame[column] == value]

    def get_date_axis_from_column(self, data_frame=None,
                                  column='Data', csv_pos=0):
        """Convert a column of date strings to matplotlib date numbers.

        Args:
            data_frame: frame to read from; defaults to ``self.csvs[csv_pos]``.
            column: name of the column holding strings in
                ``DATE_INPUT_FORMAT``.
            csv_pos: index of the CSV used when *data_frame* is ``None``.

        Returns:
            The result of ``matplotlib.dates.date2num`` for the parsed dates.

        Raises:
            ValueError: if a value does not match ``DATE_INPUT_FORMAT``.
        """
        if data_frame is None:
            data_frame = self.csvs[csv_pos]

        parsed = [datetime.strptime(raw, self.DATE_INPUT_FORMAT)
                  for raw in data_frame[column].values]
        return matplotlib.dates.date2num(parsed)

    def get_random_item(self, column, csv_pos=0):
        """Return one randomly chosen distinct value of *column*."""
        return np.random.choice(self.get_distinct_rows(column, csv_pos))

    def get_x_y(self, x_column, y_column, filter=None, csv_pos=0):
        """Return the ``(x, y)`` column pair, optionally filtered.

        Args:
            x_column: name of the column returned first.
            y_column: name of the column returned second.
            filter: optional ``(column, value)`` pair; when given, only rows
                where ``column == value`` are used. (The name shadows the
                builtin but is kept because callers may pass it by keyword.)
            csv_pos: index of the CSV to read from.
        """
        if filter is not None:
            column, value = filter
            rows = self.filter_column_by_value(column, value, csv_pos)
        else:
            rows = self.csvs[csv_pos]

        return (rows[x_column], rows[y_column])

    def _new_date_axes(self):
        """Create a figure with one axes whose x ticks render as dates."""
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.xaxis.set_major_formatter(
            matplotlib.dates.DateFormatter(self.DATE_GRAPH_FORMAT))
        plt.setp(ax.get_xticklabels(), rotation=15)
        return ax

    def plot_x1_x2_y(self, x1, y1, x2, y2, x_label='', y_label='', title=''):
        """Scatter two series against a shared date axis and show the figure.

        *x1* and *x2* are expected to be matplotlib date numbers, since the
        x axis is formatted with ``DATE_GRAPH_FORMAT``.
        """
        ax = self._new_date_axes()
        # Both series go on the same axes: requesting subplot (1,1,1) a second
        # time either reuses the axes or stacks a new one over the formatted
        # axis, depending on the matplotlib version. The second series gets
        # its own colour so the two can be told apart.
        ax.scatter(x1, y1, s=50, c='blue', alpha=0.25)
        ax.scatter(x2, y2, s=50, c='red', alpha=0.25)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        ax.set_title(title)

        plt.show()

    def plot_item_value_along_time(self, item):
        """Scatter the values recorded for *item* against their dates.

        Rows are taken from the first CSV where ``ITEMS_COLUMN == item``;
        ``DATES_COLUMN`` provides the x axis and ``VALUES_COLUMN`` the y axis.
        """
        rows = self.filter_column_by_value(self.ITEMS_COLUMN, item)
        x = self.get_date_axis_from_column(data_frame=rows,
                                           column=self.DATES_COLUMN)
        y = rows[self.VALUES_COLUMN].values

        # Previously this went through a global instance and a non-existent
        # plot_histogram method; plot values over the computed date axis.
        ax = self._new_date_axes()
        ax.scatter(x, y, s=50, c='blue', alpha=0.25)
        ax.set_xlabel('Data (dia/mes/ano)')
        ax.set_ylabel('Valor unitário solicitado (R$)')
        ax.set_title(item)
        plt.show()
        print('Ploting item \"{}\"...'.format(item))

    def plot_random_item_value_along_time(self):
        """Plot value over time for a randomly chosen item of the first CSV."""
        # get_random_item requires the column to draw from.
        item = self.get_random_item(self.ITEMS_COLUMN)
        self.plot_item_value_along_time(item)
       
if __name__ == '__main__':
    # Demo: load both sample CSVs, pick a random item from the first one and
    # display the 'Data' column of the rows recorded for that item.
    solicitado, aprovado = 'solicitado_small.csv', 'aprovado_small.csv'
    csver = CSVer([solicitado, aprovado])

    item = csver.get_random_item(column='Item', csv_pos=0)

    xy = csver.get_x_y(
        x_column='ValorUnitarioSolicitado',
        y_column='Data',
        filter=('Item', item),
        csv_pos=0,
    )
    # xy is (x, y); index 1 is the 'Data' column.
    display(xy[1])

25     2009-04-26 00:00:00
40     2009-04-17 00:00:00
54     2009-04-14 00:00:00
61     2009-05-01 00:00:00
73     2010-01-18 00:00:00
252    2009-12-15 00:00:00
283    2009-06-01 00:00:00
323    2009-08-15 00:00:00
510    2011-02-01 00:00:00
559    2010-04-01 00:00:00
560    2010-04-01 00:00:00
861    2010-03-01 00:00:00
866    2010-04-05 00:00:00
919    2010-03-01 00:00:00
Name: Data, dtype: object