In [1]:
import sys 
import pandas as pd
import numpy as np
import sklearn as sl
import scipy.stats as ss
import logging
import matplotlib.pyplot as plt 
from datetime import datetime

In [87]:
class RJHB(object):
    """Convenience wrapper around a pandas DataFrame holding the "rijihuabiao" table.

    On construction the date columns are normalised (see ``preprocess``) and a
    per-company split of the table is cached (see ``groupByCo``).
    """

    def __init__(self, table):
        """
        paras:
            table: pandas DataFrame. The methods of this class read the columns
                   "year", "month", "day" (strings), "company" and "volume".
        note:
            The table is modified in place: "month" and "day" are rewritten and
            a "time" column is added.
        """
        self.__table = table
        self.__groupByCompany = None
        self.preprocess()
        self.groupByCo()

    def getTable(self):
        """
        fetch whole content of the table
        """
        return self.__table

    def preprocess(self):
        """
        Zero-pad "month" and "day" to two characters and build the "time"
        column as the string concatenation year + month + day.
        """
        data = self.getTable()
        # Use the vectorised .str accessor so the result keeps the table's own
        # index. Building a fresh pd.Series from a list would get a 0..n-1
        # index and be misaligned on assignment whenever the table is indexed
        # differently (e.g. after filtering).
        for column in ("month", "day"):
            data[column] = data[column].str.zfill(2)
        data["time"] = data["year"] + data["month"] + data["day"]
        self.__table = data

    def getItem(self, columnName):
        """
        fetch one column of the table by name;
        paras:
            columnName: column keyword;
        return:
            A Series of specified column;
        raises:
            KeyError: if the table has no such column;
        """
        data = self.getTable()
        if columnName not in data.columns:
            # Carry the explanation in the exception itself instead of printing
            # it separately; this also works on both Python 2 and Python 3.
            raise KeyError("%s is an invalid keyword!!!" % (columnName,))
        return data[columnName]

    def displayItem(self):
        """
        return:
            the column index (names of all columns) of the table;
        """
        return self.getTable().columns

    def NumOfRows(self):
        """
        return:
            number of rows in the table;
        """
        return len(self.getTable().index)

    def groupByCo(self):
        """
        Split the table by the "company" column and cache the result as a
        dict mapping company id -> DataFrame of that company's rows.
        """
        data = self.getTable()
        self.__groupByCompany = dict(list(data.groupby('company')))

    def groupByDate(self, *args):
        """
        Split the table by one or more columns.
        paras:
            *args: column names, given either as separate arguments or as a
                   single tuple of names;
        return:
            dict mapping group key -> DataFrame of the rows in that group;
            with several columns the key is a tuple of their values.
            NOTE(review): with a single column the key shape (scalar vs
            1-tuple) is decided by pandas and has differed between versions;
            confirm before relying on it.
        raises:
            ValueError: if no column name is given;
        """
        if args and isinstance(args[0], tuple):
            args = args[0]
        if not args:
            raise ValueError("groupByDate needs at least one column name")
        data = self.getTable()
        return dict(list(data.groupby(list(args))))

    def getCoInfo(self, companyId):
        """
        paras:
            companyId: id of company;
        return:
            DataFrame with the rows of that company;
        raises:
            KeyError: if the company id does not occur in the table;
        """
        return self.__groupByCompany[companyId]

    def getStats_skew(self, data):
        """
        Placeholder: not implemented yet, returns None.
        """
        pass

    def getStatsByCo(self, companyId):
        """
        paras:
            companyId: id of company;
        return:
            describe() summary of that company's "volume" column;
        """
        return self.getCoInfo(companyId)['volume'].describe()

    def getStatsByDate(self, *args, **kwargs):
        """
        paras:
            *args: names of the date columns to match on, e.g. 'year', 'month', 'day';
            **kwargs: the wanted value for every column named in *args;
        return:
            describe() summary of "volume" over the rows matching all values;
        raises:
            ValueError: if no column name is given;
            KeyError: if a named column has no value in **kwargs, or if no row
                      matches the requested values;
        demo:
            stats = self.getStatsByDate('year', 'month', year="2018", month="05");
        """
        if args and isinstance(args[0], tuple):
            args = args[0]
        if not args:
            raise ValueError("getStatsByDate needs at least one column name")
        missing = [name for name in args if name not in kwargs]
        if missing:
            raise KeyError("no value given for column(s): %r" % (missing,))

        data = self.getTable()
        # Select the rows directly rather than materialising every group just
        # to read one of them; this also does not depend on how pandas shapes
        # the group keys for a single column.
        mask = pd.Series(True, index=data.index)
        for name in args:
            mask = mask & (data[name] == kwargs[name])
        selected = data.loc[mask, 'volume']
        if selected.empty:
            # Unknown dates stay a KeyError, as with the former dict lookup.
            raise KeyError(tuple(kwargs[name] for name in args))
        return selected.describe()

In [114]:
#rjh.getItem("company")[:10]
#rjh.displayItem()
#rjh.NumOfRows()
rjh.getCoInfo(3000163)['volume'].plot()
#rjh.getStatsByCo(3000163)
#xx = rjh.groupByDate('year', 'month')
#rjh.getStatsByDate('year','month',year='2015',month='05')

<matplotlib.axes._subplots.AxesSubplot at 0x7fa28a0b22d0>

In [116]:
plt.show()

In [88]:
#data.head(10)
rjh = RJHB(data)
#rjh.getTable()[:10]['company']

In [15]:
filename = 'rjhb.csv.in'
data = pd.read_csv(filename, sep='\t',header=0, dtype={"year":str, "month":str, "day":str})