In [None]:
class Feature:
    """Profile a single pandas Series: summary statistics plus a chart.

    On construction the series is summarised into ``self.report`` and then
    handed to one of four profilers, chosen from the data itself:

    * ``NumericFeature`` -- box plot and annotated histogram of the values.
    * ``StringFeature``  -- length statistics and a bar chart of the most
      frequent words.
    * ``DateFeature``    -- line plot for a series with a ``DatetimeIndex``.
    * ``Categorical``    -- bar chart of value counts; also the fallback for
      mixed-type, empty/all-null or otherwise unrecognised data.

    The class relies on names provided by the surrounding notebook:
    ``pd`` (pandas), ``plt`` (matplotlib.pyplot), ``CountVectorizer``
    (scikit-learn) and IPython's ``display``.

    Parameters
    ----------
    series : pandas.Series
        The column to profile. It is never modified in place.
    """

    # Python type name of the non-null values -> name of the profiling method.
    _TYPE_HANDLERS = {
        'str': 'StringFeature',
        'int': 'NumericFeature',
        'float': 'NumericFeature',
        'datetime64': 'DateFeature',
        'datetime32': 'DateFeature',
        'category': 'Categorical',
    }

    def __init__(self, series):
        self.series = series
        self.threshold = 0.20

        self._summarize()

        self.fig = plt.figure(figsize=(12, 12))

        # Kept as an instance attribute (bound methods) for existing callers.
        self.type_map = {
            type_name: getattr(self, method_name)
            for type_name, method_name in self._TYPE_HANDLERS.items()
        }

        self._select_handler()()

        # Few distinct values relative to the length suggests a categorical
        # feature; no point hinting if it has already been converted.
        if (self.cat_ratio < self.threshold
                and self.series.dtype.name != 'category'):
            self.cat_msg = (
                'This feature looks like it might be categorical.\n'
                'The ratio of unique values to total items\n'
                'is less than the specified threshold.\n\n'
                'To convert to a categorical, call the .Categorical() method.'
            )
            print(self.cat_msg)

    def _summarize(self):
        """Compute the type/value statistics and build ``self.report``."""
        length = len(self.series)

        self.desc = self.series.describe()

        self.null_count = self.series.isna().sum()
        # Guard the divisions so an empty series does not raise.
        self.null_percent = (
            round((self.null_count / length) * 100, 2) if length else 0.0
        )

        self.types = self.series.dropna().apply(
            lambda x: type(x).__name__)
        self.type_counts = self.types.value_counts()

        self.val_counts = self.series.value_counts()
        self.cat_ratio = len(self.val_counts) / length if length else 0.0

        # Explicit object dtype: the report mixes numbers and strings.
        self.report = pd.Series(
            {
                'Length': length,
                'Values': self.series.count(),
                'Missing': '{} ({}%)'.format(
                    self.null_count, self.null_percent),
                'Unique': len(self.val_counts),
            },
            dtype=object,
        )

    def _select_handler(self):
        """Return the bound profiling method appropriate for the series."""
        # No usable values, several value types, or an explicit category
        # dtype: the only sensible view is a count of the distinct values.
        if (len(self.type_counts) != 1
                or self.series.dtype.name == 'category'):
            return self.Categorical
        if isinstance(self.series.index, pd.DatetimeIndex):
            return self.DateFeature
        # Unknown value types (bool, Timestamp, ...) fall back to a value
        # count instead of raising KeyError.
        method_name = self._TYPE_HANDLERS.get(
            str(self.type_counts.index[0]), 'Categorical')
        return getattr(self, method_name)

    def NumericFeature(self):
        """Report mean/median and draw a box plot above a histogram."""
        self.report['Mean'] = self.series.mean()
        self.report['Median'] = self.series.median()

        self.grid = plt.GridSpec(12, 12, wspace=0.8, hspace=2)

        # Missing values break the histogram binning, so plot without them.
        values = self.series.dropna()
        self.box_plot(values, self.grid[:2, :])
        self.histo(values, self.grid[2:, :])

    def StringFeature(self):
        """Report string-length statistics and chart the most common words."""
        # Missing entries are treated as empty strings, both for the length
        # statistics and so the vectorizer only ever sees str values.
        texts = self.series.fillna('')
        self.str_lengths = texts.apply(len)

        self.report['Mean Length'] = self.str_lengths.mean()
        self.report['Median Length'] = self.str_lengths.median()

        self.chart_words = 25
        self.countvec = CountVectorizer(stop_words='english')
        self.wordcounts = self.countvec.fit_transform(texts)
        self.word_totals = self.wordcounts.toarray().sum(axis=0)
        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only on old releases.
        if hasattr(self.countvec, 'get_feature_names_out'):
            self.words = self.countvec.get_feature_names_out()
        else:
            self.words = self.countvec.get_feature_names()
        self.count_dict = dict(zip(self.words, self.word_totals))
        self.count_series = pd.Series(self.count_dict)
        self.count_series.sort_values(ascending=False, inplace=True)

        self.fig.set_figheight(16)
        self.grid = plt.GridSpec(16, 12, wspace=0.6, hspace=1)

        self.box_plot(self.str_lengths, self.grid[:2, :])
        self.histo(self.str_lengths, self.grid[2:9, :])
        self.bar_plot(
            self.count_series.head(self.chart_words), self.grid[9:, :])

    def DateFeature(self):
        """Report the first/last index values and plot the series over time."""
        # Sort a copy: the caller's Series must not be reordered in place.
        self.series = self.series.sort_index(ascending=True)

        self.report["Earliest Point"] = self.series.index[0]
        self.report["Latest Point"] = self.series.index[-1]

        self.time_plot = self.fig.add_subplot()
        self.fig.set_figheight(5)
        self.fig.set_figwidth(16)

        self.time_plot.plot(self.series)

    def Categorical(self):
        """Convert the series to a category dtype and chart its value counts."""
        display(self.report)

        self.series = self.series.astype('category')

        # A fresh figure is needed (the existing one may already hold other
        # axes); close the old one so it is not left open and blank.
        previous_fig = getattr(self, 'fig', None)
        if previous_fig is not None:
            plt.close(previous_fig)

        self.fig = plt.figure()
        self.fig.set_figheight(5)
        # One bar's worth of width at minimum: matplotlib rejects a
        # zero-width figure when there are no values to count.
        self.fig.set_figwidth(max(.75 * len(self.val_counts), .75))

        self.grid = plt.GridSpec(12, 12, wspace=0.6, hspace=1)

        self.bar_plot(self.val_counts, self.grid[:, :])

    def bar_plot(self, series, grid_loc):
        """Draw ``series`` as a bar chart (index -> labels, values -> heights).

        Parameters
        ----------
        series : pandas.Series
            Heights to plot, labelled by the series index.
        grid_loc
            GridSpec slice locating the axes within ``self.fig``.
        """
        self.bar_chart = self.fig.add_subplot(grid_loc)
        # Labels are stringified so mixed-type indexes (e.g. 1 and 'a') are
        # accepted by matplotlib's categorical axis.
        self.bar_chart.bar(
            [str(label) for label in series.index],
            series,
            width=0.7)
        self.bar_chart.tick_params(
            axis='x',
            labelrotation=55,
            length=10,
            labelsize=10
        )

    def box_plot(self, series, grid_loc):
        """Draw a horizontal box plot of ``series`` at ``grid_loc``."""
        self.box_axes = self.fig.add_subplot(grid_loc)
        self.box_axes.boxplot(series, vert=False)

    def histo(self, series, grid_loc):
        """Draw a 100-bin histogram marked with the mean and quartiles.

        The axes are stored on ``self.hist_axes`` (not ``self.histo``, which
        would replace this method on the instance after the first call).

        Parameters
        ----------
        series : pandas.Series
            Numeric values to bin.
        grid_loc
            GridSpec slice locating the axes within ``self.fig``.
        """
        self.hist_axes = self.fig.add_subplot(grid_loc)
        self.n = self.hist_axes.hist(series, bins=100)[0]
        tallest_bin = max(self.n)

        self.vert_marks = series.describe().loc[
            ['mean',
             '25%',
             '50%',
             '75%']
        ]

        # Marker lines run slightly above the tallest bar.
        self.hist_axes.vlines(
            self.vert_marks,
            ymin=0,
            ymax=tallest_bin * 1.2,
            linestyles='dashed')

        for label, position in self.vert_marks.items():
            self.hist_axes.annotate(
                label,
                (position, tallest_bin * .9)
            )