In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
class datainfo():
    """Small convenience wrapper for inspecting a pandas DataFrame.

    Attributes:
        data: The DataFrame handed to the constructor; it is stored as-is
            (no copy is made).
    """

    def __init__(self,data):
        """Store the DataFrame to inspect."""
        self.data=data

    def data_shape(self):
        """Print the number of rows and columns of the wrapped frame."""
        n_rows, n_cols = self.data.shape[0], self.data.shape[1]
        print(f"Rows: {n_rows} and columns: {n_cols}")

    def data_head(self):
        """Return the result of ``head()`` on the wrapped frame."""
        return self.data.head()

    def data_missing(self):
        """Print the missing-value count for every column that has any.

        Prints one line per affected column, or ``No missing data`` when no
        column contains a missing value.
        """
        # One vectorised pass over the frame instead of a Python loop per
        # column. This also works when column labels are duplicated, where
        # ``self.data[col]`` would return a DataFrame rather than a Series.
        missing_counts = self.data.isna().sum()
        missing_counts = missing_counts[missing_counts != 0]
        for col, count in missing_counts.items():
            print(f"No. of missing value for column {col} :- {count}")
        if missing_counts.empty:
            print("No missing data")

    def describe_data(self,columns=''):
        """Return ``describe()`` for the whole frame or a column selection.

        Args:
            columns: A column label or list of labels. When empty (the
                default ``''``, ``[]`` ...) or ``None``, the whole frame is
                described.

        Returns:
            Whatever ``describe()`` returns for the selection.
        """
        # ``None`` is treated like "no selection" instead of failing in len().
        if columns is None or len(columns)==0:
            return self.data.describe()
        return self.data[columns].describe()
        
            
    
        

In [5]:
def binary_sparse(data):
    """Print the percentage of entries in ``data`` that are equal to 1.

    Args:
        data: A pandas DataFrame or Series. Only values matching ``1`` are
            counted, so the "non-zero" wording of the message is accurate
            for 0/1 encoded data only.

    Returns:
        None. The percentage is printed, not returned.
    """
    # ``to_numpy().sum()`` counts matches for both 1-D and 2-D input, and
    # ``size`` is the total number of cells; ``shape[1]`` does not exist
    # for a Series.
    count = data.isin([1]).to_numpy().sum()
    total = data.size
    print(f"Percentage of non-zero target class values: {(count/total)*100}%")
    

In [6]:
def count_plot(fv_data,fv_xcol,fv_hue=None):
    """Draw a seaborn count plot for one column.

    Args:
        fv_data: Data passed to ``sns.countplot`` as ``data``.
        fv_xcol: Column whose categories are counted; also used in the
            title and as the x-axis label.
        fv_hue: Optional column passed as ``hue``.
    """
    # No ``ax`` is passed, so seaborn draws on the current axes and returns
    # it; labelling that axes object matches labelling through pyplot.
    axis = sns.countplot(data=fv_data, x=fv_xcol, hue=fv_hue)
    axis.set_title(f"Countplot for {fv_xcol}")
    axis.set_xlabel(fv_xcol)
    axis.set_ylabel('Count')

In [7]:
def dist_plot(data,column,nrows=1,ncols=1):
    """Plot shaded KDE curves for one column or a grid of columns.

    Args:
        data: Object indexed as ``data[name]`` to obtain the values to plot
            (presumably a pandas DataFrame -- confirm against callers).
        column: A single column name when ``nrows == ncols == 1``, otherwise
            an iterable of column names, one per subplot. A bare string is
            always treated as one column name.
        nrows: Number of subplot rows.
        ncols: Number of subplot columns.

    Columns and axes are paired with ``zip``, so surplus columns or surplus
    axes are silently left out / left empty.
    """
    # squeeze=False always returns a 2-D array of axes, so the single-plot
    # and the grid case share one code path.
    fig, axes = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False)

    single = nrows == 1 and ncols == 1
    # Wrapping a bare string prevents iterating over its characters.
    names = [column] if (single or isinstance(column, str)) else column

    for name, ax in zip(names, axes.flat):
        # NOTE(review): sns.distplot is deprecated in recent seaborn
        # releases; it is kept because the seaborn version this notebook
        # targets is not visible here.
        sns.distplot(data[name], hist=False, kde=True,
                     kde_kws={'shade': True, 'linewidth': 3}, ax=ax)
        ax.set_title(f"Distribution for {name}")
        # Label the subplot that was just drawn. ``plt.xlabel`` acts on the
        # *current* axes, which is not ``ax`` inside a grid.
        ax.set_xlabel(name)

    # One shared y-axis caption for the whole figure.
    fig.text(-0.1, 0.5, 'Density', ha='center', va='center', rotation='vertical')
    fig.tight_layout()
    plt.show()

    

In [8]:
def barh_percentage(data):
    """Draw a horizontal bar chart whose tick labels include percentages.

    Each bar is labelled ``"<index label>  <share>%"`` where the share is
    the bar's value divided by the column total.

    Args:
        data: A pandas Series, or a DataFrame whose values flatten to one
            entry per index label (presumably a single-column frame such as
            a ``value_counts`` result -- confirm against callers).
    """
    fig, ax = plt.subplots()

    values = data.values.ravel()
    # Pick the first column's total by position. ``[0]`` on the Series
    # returned by ``sum(axis=0)`` is a label lookup and only works when a
    # column happens to be called 0. A Series sums to a scalar directly.
    total = data.sum() if data.ndim == 1 else data.sum(axis=0).iloc[0]
    percent = values/total*100

    # f-string formatting also copes with non-string index labels.
    new_labels = [f"{label}  {share:.2f}%" for label, share in zip(data.index, percent)]

    # Draw the bars at 0..n-1 so they line up with the tick positions even
    # when the index holds numbers rather than strings.
    positions = list(range(len(values)))
    ax.barh(positions, values, color='blue', edgecolor='red')
    ax.set_yticks(positions)
    ax.set_yticklabels(new_labels)
    fig.tight_layout()

    # Strip the frame, the x-axis and the y tick marks; only bars and
    # their labels remain.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.xaxis.set_visible(False)
    ax.tick_params(axis="y", left=False)
    plt.show()

In [9]:
def bar_plot(df,x_col,y_col,fv_hue=None):
    """Draw a seaborn bar plot and tighten the figure layout.

    Args:
        df: Data passed to ``sns.barplot`` as ``data``.
        x_col: Column passed as ``x``.
        y_col: Column passed as ``y``.
        fv_hue: Optional column passed as ``hue``.
    """
    plot_kwargs = {"x": x_col, "y": y_col, "hue": fv_hue, "data": df}
    sns.barplot(**plot_kwargs)
    plt.tight_layout()

In [10]:
def heatmap(data,x_shape=10,y_shape=10):
    """Draw a correlation heatmap for the numeric columns of ``data``.

    Args:
        data: Object providing ``corr()`` (presumably a pandas DataFrame).
        x_shape: Figure width passed to ``figsize``.
        y_shape: Figure height passed to ``figsize``.
    """
    fig, ax = plt.subplots(figsize=(x_shape,y_shape))

    # pandas >= 2.0 no longer drops non-numeric columns by default and
    # raises on them, so ask for numeric-only explicitly. pandas releases
    # without the ``numeric_only`` keyword reject it with TypeError; those
    # releases already restrict to numeric columns on their own.
    try:
        corr = data.corr(numeric_only=True)
    except TypeError:
        corr = data.corr()

    # Draw on the axes created above rather than the implicit current axes.
    sns.heatmap(
        corr,
        vmin=-1, vmax=1, center=0,
        cmap=sns.diverging_palette(20, 220, n=200),
        square=True,
        ax=ax
    )
    # Slant the x tick labels so long column names do not overlap.
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=45,
        horizontalalignment='right'
    )

In [11]:
def scatter_plot(df,x_col,y_col,fv_hue=None):
    """Draw a seaborn scatter plot and tighten the figure layout.

    Args:
        df: Data passed to ``sns.scatterplot`` as ``data``.
        x_col: Column passed as ``x``.
        y_col: Column passed as ``y``.
        fv_hue: Optional column passed as ``hue``.
    """
    sns.scatterplot(
        data=df,
        x=x_col,
        y=y_col,
        hue=fv_hue,
    )
    plt.tight_layout()