In [5]:
%run ../../common-imports.ipynb

# Object Oriented Python: Classes and Objects

In [6]:
# importing the libraries
import numpy.random as nr
import pandas as pd

# A class whose methods compute the mean, standard deviation, min, max, and median
class DescriptiveStatistics:
    '''Simple descriptive statistics over a one-dimensional collection of numbers.

    The collection is stored by reference and is never modified by any of
    the methods. Every statistic raises ValueError when the collection is
    empty.
    '''
    
    def __init__ (self, numbers):
        ''' Constructor that takes a list of numbers'''
        self.numbers = numbers
    
    def _require_data(self):
        '''Return the number of observations, raising ValueError if there are none.'''
        n = len(self.numbers)
        if n == 0:
            raise ValueError("DescriptiveStatistics needs at least one number")
        return n
    
    def mean (self):
        '''The average of the numbers in the list.

        The result is also cached on the instance as ``self.ave``.
        '''
        n = self._require_data()
        self.ave = sum(self.numbers)/n
        return self.ave
    
    def std(self, ddof=0):
        '''Find the standard deviation.

        ddof is the "delta degrees of freedom": the sum of squared deviations
        is divided by ``n - ddof``. The default of 0 gives the population
        standard deviation; pass ``ddof=1`` for the sample standard deviation.

        Raises ValueError if ``n - ddof`` is not positive.
        '''
        n = self._require_data()
        if n - ddof <= 0:
            raise ValueError("std needs more than ddof={} numbers, got {}".format(ddof, n))
        mu = self.mean()
        squared_deviations = sum((number - mu)**2 for number in self.numbers)
        return (squared_deviations/(n - ddof))**(0.5)
    
    def min(self):
        '''Find the min'''
        # Inside a method body the bare name `min` is the builtin, not this method.
        self._require_data()
        return min(self.numbers)
    
    def max(self):
        '''Find the max'''
        # Inside a method body the bare name `max` is the builtin, not this method.
        self._require_data()
        return max(self.numbers)
    
    def median(self):
        '''Find the median.

        For an odd count this is the middle value of the sorted data; for an
        even count it is the average of the two middle values.
        '''
        n = self._require_data()
        # Work on a sorted copy so the caller's data keeps its original order.
        ordered = sorted(self.numbers)
        middle = n // 2
        
        # Parity must be tested on the full count, not on the half count.
        if n % 2 == 0:
            return (ordered[middle-1] + ordered[middle])/2
        return ordered[middle]
 
    def summarize(self):
        '''Create a simple summary of the data.

        Prints the mean, population standard deviation, min, max and median,
        one per line.
        '''
        mu   = self.mean()
        std  = self.std()
        mn   = self.min()
        mx   = self.max()
        md = self.median() 
        print ("Mean: {}".format(mu))
        print ("Std : {}".format(std))
        print ("Min : {}".format(mn))
        print ("Max : {}".format(mx))
        print ("Median : {}".format(md))

    
if __name__ == "__main__":
    # Quick sanity check of the class on a tiny hand-checkable sample.
    x = [1,2,3,4]
    ds = DescriptiveStatistics(x)
    # mean() is evaluated before std(), as in a plain top-to-bottom script.
    mu, sd = ds.mean(), ds.std()
    print ("Mean is {}".format(mu))
    print ("Std is {}".format(sd))

Mean is 2.5
Std is 1.118033988749895


# Descriptive statistics for one-dimensional data

Let us now use our DescriptiveStatistics class to summarize some datasets.

The data files are:

* normal.txt
* lognormal.txt
* beta.txt
* gamma.txt
* poisson.txt

In [7]:
# Summarize each data file twice: once with DescriptiveStatistics and once
# with pandas, so the two sets of numbers can be compared side by side.
datasets = ["normal", "lognormal", "beta", "poisson", "gamma"]

for dataset in datasets:
    filename = "{}.txt".format(dataset)
    # NOTE(review): read_csv treats the first line of the file as a header by
    # default -- confirm each file really starts with a header row, otherwise
    # the first observation is silently dropped.
    data = pd.read_csv(filename)
    # Assigning a one-element list requires the file to have exactly one column.
    data.columns = ['x']
    t = list(data.x)
    print(len(t))

    # applying descriptive statistics
    ds = DescriptiveStatistics(t)
    print ("Statistics coming from our simple python class for dataset: {}".format(dataset))
    
    ds.summarize()
    
    # pandas reports the sample standard deviation (ddof=1), so its std is
    # slightly larger than the population value printed by summarize().
    print ("Statistics from Pandas dataframe.describe() for this dataset")
    print (data.describe())
    print ("------"*20)

10000
Statistics coming from our simple python class for dataset: normal
Mean: 4.059564682382221
Std : 6.9766217043607766
Min : -22.745352049794565
Max : 31.437346657214565
Median : 4.0177332937019985
Statistics from Pandas dataframe.describe() for this dataset
                  x
count  10000.000000
mean       4.059565
std        6.976971
min      -22.745352
25%       -0.672681
50%        4.017733
75%        8.861807
max       31.437347
------------------------------------------------------------------------------------------------------------------------
10000
Statistics coming from our simple python class for dataset: lognormal
Mean: 1.9116784677476164
Std : 1.6152150717559268
Min : 0.0840665225095139
Max : 38.593767835017424
Median : 1.479593035848105
Statistics from Pandas dataframe.describe() for this dataset
                  x
count  10000.000000
mean       1.911678
std        1.615296
min        0.084067
25%        0.920601
50%        1.479593
75%        2.383760
max       38.