In [1]:
import pandas as pd
import numpy as np

In [2]:
class Animal:
    def __init__(self, name):
        self.name = name
    
    def eat(self):
        print(f'{self.name} is eating')
        
    def sleep(self):
        print(f'{self.name} is sleeping')
        
    def go(self):
        print(f'{self.name} is walking on its four legs')
    
    def get_name(self):
        return self.name
        
#     def talk(self):
#         print(f'{self.name} ??')

In [3]:
p = Animal('fox')
p.eat()
p.go()
p.sleep()

fox is eating
fox is walking on its four legs
fox is sleeping


In [7]:
class Human(Animal):
    def __init__(self, name):
        super().__init__(name)
    
    def talk(self):
        print(f'{self.name} is talking like a human being')
    
    def go(self):
        print(f'{self.name} is walking')

In [8]:
j = Human('John')

In [9]:
j.eat()
j.go()
j.sleep()
j.talk()

John is eating
John is walking
John is sleeping
John is talking like a human being


In [33]:
df = pd.DataFrame()

In [34]:
type(df)

pandas.core.frame.DataFrame

In [35]:
df.describe()

ValueError: Cannot describe a DataFrame without columns

In [36]:
df.insight()

AttributeError: 'DataFrame' object has no attribute 'insight'

In [23]:
class MyDataFrame(pd.core.frame.DataFrame):
    
    def describe(self):
        try: 
            return super().describe()
        
        except ValueError as err: 
            print (err)
            
    def insight(self):
        print(self.shape)
        for col in self.columns:
            print(col + ' has ' + str(self[col].nunique()) + ' unique values')
            
            

In [38]:
df = MyDataFrame()

In [39]:
type(df)

__main__.MyDataFrame

In [40]:
df.describe()

Cannot describe a DataFrame without columns


In [41]:
df['A'] = [x for x in range(100)]
df['B'] = np.random.normal(0, 1, 100)

In [42]:
df.describe()

Unnamed: 0,A,B
count,100.0,100.0
mean,49.5,-0.009965
std,29.011492,1.009797
min,0.0,-1.942949
25%,24.75,-0.684294
50%,49.5,-0.038899
75%,74.25,0.55474
max,99.0,2.622873


In [43]:
df.head()

Unnamed: 0,A,B
0,0,1.328223
1,1,0.348852
2,2,0.505596
3,3,-0.66784
4,4,-1.942949


In [44]:
df.tail()

Unnamed: 0,A,B
95,95,-0.949194
96,96,-0.183949
97,97,-0.122585
98,98,0.493523
99,99,-0.676


In [45]:
df.insight()

(100, 2)
A has 100 unique values
B has 100 unique values


In [46]:
# dir(df)

## Abstract Classes

In [49]:
import os

class FileProcessor:
    
    def __init__(self, path='./data/', file_types=['.dat']):
        print('doing some initialization')
        
        self.files = self.find_all_files(path, file_types)

    
    def find_all_files(self, path, file_types):
        paths = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if any([file.endswith(ft) for ft in file_types]):
                    path = os.path.join(root, file)
                    print(path)
                    paths.append(path)
        print(paths)
        return paths
    
    def start(self):
        """
        Entry point for the class
        """
        for file in self.files:
            self.process_file(file)
        
    def process_file(self, file_path):
        """
        abstract method
        """
        raise NotImplemented('process_file method not implemented')

In [50]:
FileProcessor().start()

doing some initialization
./data/week1_50 years of data science v2.pdf.dat
./data/week1_a Very Short History Of Data Science_1.docx.dat
./data/week1_assignment 1 Summary.pdf.dat
./data/week1_380.txt.dat
./data/week1_A Very Short History Of Data Science.docx.dat
./data/week1_50 Years Data Science Summary.docx.dat
['./data/week1_50 years of data science v2.pdf.dat', './data/week1_a Very Short History Of Data Science_1.docx.dat', './data/week1_assignment 1 Summary.pdf.dat', './data/week1_380.txt.dat', './data/week1_A Very Short History Of Data Science.docx.dat', './data/week1_50 Years Data Science Summary.docx.dat']


TypeError: 'NotImplementedType' object is not callable

In [51]:
class DocProcessor(FileProcessor):
    def process_file(self, file_path):
        print(file_path)
        
    

In [52]:
pro = DocProcessor()
pro.start()

doing some initialization
./data/week1_50 years of data science v2.pdf.dat
./data/week1_a Very Short History Of Data Science_1.docx.dat
./data/week1_assignment 1 Summary.pdf.dat
./data/week1_380.txt.dat
./data/week1_A Very Short History Of Data Science.docx.dat
./data/week1_50 Years Data Science Summary.docx.dat
['./data/week1_50 years of data science v2.pdf.dat', './data/week1_a Very Short History Of Data Science_1.docx.dat', './data/week1_assignment 1 Summary.pdf.dat', './data/week1_380.txt.dat', './data/week1_A Very Short History Of Data Science.docx.dat', './data/week1_50 Years Data Science Summary.docx.dat']
./data/week1_50 years of data science v2.pdf.dat
./data/week1_a Very Short History Of Data Science_1.docx.dat
./data/week1_assignment 1 Summary.pdf.dat
./data/week1_380.txt.dat
./data/week1_A Very Short History Of Data Science.docx.dat
./data/week1_50 Years Data Science Summary.docx.dat
