#SINGLE RESPONSIBILITY PRINCIPLE



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [None]:
class MLModel:
    """Single class that loads/cleans the data, trains a tree and scores it.

    All three pipeline stages live on this one class.
    """

    def cleaning_preprocess(self, path):
        """Read the CSV at ``path`` and return a cleaned, encoded DataFrame.

        Missing ``Time_taken`` values are replaced by the column mean, and the
        ``3D_available`` and ``Genre`` columns are label-encoded.
        """
        frame = pd.read_csv(path)

        mean_time = frame['Time_taken'].mean()
        frame['Time_taken'] = frame['Time_taken'].fillna(mean_time)

        # A fresh encoder per column: every fit_transform refits anyway.
        for column in ('3D_available', 'Genre'):
            frame[column] = LabelEncoder().fit_transform(frame[column])

        print("Features have been preprocessed")
        return frame

    def train(self, df):
        """Fit a decision tree on a 67/33 split of ``df``.

        ``Start_Tech_Oscar`` is the target; every other column is a feature.

        Returns:
            Tuple ``(y_test, y_predict)``: held-out labels and the tree's
            predictions for them.
        """
        target = 'Start_Tech_Oscar'
        features, labels = df.drop(columns=target), df[target]

        train_x, test_x, train_y, test_y = train_test_split(
            features, labels, test_size=0.33, random_state=42
        )

        predictions = DecisionTreeClassifier().fit(train_x, train_y).predict(test_x)
        print("Model has been trained")

        return test_y, predictions

    def evaluate(self, y_test, y_predict):
        """Print the accuracy of ``y_predict`` against ``y_test``."""
        print('accuracy', accuracy_score(y_test, y_predict))
        print("Model has been evaluated")


In [None]:
# Entry point for the single-class version: one MLModel object cleans the
# data, trains the tree and prints the accuracy.
if __name__ == "__main__":
    model = MLModel()
    # NOTE: despite its name, `preprocessor` holds the DataFrame returned by
    # cleaning_preprocess(), not a preprocessor object.
    preprocessor = model.cleaning_preprocess('/content/Movie_classification.csv')
    y_test,y_predict = model.train(preprocessor)
    # evaluate() only prints and has no return statement, so `evaluator` is
    # bound to None.
    evaluator = model.evaluate(y_test,y_predict)


Features have been preprocessed
Model has been trained
accuracy 0.5508982035928144
Model has been evaluated


In [None]:
class MLModel:
    """Training stage only: fits a decision tree on an already-clean frame."""

    def train(self, df):
        """Fit a decision tree on a 67/33 split of ``df``.

        ``Start_Tech_Oscar`` is the target; every other column is a feature.

        Returns:
            Tuple ``(y_test, y_predict)``: held-out labels and the tree's
            predictions for them.
        """
        target = 'Start_Tech_Oscar'
        features, labels = df.drop(columns=target), df[target]

        train_x, test_x, train_y, test_y = train_test_split(
            features, labels, test_size=0.33, random_state=42
        )

        predictions = DecisionTreeClassifier().fit(train_x, train_y).predict(test_x)
        print("Model has been trained")

        return test_y, predictions


class Preprocessor:
    """Loading/cleaning stage only: turns a CSV file into a model-ready frame."""

    def cleaning_preprocess(self, path):
        """Read the CSV at ``path`` and return a cleaned, encoded DataFrame.

        Missing ``Time_taken`` values are replaced by the column mean, and the
        ``3D_available`` and ``Genre`` columns are label-encoded.
        """
        frame = pd.read_csv(path)

        mean_time = frame['Time_taken'].mean()
        frame['Time_taken'] = frame['Time_taken'].fillna(mean_time)

        # A fresh encoder per column: every fit_transform refits anyway.
        for column in ('3D_available', 'Genre'):
            frame[column] = LabelEncoder().fit_transform(frame[column])

        print("Features have been preprocessed")
        return frame


class MlEvaluator:
    """Evaluation stage only: reports how well predictions match the labels."""

    def evaluate(self, y_test, y_predict):
        """Print the accuracy of ``y_predict`` against ``y_test``."""
        print('accuracy', accuracy_score(y_test, y_predict))
        print("Model has been evaluated")


In [None]:
# Entry point for the split version: each stage (preprocess, train, evaluate)
# is handled by its own object.
if __name__ == "__main__":

    model = MLModel()
    preprocessor = Preprocessor()
    evaluator = MlEvaluator()

    # The cleaned DataFrame flows from the preprocessor into the model, and the
    # model's (y_test, y_predict) pair flows into the evaluator.
    features = preprocessor.cleaning_preprocess('/content/Movie_classification.csv')
    y_test,y_predict = model.train(features)
    evaluator.evaluate(y_test,y_predict)

Features have been preprocessed
Model has been trained
accuracy 0.5568862275449101
Model has been evaluated


#Open-Closed Principle

In [None]:
################ OCP violation #############################
class Vectors:
    """Categorical-encoding strategies bundled in one class.

    Every public method encodes the ``object``-dtype columns of ``data``
    *in place* (callers keep using the frame they passed in) and returns
    ``None``.
    """

    @staticmethod
    def _object_columns(data):
        """Return the names of the object-dtype columns as a list.

        The list is a snapshot, so columns can be added or dropped while the
        caller loops over it.
        """
        return [name for name in data.columns if data[name].dtype == "object"]

    def label(self, data):
        """Replace each object column by its integer label encoding."""
        for name in self._object_columns(data):
            data[name] = LabelEncoder().fit_transform(data[name])
        print("Label Encoder")

    def onehot(self, data):
        """Replace each object column by one-hot columns (first level dropped).

        The new columns are named ``<column>_<category>`` and appended to
        ``data``; the original column is removed.
        """
        for name in self._object_columns(data):
            encoder = OneHotEncoder(drop='first')
            # The encoder's default output is sparse; densifying here avoids
            # the `sparse` / `sparse_output` keyword, whose name depends on the
            # installed scikit-learn version.
            dense = encoder.fit_transform(data[[name]]).toarray()
            # drop='first' removes the first category, so the output columns
            # correspond to the remaining ones, in order.
            kept_categories = encoder.categories_[0][1:]
            data.drop(columns=name, inplace=True)
            for position, category in enumerate(kept_categories):
                data[f"{name}_{category}"] = dense[:, position]

        print("onehot_encoding")

    def getdummy(self, data):
        """Replace each object column by ``pd.get_dummies`` indicator columns.

        The new integer columns are named ``<column>_<category>`` and appended
        to ``data``; the original column is removed.
        """
        for name in self._object_columns(data):
            dummies = pd.get_dummies(
                data[name], prefix=str(name), drop_first=True, dtype=int
            )
            # Mutate the caller's frame; rebinding the local name `data` would
            # leave the caller's frame untouched.
            data.drop(columns=name, inplace=True)
            for dummy_name in dummies.columns:
                data[dummy_name] = dummies[dummy_name]
        print("get_dummies")


class DLPipeline:
    """End-to-end pipeline whose encoding step is chosen by a string.

    Supporting a new encoding means adding another branch to ``_extract``.
    """

    def __init__(self, extractor, feature_type):
        """Store the encoder object and the name of the encoding to apply."""
        self.feature_type = feature_type
        self.extractor = extractor

    def run(self, path):
        """Load the CSV at ``path``, encode it, train a tree and print accuracy."""
        print("Running DL pipeline")

        frame = pd.read_csv(path)
        mean_time = frame['Time_taken'].mean()
        frame['Time_taken'] = frame['Time_taken'].fillna(mean_time)

        # The encoders modify `frame` in place; nothing is returned.
        self._extract(frame)

        target = 'Start_Tech_Oscar'
        inputs, labels = frame.drop(columns=target), frame[target]
        train_x, test_x, train_y, test_y = train_test_split(
            inputs, labels, test_size=0.33, random_state=42
        )

        predictions = DecisionTreeClassifier().fit(train_x, train_y).predict(test_x)

        print('accuracy', accuracy_score(test_y, predictions))

    def _extract(self, data):
        """Call the encoder method matching ``self.feature_type``.

        An unrecognised ``feature_type`` leaves ``data`` untouched.
        """
        kind = self.feature_type
        if kind == "label_encoder":
            self.extractor.label(data)
        elif kind == "onehot_encoding":
            self.extractor.onehot(data)
        elif kind == "get_dummies":
            self.extractor.getdummy(data)


# Entry point for the string-dispatch version: the encoding is selected by the
# "onehot_encoding" string, which DLPipeline._extract maps to Vectors.onehot.
if __name__ == "__main__":
    extractor = Vectors()
    dl_pipeline = DLPipeline(extractor, "onehot_encoding")
    dl_pipeline.run('/content/Movie_classification.csv')
################ OCP violation #############################


Running DL pipeline
onehot_encoding
accuracy 0.5209580838323353


In [None]:
from abc import abstractmethod, ABC


class Extractor(ABC):
    """Interface for encoding strategies: subclasses implement ``extract``."""

    @abstractmethod
    def extract(self, data):
        """Encode ``data``; concrete subclasses supply the behaviour."""


class labelExtractor(Extractor):
    """Extractor that label-encodes every object-dtype column in place."""

    def extract(self, data):
        """Replace each object column of ``data`` by integer labels."""
        object_columns = [name for name in data.columns if data[name].dtype == "object"]
        for name in object_columns:
            data[name] = LabelEncoder().fit_transform(data[name])
        print("Label Encoder")


class onehotExtractor(Extractor):
    """Extractor that one-hot encodes every object-dtype column in place."""

    def extract(self, data):
        """Replace each object column by one-hot columns (first level dropped).

        The new columns are named ``<column>_<category>`` and appended to
        ``data``; the original column is removed. ``data`` is modified in
        place and nothing is returned.
        """
        # Snapshot the names first: columns are dropped and added in the loop.
        object_columns = [name for name in data.columns if data[name].dtype == "object"]
        for name in object_columns:
            encoder = OneHotEncoder(drop='first')
            # The encoder's default output is sparse; densifying here avoids
            # the `sparse` / `sparse_output` keyword, whose name depends on the
            # installed scikit-learn version.
            dense = encoder.fit_transform(data[[name]]).toarray()
            # drop='first' removes the first category, so the output columns
            # correspond to the remaining ones, in order.
            kept_categories = encoder.categories_[0][1:]
            data.drop(columns=name, inplace=True)
            for position, category in enumerate(kept_categories):
                data[f"{name}_{category}"] = dense[:, position]

        print("onehot_encoding")

class getdummyExtractor(Extractor):
    """Extractor that expands object-dtype columns with ``pd.get_dummies``."""

    def extract(self, data):
        """Replace each object column by indicator columns (first level dropped).

        The new integer columns are named ``<column>_<category>`` and appended
        to ``data``; the original column is removed. ``data`` is modified in
        place and nothing is returned.
        """
        # Snapshot the names first: columns are dropped and added in the loop.
        object_columns = [name for name in data.columns if data[name].dtype == "object"]
        for name in object_columns:
            dummies = pd.get_dummies(
                data[name], prefix=str(name), drop_first=True, dtype=int
            )
            # Mutate the caller's frame; rebinding the local name `data` would
            # leave the caller's frame untouched.
            data.drop(columns=name, inplace=True)
            for dummy_name in dummies.columns:
                data[dummy_name] = dummies[dummy_name]
        print("get_dummies")


class DLPipeline:
    """End-to-end pipeline whose encoding step is an injected strategy object.

    Any object with an ``extract(data)`` method can be plugged in.
    """

    def __init__(self, extractor):
        """Store the encoding strategy to apply before training."""
        self.extractor = extractor

    def run(self, path):
        """Load the CSV at ``path``, encode it, train a tree and print accuracy."""
        print("Running DL pipeline")

        frame = pd.read_csv(path)
        mean_time = frame['Time_taken'].mean()
        frame['Time_taken'] = frame['Time_taken'].fillna(mean_time)

        # The extractors modify `frame` in place; nothing is returned.
        self._extract(frame)

        target = 'Start_Tech_Oscar'
        inputs, labels = frame.drop(columns=target), frame[target]
        train_x, test_x, train_y, test_y = train_test_split(
            inputs, labels, test_size=0.33, random_state=42
        )

        predictions = DecisionTreeClassifier().fit(train_x, train_y).predict(test_x)

        print('accuracy', accuracy_score(test_y, predictions))

    def _extract(self, data):
        """Delegate the encoding of ``data`` to the injected extractor."""
        self.extractor.extract(data)


# Entry point for the strategy version: the encoding is chosen by which
# Extractor subclass is instantiated and handed to the pipeline.
if __name__ == "__main__":
    extractor = labelExtractor()
    dl_pipeline = DLPipeline(extractor)
    dl_pipeline.run('/content/Movie_classification.csv')

Running DL pipeline
Label Encoder
accuracy 0.5508982035928144


#Liskov Substitution Principle

In [None]:
class Car():
  """Base vehicle described only by its body type."""

  def __init__(self, type):
    # `type` shadows the builtin, but it is the public parameter name.
    self.type = type


class PetrolCar(Car):
  """Petrol-powered Car; adds no state of its own."""

  def __init__(self, type):
    # Delegate to Car so base-class initialisation is not duplicated here.
    super().__init__(type)

# Base-class instance: `properties` is attached ad hoc as a dict.
car = Car("SUV")
car.properties = {"Color": "Red", "Gear": "Auto", "Capacity": 6}

# Subclass instance: the same attribute is attached as a tuple instead, so the
# two objects do not expose `properties` in the same shape.
petrol_car = PetrolCar("Sedan")
petrol_car.properties = ("Blue", "Manual", 4)
print(petrol_car.properties)

('Blue', 'Manual', 4)


In [None]:
class Car():
  """Base vehicle described only by its body type."""

  def __init__(self, type):
    # `type` shadows the builtin, but it is the public parameter name.
    self.type = type


class PetrolCar(Car):
  """Petrol-powered Car; adds no state of its own."""

  def __init__(self, type):
    # Delegate to Car so base-class initialisation is not duplicated here.
    super().__init__(type)

# Base-class instance: `properties` is attached ad hoc as a dict.
car = Car("SUV")
car.properties = {"Color": "Red", "Gear": "Auto", "Capacity": 6}

# Subclass instance: the same attribute is attached as a tuple instead.
petrol_car = PetrolCar("Sedan")
petrol_car.properties = ("Blue", "Manual", 4)

# Both objects are handled through one list, as if they were interchangeable.
cars = [car, petrol_car]

def find_red_cars(cars):
  """Print how many items of ``cars`` have ``properties['Color'] == "Red"``.

  Each item's ``properties`` attribute is indexed with the string key
  ``'Color'``, so an item whose ``properties`` is a tuple raises TypeError.
  """
  red_cars = sum(1 for vehicle in cars if vehicle.properties['Color'] == "Red")
  print(f'Number of Red Cars = {red_cars}')

# Raises TypeError: petrol_car.properties is a tuple, and find_red_cars indexes
# every car's `properties` with the string key 'Color'.
find_red_cars(cars)

TypeError: ignored

In [None]:
class Car():
  """Base vehicle whose properties are only reachable through accessors.

  Properties are always stored as a dict with the keys ``Color``, ``Gear``
  and ``Capacity``, so every Car (and subclass) exposes them in one shape.
  """

  def __init__(self, type):
    # `type` shadows the builtin, but it is the public parameter name.
    self.type = type
    self.car_properties = {}

  def set_properties(self, color, gear, capacity):
    """Store the three properties under their fixed dict keys."""
    self.car_properties = {"Color": color, "Gear": gear, "Capacity": capacity}

  def get_properties(self):
    """Return the properties dict (empty until set_properties is called)."""
    return self.car_properties


class PetrolCar(Car):
  """Petrol-powered Car; inherits the property accessors unchanged."""

  def __init__(self, type):
    # Delegate to Car so `type` and `car_properties` are initialised in one
    # place and cannot drift apart from the base class.
    super().__init__(type)

# Properties now go through set_properties(), which stores them as a dict for
# the base class and the subclass alike.
car = Car("SUV")
car.set_properties("Red", "Auto", 6)

petrol_car = PetrolCar("Sedan")
petrol_car.set_properties("Blue", "Manual", 4)

# Both objects are handled through one list.
cars = [car, petrol_car]

def find_red_cars(cars):
  """Print how many items of ``cars`` report the colour "Red".

  Each item is queried through ``get_properties()`` and the result is indexed
  with the key ``'Color'``.
  """
  red_cars = sum(1 for vehicle in cars if vehicle.get_properties()['Color'] == "Red")
  print(f'Number of Red Cars = {red_cars}')

# Works for both objects: each returns a dict from get_properties().
find_red_cars(cars)

Number of Red Cars = 1
