<a href="https://colab.research.google.com/github/nouval0425/Learning-Python/blob/main/UAS_OOP_runnable_in_terminal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%writefile dataset.py
import pandas as pd

class RiceProductionDataset:
    """
    OOP CONCEPT: ENCAPSULATION
    Owns the raw dataset. The DataFrame is kept in a name-mangled private
    attribute (``__dataframe``) so it is not modified by accident from
    outside the class.
    """

    def __init__(self):
        # Private attribute (encapsulation); stays None until load_data() runs.
        self.__dataframe = None

    def load_data(self, path):
        """Read the CSV file at ``path`` into the private DataFrame."""
        self.__dataframe = pd.read_csv(path)
        print(f"[*] Data berhasil dimuat: {path}")

    def clean_data(self):
        """
        Drop rows containing missing values (NaN) and then duplicate rows.

        Does nothing when no data has been loaded yet.
        """
        if self.__dataframe is None:
            return
        self.__dataframe = self.__dataframe.dropna().drop_duplicates()
        print("[*] Pembersihan data selesai (Missing values & Duplicates ditangani).")

    def get_features_target(self, target_col='Produksi'):
        """
        Split the data into feature columns and the target column.

        Args:
            target_col: Name of the column to use as the target.

        Returns:
            A tuple ``(X, y)`` where ``X`` is the DataFrame without
            ``target_col`` and ``y`` is the ``target_col`` column.

        Raises:
            RuntimeError: If no data has been loaded yet.
        """
        if self.__dataframe is None:
            # Fail with an actionable message instead of an AttributeError
            # raised from inside a call on None.
            raise RuntimeError(
                "No data loaded: call load_data() before get_features_target()."
            )
        X = self.__dataframe.drop(columns=[target_col])
        y = self.__dataframe[target_col]
        return X, y

Writing dataset.py


In [2]:
%%writefile preprocessing.py
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

class Preprocessor:
    """
    OOP CONCEPT: ABSTRACTION
    Hides the technical details of data transformation. Callers invoke a
    single method to obtain a ready-to-use preprocessing transformer.
    """

    def __init__(self, X, y):
        """Store the feature table ``X`` and the target ``y``."""
        self.X = X
        self.y = y

    def split_data(self, test_size=0.2, random_state=42):
        """
        Split the stored data into train and test subsets.

        Args:
            test_size: Passed to ``train_test_split`` (default 0.2, i.e. an
                80/20 train/test split).
            random_state: Seed passed to ``train_test_split``; the default
                of 42 keeps the previous fixed split.

        Returns:
            The ``train_test_split`` result: X_train, X_test, y_train, y_test.
        """
        return train_test_split(
            self.X, self.y, test_size=test_size, random_state=random_state
        )

    def build_pipeline(self):
        """
        Build the column-wise transformer:
        - Numeric columns are standardized with ``StandardScaler``.
        - Text/categorical columns are one-hot encoded; categories unseen
          during fitting are ignored at transform time.

        Columns of any other dtype are not listed in either group.
        """
        # Detect column groups by dtype. 'number' covers every numeric width
        # (int32, float32, ...) rather than only int64/float64; timedelta is
        # excluded explicitly because pandas counts it as numeric.
        numeric_features = self.X.select_dtypes(
            include=['number'], exclude=['timedelta']
        ).columns.tolist()
        # 'category' columns need encoding just like plain object columns.
        categorical_features = self.X.select_dtypes(
            include=['object', 'category']
        ).columns.tolist()

        # Processing for numeric columns
        numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
        # Processing for text/categorical columns
        categorical_transformer = Pipeline(
            steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
        )

        # Combine both transformations behind a single object (abstraction)
        return ColumnTransformer(transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ])

Writing preprocessing.py


In [3]:
%%writefile models.py
from abc import ABC, abstractmethod
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

class RegressionModel(ABC):
    """
    OOP CONCEPT: ABSTRACTION (abstract class)
    Acts as a contract: every concrete regression model must provide
    ``train``, ``predict`` and ``evaluate``.
    """

    @abstractmethod
    def train(self, X, y):
        """Fit the model on features ``X`` and target ``y``."""

    @abstractmethod
    def predict(self, X):
        """Produce predictions for features ``X``."""

    @abstractmethod
    def evaluate(self, y_true, y_pred):
        """Score predictions ``y_pred`` against the true values ``y_true``."""

class LinearRegressionModel(RegressionModel):
    """
    OOP CONCEPT: INHERITANCE
    Derives from RegressionModel.
    OOP CONCEPT: POLYMORPHISM
    Implements train, predict and evaluate specifically for linear regression.
    """

    def __init__(self, preprocessor_pipeline):
        """
        Args:
            preprocessor_pipeline: Transformer used as the first pipeline
                step, ahead of the ``LinearRegression`` estimator.
        """
        # Chain the preprocessor and the algorithm into a single Pipeline so
        # both are fitted and applied together.
        self.model = Pipeline(steps=[
            ('preprocessor', preprocessor_pipeline),
            ('regressor', LinearRegression())
        ])

    def train(self, X, y):
        """Fit the pipeline on the training data."""
        self.model.fit(X, y)
        print("[*] Model Linear Regression telah berhasil dilatih.")

    def predict(self, X):
        """Return the pipeline's predictions for ``X``."""
        return self.model.predict(X)

    def evaluate(self, y_true, y_pred):
        """
        Compute regression metrics for the given predictions.

        Returns:
            A dict of plain floats with the keys "MSE", "RMSE", "MAE", "R2"
            and "MAPE" (the same keys ModelEvaluator.calculate_metrics uses).
        """
        # Imported locally so this module's top-level imports stay unchanged.
        from sklearn.metrics import (
            mean_absolute_error,
            mean_absolute_percentage_error,
            mean_squared_error,
            r2_score,
        )

        mse = mean_squared_error(y_true, y_pred)
        return {
            "MSE": float(mse),
            "RMSE": float(mse ** 0.5),
            "MAE": float(mean_absolute_error(y_true, y_pred)),
            "R2": float(r2_score(y_true, y_pred)),
            "MAPE": float(mean_absolute_percentage_error(y_true, y_pred)),
        }

Writing models.py


In [8]:
%%writefile evaluator.py
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

class ModelEvaluator:
    """Computes and prints evaluation metrics (Single Responsibility Principle)."""

    @staticmethod
    def calculate_metrics(y_true, y_pred):
        """Return MSE, RMSE, MAE, R2 and MAPE as plain floats keyed by name."""
        squared_error = mean_squared_error(y_true, y_pred)
        report = {"MSE": float(squared_error)}
        # RMSE is derived from the already computed MSE.
        report["RMSE"] = float(np.sqrt(squared_error))
        report["MAE"] = float(mean_absolute_error(y_true, y_pred))
        report["R2"] = float(r2_score(y_true, y_pred))
        report["MAPE"] = float(mean_absolute_percentage_error(y_true, y_pred))
        return report

    @staticmethod
    def display_results(metrics):
        """Print a header followed by one ``name: value`` line per metric (4 decimals)."""
        print("\n--- METRIK EVALUASI ---")
        for name, value in metrics.items():
            print("{}: {:.4f}".format(name, value))

Overwriting evaluator.py


In [9]:
%%writefile analyzer.py
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import pandas as pd
from sklearn.inspection import permutation_importance

class ResidualAnalyzer:
    """Residual analysis: stores ``y_true - y_pred`` and plots it."""

    def __init__(self, y_true, y_pred):
        """Compute the residuals as the element-wise difference ``y_true - y_pred``."""
        self.residuals = y_true - y_pred

    def plot_distribution(self, filename='residual_dist.png'):
        """
        Save a histogram (with KDE curve) of the residuals.

        Args:
            filename: Output image path; defaults to 'residual_dist.png'.
        """
        fig = plt.figure(figsize=(6, 4))
        sns.histplot(self.residuals, kde=True)
        plt.title('Residual Distribution')
        plt.savefig(filename)
        # Close the figure once it is on disk; pyplot otherwise keeps every
        # figure open for the lifetime of the process.
        plt.close(fig)

    def qq_plot(self, filename='residual_qq.png'):
        """
        Save a Q-Q plot of the residuals against the normal distribution.

        Args:
            filename: Output image path; defaults to 'residual_qq.png'.
        """
        fig = plt.figure(figsize=(6, 4))
        stats.probplot(self.residuals, dist="norm", plot=plt)
        plt.title('Q-Q Plot')
        plt.savefig(filename)
        plt.close(fig)

class FeatureImportanceAnalyzer:
    """Feature-importance analysis based on permutation importance."""

    def __init__(self, model_pipeline, X_val, y_val):
        """
        Args:
            model_pipeline: Fitted estimator passed to ``permutation_importance``.
            X_val: Validation features; its ``columns`` label the scores.
            y_val: Validation target.
        """
        self.model_pipeline = model_pipeline
        self.X_val = X_val
        self.y_val = y_val
        # Filled by calculate_importance(); None until then so that
        # plot_importance() can detect that nothing was computed yet.
        self.imp_df = None

    def calculate_importance(self, n_repeats=5, random_state=42):
        """
        Compute the mean permutation importance of every feature.

        Args:
            n_repeats: Number of permutations per feature.
            random_state: Seed for the permutations, so repeated runs give
                the same scores.

        Returns:
            DataFrame with columns 'fitur' (feature name) and 'score'
            (mean importance); also stored on ``self.imp_df``.
        """
        result = permutation_importance(
            self.model_pipeline,
            self.X_val,
            self.y_val,
            n_repeats=n_repeats,
            random_state=random_state,
        )
        self.imp_df = pd.DataFrame(
            {'fitur': self.X_val.columns, 'score': result.importances_mean}
        )
        return self.imp_df

    def plot_importance(self, filename='feature_importance.png'):
        """
        Save a bar chart of the features ordered by descending score.

        The scores are computed first if calculate_importance() has not
        been called yet.

        Args:
            filename: Output image path; defaults to 'feature_importance.png'.
        """
        if self.imp_df is None:
            self.calculate_importance()
        fig = plt.figure(figsize=(6, 4))
        ranked = self.imp_df.sort_values(by='score', ascending=False)
        sns.barplot(data=ranked, x='score', y='fitur')
        plt.title('Feature Importance')
        plt.savefig(filename)
        # Close the figure once saved so it does not stay open in pyplot.
        plt.close(fig)

Writing analyzer.py


In [5]:
%%writefile persistence.py
import copy
import json
import os
from datetime import datetime

class ExperimentResult:
    """Plain data object holding the outcome of a single experiment."""

    def __init__(self, model_name, metrics):
        # Creation time, formatted as "YYYY-MM-DD HH:MM:SS" (local clock).
        created_at = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
        self.model_name, self.metrics = model_name, metrics
        self.timestamp = created_at

class ExperimentRepository:
    """
    OOP CONCEPT: ENCAPSULATION & PERSISTENCE
    Manages a collection of experiment results and persists it as JSON.
    """

    def __init__(self):
        self.__experiments = []  # Private collection of result records

    def add_experiment(self, result: "ExperimentResult"):
        """
        Append one experiment result to the collection.

        The record stores ``result.model_name``, ``result.metrics`` and
        ``result.timestamp`` under the keys "model", "metrics" and "time".
        """
        self.__experiments.append({
            "model": result.model_name,
            # Copied so later changes to the caller's metrics object do not
            # alter the stored record.
            "metrics": copy.deepcopy(result.metrics),
            "time": result.timestamp
        })

    def save_to_file(self, filename):
        """Write the whole collection to ``filename`` as indented JSON."""
        # Explicit encoding keeps the file independent of the platform default.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.__experiments, f, indent=4)
        print(f"[*] Riwayat disimpan ke: {filename}")

    def load_from_file(self, filename):
        """
        Replace the collection with the contents of ``filename``.

        Returns:
            A copy of the loaded records, or an empty list when the file
            does not exist (the collection is left untouched in that case).
        """
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                self.__experiments = json.load(f)
        except FileNotFoundError:
            return []
        # Hand out a copy so callers cannot mutate the private collection.
        return copy.deepcopy(self.__experiments)

Writing persistence.py


In [11]:
%%writefile main.py
from dataset import RiceProductionDataset
from preprocessing import Preprocessor
from models import LinearRegressionModel
from evaluator import ModelEvaluator
from analyzer import ResidualAnalyzer, FeatureImportanceAnalyzer
from persistence import ExperimentResult, ExperimentRepository

def main(data_path='Data_Tanaman_Padi_Sumatera_version_1.csv',
         results_path='experiment_results.json'):
    """
    Run the full workflow: load and clean the data, train the linear
    regression model, print the metrics, save the analysis plots and
    persist the experiment record.

    Args:
        data_path: CSV file to load; defaults to the Sumatra rice dataset.
        results_path: JSON file the experiment record is written to.
    """
    # Load data
    ds = RiceProductionDataset()
    ds.load_data(data_path)
    ds.clean_data()
    X, y = ds.get_features_target()

    # Preprocessing
    prep = Preprocessor(X, y)
    X_train, X_test, y_train, y_test = prep.split_data()

    # Model
    model = LinearRegressionModel(prep.build_pipeline())
    model.train(X_train, y_train)

    # Evaluation on the held-out test split
    y_pred = model.predict(X_test)
    metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
    ModelEvaluator.display_results(metrics)

    # Analysis (plots are written as image files)
    ra = ResidualAnalyzer(y_test, y_pred)
    ra.plot_distribution()
    ra.qq_plot()

    # The fitted pipeline (model.model) is passed so that importance is
    # measured on the raw input columns.
    fia = FeatureImportanceAnalyzer(model.model, X_test, y_test)
    fia.calculate_importance()
    fia.plot_importance()

    # Persist the experiment record
    repo = ExperimentRepository()
    repo.add_experiment(ExperimentResult("Linear Regression Sumatera", metrics))
    repo.save_to_file(results_path)
    print("\n--- SEMUA OUTPUT TELAH DIHASILKAN ---")


if __name__ == "__main__":
    main()

Overwriting main.py


In [12]:
!python main.py

[*] Data berhasil dimuat: Data_Tanaman_Padi_Sumatera_version_1.csv
[*] Pembersihan data selesai (Missing values & Duplicates ditangani).
[*] Model Linear Regression telah berhasil dilatih.

--- METRIK EVALUASI ---
MSE: 90358283238.0180
RMSE: 300596.5456
MAE: 229035.3979
R2: 0.8978
MAPE: 0.3334
[*] Riwayat disimpan ke: experiment_results.json

--- SEMUA OUTPUT TELAH DIHASILKAN ---
