### Dataset Handling and Model Evaluation Exercise

#### Objectives:
- Understand and implement class inheritance and polymorphism in Python.
- Learn how to handle datasets and preprocess data.
- Implement methods for evaluating machine learning models.

#### Instructions:

1. **Create the `Dataset` Class:**
   - Add attributes for `name`, `data`, `labels`, and `split_ratio`.
   - Add the methods `change_name`, `split_data`, `normalize_data`, and `shuffle_data`.

2. **Implement the `MLModel` Class:**
   - Add attributes for `name` and `accuracy`.
   - Add methods to `train` and `evaluate` the model.

3. **Create the `DataScientist` Class:**
   - Add attributes for `first_name`, `last_name`, `employee_id`, `salary`, `datasets_handled`, and `models_trained`.
   - Add the methods `load_data`, `preprocess_data`, `train_model`, and `evaluate_model`.

4. **Unit Tests:**
   - Ensure all methods are correctly implemented and tested.


### Solution

In [None]:
import unittest
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class Dataset:
    """A named collection of samples and labels with a train/test split.

    Attributes:
        name: Human-readable dataset name.
        data: The full feature collection as passed to the constructor.
        labels: The full label collection as passed to the constructor.
        split_ratio: Value forwarded as ``test_size`` when splitting.
        train_data, test_data, train_labels, test_labels: The partitions
            produced by ``split_data``; ``None`` until it has been called.
    """

    def __init__(self, name, data, labels, split_ratio=0.2):
        self.name = name
        self.data = data
        self.labels = labels
        self.split_ratio = split_ratio
        # Filled in by split_data(); None marks "not split yet".
        self.train_data = None
        self.test_data = None
        self.train_labels = None
        self.test_labels = None

    def change_name(self, new_name):
        """Replace the dataset's name with ``new_name``."""
        self.name = new_name

    def split_data(self):
        """Partition ``data``/``labels`` into train and test sets.

        Uses ``split_ratio`` as the test size and a fixed ``random_state``
        so that repeated calls yield the same partition.
        """
        (
            self.train_data,
            self.test_data,
            self.train_labels,
            self.test_labels,
        ) = train_test_split(
            self.data,
            self.labels,
            test_size=self.split_ratio,
            random_state=42,
        )

    def normalize_data(self):
        """Standardize the train and test features.

        The scaler is fitted on the training partition only and then applied
        to the test partition, so test statistics never influence the fit.

        Raises:
            RuntimeError: If the dataset has not been split yet.
        """
        if self.train_data is None or self.test_data is None:
            raise RuntimeError(
                "split_data() must be called before normalize_data()"
            )
        scaler = StandardScaler()
        self.train_data = scaler.fit_transform(self.train_data)
        self.test_data = scaler.transform(self.test_data)

    def shuffle_data(self, seed=None):
        """Shuffle the training samples and labels with one shared permutation.

        Args:
            seed: Optional integer seed. When given, the permutation is drawn
                from a dedicated ``RandomState`` and is reproducible; when
                omitted, numpy's global random state is used.

        Raises:
            RuntimeError: If no training partition is available yet.
        """
        if self.train_data is None or self.train_labels is None:
            raise RuntimeError(
                "split_data() must be called before shuffle_data()"
            )
        # Convert to arrays so positional fancy-indexing works even when the
        # partitions are plain lists or pandas objects.
        train_data = np.asarray(self.train_data)
        train_labels = np.asarray(self.train_labels)
        indices = np.arange(train_data.shape[0])
        rng = np.random if seed is None else np.random.RandomState(seed)
        rng.shuffle(indices)
        # The same index order is applied to both arrays to keep every sample
        # paired with its label.
        self.train_data = train_data[indices]
        self.train_labels = train_labels[indices]

class MLModel:
    """Baseline classifier that always predicts the most frequent training label.

    Subclasses can override ``train``, ``predict`` and ``evaluate`` to plug in
    a real learning algorithm while keeping the same interface.

    Attributes:
        name: Human-readable model name.
        accuracy: Accuracy from the most recent ``evaluate`` call (0 until then).
    """

    def __init__(self, name):
        self.name = name
        self.accuracy = 0
        # Learned by train(); None marks an untrained model.
        self._majority_label = None

    def train(self, train_data, train_labels):
        """Learn the most frequent label in ``train_labels``.

        Args:
            train_data: Training samples; unused by this baseline.
            train_labels: One label per training sample.

        Raises:
            ValueError: If ``train_labels`` is empty.
        """
        labels = np.asarray(train_labels)
        if labels.size == 0:
            raise ValueError("train_labels must not be empty")
        values, counts = np.unique(labels, return_counts=True)
        # argmax returns the first maximum, so ties resolve to the smallest
        # label in np.unique's sorted order.
        self._majority_label = values[np.argmax(counts)]

    def predict(self, data):
        """Return the learned majority label once per sample in ``data``.

        Raises:
            RuntimeError: If the model has not been trained.
        """
        if self._majority_label is None:
            raise RuntimeError("train() must be called before predict()")
        return np.full(len(data), self._majority_label)

    def evaluate(self, test_data, test_labels):
        """Compute, store and return the accuracy on the given test set.

        Args:
            test_data: Test samples.
            test_labels: One true label per test sample.

        Returns:
            The fraction of correct predictions as a float; the same value is
            stored in ``self.accuracy``.

        Raises:
            RuntimeError: If the model has not been trained.
            ValueError: If ``test_labels`` is empty or its length differs
                from the number of samples in ``test_data``.
        """
        labels = np.asarray(test_labels)
        if labels.size == 0:
            raise ValueError("test_labels must not be empty")
        predictions = self.predict(test_data)
        if predictions.shape[0] != labels.shape[0]:
            raise ValueError(
                "test_data and test_labels must have the same length"
            )
        self.accuracy = float(np.mean(predictions == labels))
        return self.accuracy

class DataScientist:
    """An employee who loads datasets and trains and evaluates models.

    Attributes:
        first_name, last_name, employee_id, salary: Employee details.
        datasets_handled: Datasets created through ``load_data``, in order.
        models_trained: Models passed to ``train_model``, one entry per call.
    """

    def __init__(self, first_name, last_name, employee_id, salary):
        self.first_name = first_name
        self.last_name = last_name
        self.employee_id = employee_id
        self.salary = salary
        self.datasets_handled = []
        self.models_trained = []

    def load_data(self, name, data, labels, split_ratio=0.2):
        """Wrap raw data in a ``Dataset``, record it and return it.

        Args:
            name: Name given to the new dataset.
            data: Feature collection.
            labels: Label collection.
            split_ratio: Test-set proportion forwarded to ``Dataset``.

        Returns:
            The newly created ``Dataset``.
        """
        dataset = Dataset(name, data, labels, split_ratio)
        self.datasets_handled.append(dataset)
        return dataset

    def preprocess_data(self, dataset):
        """Split, then normalize, then shuffle ``dataset`` in place."""
        dataset.split_data()
        dataset.normalize_data()
        dataset.shuffle_data()

    def train_model(self, model, dataset):
        """Train ``model`` on the dataset's training partition and record it."""
        model.train(dataset.train_data, dataset.train_labels)
        self.models_trained.append(model)

    def evaluate_model(self, model, dataset):
        """Evaluate ``model`` on the dataset's test partition.

        The value returned by ``model.evaluate`` is stored on
        ``model.accuracy``. When ``evaluate`` returns ``None``, the existing
        ``model.accuracy`` is left untouched instead of being replaced by
        ``None``.

        Returns:
            ``model.accuracy`` after the evaluation.
        """
        accuracy = model.evaluate(dataset.test_data, dataset.test_labels)
        if accuracy is not None:
            model.accuracy = accuracy
        return model.accuracy

# Unit tests to check your solution

class TestMLClass(unittest.TestCase):
    """Unit tests for the Dataset, MLModel and DataScientist classes."""

    def setUp(self):
        """Create a five-sample dataset (split, normalized, shuffled) and helpers."""
        self.data = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
        self.labels = np.array([0, 1, 0, 1, 0])
        self.dataset = Dataset('Sample Dataset', self.data, self.labels)
        self.dataset.split_data()
        self.dataset.normalize_data()
        self.dataset.shuffle_data()
        self.model = MLModel('TestModel')
        self.data_scientist = DataScientist('Alice', 'Smith', 12345, 80000)

    @staticmethod
    def _row_label_pairs(dataset):
        """Return the training (row, label) pairs in an order-independent form."""
        rows = np.asarray(dataset.train_data).tolist()
        labels = np.asarray(dataset.train_labels).tolist()
        return sorted((tuple(row), label) for row, label in zip(rows, labels))

    def test_dataset_initialization(self):
        self.assertEqual(self.dataset.data.shape, (5, 2), 'Dataset initialization is incorrect')
        self.assertEqual(len(self.dataset.labels), 5, 'Dataset initialization is incorrect')

    def test_change_name(self):
        self.dataset.change_name('New Dataset')
        self.assertEqual(self.dataset.name, 'New Dataset', 'Change name method is incorrect')

    def test_split_data(self):
        self.assertEqual(self.dataset.train_data.shape[0], 4, 'Data split is incorrect')
        self.assertEqual(self.dataset.test_data.shape[0], 1, 'Data split is incorrect')
        self.assertEqual(len(self.dataset.train_labels), 4, 'Data split is incorrect')
        self.assertEqual(len(self.dataset.test_labels), 1, 'Data split is incorrect')

    def test_normalize_data(self):
        # Standardized training features have zero mean and unit variance in
        # every column; checking that directly avoids comparing the data
        # against a second run of the very same transformation.
        np.testing.assert_almost_equal(
            self.dataset.train_data.mean(axis=0), np.zeros(2),
            err_msg='Data normalization is incorrect')
        np.testing.assert_almost_equal(
            self.dataset.train_data.std(axis=0), np.ones(2),
            err_msg='Data normalization is incorrect')

    def test_shuffle_data(self):
        # A random permutation may legitimately leave the order unchanged, so
        # asserting "the order changed" is flaky. Check the invariants instead:
        # same samples, same sample/label pairing, same shape.
        pairs_before = self._row_label_pairs(self.dataset)
        shape_before = self.dataset.train_data.shape
        self.dataset.shuffle_data()
        self.assertEqual(self._row_label_pairs(self.dataset), pairs_before, 'Data shuffling is incorrect')
        self.assertEqual(self.dataset.train_data.shape, shape_before, 'Data shuffling is incorrect')

    def test_model_initialization(self):
        self.assertEqual(self.model.name, 'TestModel', 'MLModel name initialization is incorrect')
        self.assertEqual(self.model.accuracy, 0, 'MLModel accuracy initialization is incorrect')

    def test_data_scientist_initialization(self):
        self.assertEqual(self.data_scientist.first_name, 'Alice', 'DataScientist first name initialization is incorrect')
        self.assertEqual(self.data_scientist.last_name, 'Smith', 'DataScientist last name initialization is incorrect')
        self.assertEqual(self.data_scientist.employee_id, 12345, 'DataScientist employee ID initialization is incorrect')
        self.assertEqual(self.data_scientist.salary, 80000, 'DataScientist salary initialization is incorrect')

    def test_data_scientist_load_data(self):
        dataset = self.data_scientist.load_data('Sample Dataset', self.data, self.labels)
        self.assertIn(dataset, self.data_scientist.datasets_handled, 'DataScientist load data method is incorrect')

    def test_data_scientist_preprocess_data(self):
        dataset = self.data_scientist.load_data('Sample Dataset', self.data, self.labels)
        self.data_scientist.preprocess_data(dataset)
        self.assertIsNotNone(dataset.train_data, 'DataScientist preprocess data method is incorrect')
        self.assertIsNotNone(dataset.test_data, 'DataScientist preprocess data method is incorrect')

    def test_data_scientist_train_model(self):
        self.data_scientist.train_model(self.model, self.dataset)
        self.assertIn(self.model, self.data_scientist.models_trained, 'DataScientist train model method is incorrect')

    def test_data_scientist_evaluate_model(self):
        # A subclass with a fixed score keeps this test independent of any
        # particular learning algorithm.
        class FixedAccuracyModel(MLModel):
            def evaluate(self, test_data, test_labels):
                return 0.75

        model = FixedAccuracyModel('Fixed')
        accuracy = self.data_scientist.evaluate_model(model, self.dataset)
        self.assertEqual(accuracy, 0.75, 'DataScientist evaluate model method is incorrect')
        self.assertEqual(model.accuracy, 0.75, 'DataScientist evaluate model method is incorrect')

# Build the suite directly from the test case class. The previous code passed
# a TestCase *instance* to loadTestsFromModule, which only found the tests
# because the instance exposes its class through the `__class__` attribute.
tests_loaded = unittest.TestLoader().loadTestsFromTestCase(TestMLClass)

# Run in-process with the text runner so the results print in place.
unittest.TextTestRunner().run(tests_loaded)