In [3]:
import sklearn
from sklearn.datasets import load_iris
import pandas as pd
import matplotlib.pyplot as plt

# Load the iris data to memory
iris = load_iris()

# The loaded iris is a dictionary - so check it
print(iris.keys())

# You can print the description of the dataset
print(iris['DESCR'])

# Convert the loaded data and targets into a dataframe and print the first ten rows.
X = pd.DataFrame(data = iris.data, columns = iris.feature_names)
print(X.head(10))

Y = pd.DataFrame(data=iris.target, columns = ['irisType'])
print(Y.head(10))

# Explore the number of classes in the target set
print(Y['irisType'].value_counts())

# The names of the classes(i.e. species in the iris dataset)
print(iris.target_names)


dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for ea

In [8]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError, CategoricalCrossentropy, SparseCategoricalCrossentropy

# Define y_true and y_pred for a regression task
y_true = np.array([1., 0.])
y_pred = np.array([2., 3.])

# Compute and print the Mean Absolute Error (MAE)
mae_loss = MeanAbsoluteError()
print("The Mean Absolute Error is:", mae_loss(y_true, y_pred).numpy())

# Compute and print the Mean Square Error (MSE)
mse_loss = MeanSquaredError()
print("The Mean Square Error is:", mse_loss(y_true, y_pred).numpy())

# Define y_true and y_pred for a classification task
# Use one-hot vector representation
y_true = np.array([[0, 1, 0], [1, 0, 0]])
y_pred = np.array([[0.15, 0.75, 0.1], [0.75, 0.15, 0.1]])

# Compute and print the categorical cross-entropy loss
cross_entropy_loss = CategoricalCrossentropy()
print("The Categorical Cross Entropy Loss is:", cross_entropy_loss(y_true, y_pred).numpy())

# Use label-encoded representation for the class
y_true = np.array([1, 0])
y_pred = np.array([[0.15, 0.75, 0.1], [0.75, 0.15, 0.1]])

# Compute and print the sparse categorical cross-entropy loss
cross_entropy_loss = SparseCategoricalCrossentropy()
print("The Sparse Categorical Cross Entropy Loss is:", cross_entropy_loss(y_true, y_pred).numpy())


The Mean Absolute Error is: 2.0
The Mean Square Error is: 5.0
The Categorical Cross Entropy Loss is: 0.2876821
The Sparse Categorical Cross Entropy Loss is: 0.28768212


In [3]:
#Assignment
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Assuming vt, r1, and r2 are given
vt = 10
r1 = 2
r2 = 3

# Calculate v1 and v2
v1 = (vt - r1) / (r1 + r2)
v2 = (vt * r2) / (r1 + r2)

# Prepare the data for linear regression
X = [[r1, r2]]
y_v1 = [v1]
y_v2 = [v2]

# Train the linear regression models
reg_v1 = LinearRegression().fit(X, y_v1)
reg_v2 = LinearRegression().fit(X, y_v2)
    
# Predict v1 and v2 for given vt, r1, and r2
predicted_v1 = reg_v1.predict([[r1, r2]])
predicted_v2 = reg_v2.predict([[r1, r2]])

# Calculate the mean squared error
mse_v1 = mean_squared_error(y_v1, predicted_v1)
mse_v2 = mean_squared_error(y_v2, predicted_v2)

print("Predicted v1:", predicted_v1[0])
print("Predicted v2:", predicted_v2[0])
print("Mean Squared Error for v1:", mse_v1)
print("Mean Squared Error for v2:", mse_v2)


Predicted v1: 1.6
Predicted v2: 6.0
Mean Squared Error for v1: 0.0
Mean Squared Error for v2: 0.0


**Optimizing Algorithms and Use Cases:**

**1. AdaGrad:**

* **Use Case 1: Sparse Features**
  - AdaGrad excels when dealing with datasets containing features that rarely update significantly. Its cumulative learning rate adaptation helps prevent frequent updates from diminishing the impact on less frequent ones.
  - Example: Recommender systems often have user-item interactions where some users interact with only a small subset of items. AdaGrad can effectively train the model for these users.
* **Use Case 2: Non-stationary or Time-Varying Problems**
  - In scenarios where the underlying distribution of data changes over time, AdaGrad's continuously adapting learning rates can be beneficial. However, it's crucial to monitor the learning rates as they can eventually become very small.
  - Example: Financial forecasting where market conditions are constantly evolving.

**2. RMSProp:**

* **Use Case 1: Faster Convergence than Standard SGD**
  - RMSProp addresses the issue of SGD's potentially slow convergence in certain situations by incorporating a decaying average of squared gradients. This leads to smoother updates and often faster convergence.
  - Example: Image classification tasks where standard SGD might struggle to find the optimal solution efficiently.
* **Use Case 2: Balancing AdaGrad's Adaptability with Stability**
  - RMSProp offers a balance between AdaGrad's aggressive learning rate adaptation and SGD's potentially slow convergence. It's a good choice when you need an algorithm that adapts to changing data distributions but avoids overly diminishing learning rates.
  - Example: Natural language processing (NLP) tasks where the vocabulary might contain rare words with less frequent updates.

**3. RMSProp with Nesterov Momentum (NRMSProp):**

* **Use Case 1: Improved Convergence over Standard RMSProp**
  - NRMSProp incorporates Nesterov momentum, which can help the algorithm escape local minima and reach better solutions faster. It often converges more quickly than standard RMSProp.
  - Example: Deep neural network training where standard RMSProp might get stuck in suboptimal regions of the loss landscape.
* **Use Case 2: Complex or Deep Learning Architectures**
  - NRMSProp's combination of adaptive learning rates and momentum can be particularly advantageous for complex models that require careful navigation in high-dimensional spaces.
  - Example: Training generative adversarial networks (GANs) or recurrent neural networks (RNNs) for tasks like image generation or language translation.

**4. Adam:**

* **Use Case 1: General-Purpose Optimizer with Adaptive Learning Rates**
  - Adam is widely considered a powerful and versatile optimizer due to its combination of momentum and adaptive learning rates. It often performs well across various deep learning tasks, making it a popular default choice.
  - Example: A wide range of deep learning problems, including computer vision, NLP, and time series forecasting.
* **Use Case 2: Fine-Tuning Pre-trained Models**
  - Adam can be effective when fine-tuning pre-trained models on new datasets. Its adaptive learning rates help the model adjust to the specific data it's being fine-tuned on.
  - Example: Transfer learning scenarios where you're adapting a pre-trained image classification model for a new set of image categories.



In [24]:
#Assignment Linear Regression
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [41]:
np.random.seed(0)
R1 = np.random.uniform(1, 10000, 100)
R2 = np.random.uniform(1, 10000, 100)
Vt = 120

In [42]:
V1 = Vt * (R1 / (R1 + R2))
V2 = Vt * (R2 / (R1 + R2))

In [43]:
data = pd.DataFrame({
    'R1' : R1,
    'R2' : R2,
    'V1' : V1})

In [44]:
X = data[['R1','R2']]
y = data['V1']

In [45]:
X_train,X_test,y_train,y_test = train_test_split(X, y,test_size = 0.2, random_state = 42)

In [46]:
model = LinearRegression()
model.fit(X_train,y_train)

In [47]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test,y_pred)
model.coef_, model.intercept_, mse

(array([ 0.0066365 , -0.00621271]), 57.151037551471404, 50.41004897634889)

In [48]:
print(X_test)

             R1           R2
83  6925.028722  2379.690321
53  1021.346063  6155.980083
70  9764.618191  5210.845025
45  6706.708058  7039.181947
44  6668.000388  8062.133696
39  6818.521171  4314.752936
22  4615.332143  9560.880264
80  3180.513811  1647.776871
10  7917.458656  4471.806661
0   5488.586226  6778.487551
18  7781.789353  8817.471883
30  2646.291565  6180.536275
73  6048.850352   186.199423
33  5684.771055  2983.524977
90  3186.370956  3982.812401
4   4237.124339  2488.282682
76  2828.786819  3454.171455
77  1202.845416  9280.884853
12  5680.877566  6995.093274
31  7742.562661  4288.258241
