<a href="https://colab.research.google.com/github/sadi1485/DeepLearning/blob/main/%5BSadichchha%5DOverfit_on_single_batch_%2B_complex_connections.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 4 - Overfit on single batch + complex connections

---

## Dataset & Preprocessing:

- Use a large dataset such as the UCI Covertype Dataset, but you can use your own (e.g. from project work).
- Preprocess the data by:
    1. Handling missing values.
    2. Normalizing numerical features.
    3. Encoding categorical variables.
- Split the dataset into training and validation sets.

In [13]:
!pip install ucimlrepo



In [14]:
# Load Dataset

from ucimlrepo import fetch_ucirepo
import pandas as pd

# Download the dataset registered under id 31 in the UCI repository.
covertype = fetch_ucirepo(id=31)

# All variable names reported by the repository, with the target column dropped.
feature_names = list(covertype.variables['name'])
feature_names.remove('Cover_Type')

# Features are re-framed using the column order of feature_names;
# targets are kept exactly as returned by the loader.
x = covertype.data.features
y = covertype.data.targets
X = pd.DataFrame(data=x, columns=feature_names)

# Show the dataset metadata.
print(covertype.metadata)

{'uci_id': 31, 'name': 'Covertype', 'repository_url': 'https://archive.ics.uci.edu/dataset/31/covertype', 'data_url': 'https://archive.ics.uci.edu/static/public/31/data.csv', 'abstract': 'Classification of pixels into 7 forest cover types based on attributes such as elevation, aspect, slope, hillshade, soil-type, and more.', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 581012, 'num_features': 54, 'feature_types': ['Categorical', 'Integer'], 'demographics': [], 'target_col': ['Cover_Type'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Sat Mar 16 2024', 'dataset_doi': '10.24432/C50K5N', 'creators': ['Jock Blackard'], 'intro_paper': None, 'additional_info': {'summary': 'Predicting forest cover type from cartographic variables only (no remotely sensed data).  The actual forest cover type for a given observation (30 x 30 meter cell) was determined from

In [15]:
# Preview the first five rows of the feature frame.
X.head(n=5)

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,Wilderness_Area2,Wilderness_Area3,Wilderness_Area4
0,2596,51,3,258,0,510,221,232,148,6279,...,0,0,0,0,0,0,0,0,0,0
1,2590,56,2,212,-6,390,220,235,151,6225,...,0,0,0,0,0,0,0,0,0,0
2,2804,139,9,268,65,3180,234,238,135,6121,...,0,0,0,0,0,0,0,0,0,0
3,2785,155,18,242,118,3090,238,238,122,6211,...,0,0,0,0,0,0,0,0,0,0
4,2595,45,2,153,-1,391,220,234,150,6172,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Count the variable names before and after dropping the target column.
feature_names = covertype.variables['name'].to_list()
print(len(feature_names))
feature_names.remove('Cover_Type')
print(len(feature_names))

# Handling Missing Data: number of null entries per feature column.
missing_per_column = X.isnull().sum()
print("Missing values in each column:\n", missing_per_column) # No missing values

55
54
Missing values in each column:
 Elevation                             0
Aspect                                0
Slope                                 0
Horizontal_Distance_To_Hydrology      0
Vertical_Distance_To_Hydrology        0
Horizontal_Distance_To_Roadways       0
Hillshade_9am                         0
Hillshade_Noon                        0
Hillshade_3pm                         0
Horizontal_Distance_To_Fire_Points    0
Wilderness_Area1                      0
Soil_Type1                            0
Soil_Type2                            0
Soil_Type3                            0
Soil_Type4                            0
Soil_Type5                            0
Soil_Type6                            0
Soil_Type7                            0
Soil_Type8                            0
Soil_Type9                            0
Soil_Type10                           0
Soil_Type11                           0
Soil_Type12                           0
Soil_Type13                           0
So

In [17]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# to_categorical is used for the label encoding below. It was previously only
# imported in the later training cell, so this cell failed with a NameError
# on a fresh kernel (Restart & Run All).
from tensorflow.keras.utils import to_categorical

# Separate numerical and categorical features
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object', 'category', 'bool']).columns

print("Numeric features:\n", numeric_features)
print("Categorical features:\n", categorical_features, "\n") # No categorical features

# Normalize numerical features (z-score).
# A constant column has std == 0 and would become NaN after the division,
# so such columns are divided by 1 instead (they end up as all zeros).
# NOTE(review): mean/std are computed on the full dataset, so validation rows
# influence the scaling; fit them on the training split only if a leak-free
# validation score matters.
feature_std = X.std().replace(0, 1)
X_normalized = (X - X.mean()) / feature_std

# Combine x and y to shuffle together
data = X_normalized.copy()
data['target'] = y

# Shuffle rows (fixed seed so the split is reproducible)
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Split into train and validation (80% / 20%)
train_size = int(0.8 * len(data))
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:]
assert len(train_data) + len(val_data) == len(data), "split lost or duplicated rows"

# Separate x and y again
X_train = train_data.drop(columns='target')
y_train = train_data['target']
X_val = val_data.drop(columns='target')
y_val = val_data['target']

# Subtract 1 from target values before one-hot encoding,
# because to_categorical expects class indices starting at 0.
y_train_shifted = y_train - 1
y_val_shifted = y_val - 1

# One-hot encode labels
num_classes = 7
y_train = to_categorical(y_train_shifted, num_classes)
y_val = to_categorical(y_val_shifted, num_classes)

# Print shapes to check
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

Numeric features:
 Index(['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points', 'Wilderness_Area1', 'Soil_Type1',
       'Soil_Type2', 'Soil_Type3', 'Soil_Type4', 'Soil_Type5', 'Soil_Type6',
       'Soil_Type7', 'Soil_Type8', 'Soil_Type9', 'Soil_Type10', 'Soil_Type11',
       'Soil_Type12', 'Soil_Type13', 'Soil_Type14', 'Soil_Type15',
       'Soil_Type16', 'Soil_Type17', 'Soil_Type18', 'Soil_Type19',
       'Soil_Type20', 'Soil_Type21', 'Soil_Type22', 'Soil_Type23',
       'Soil_Type24', 'Soil_Type25', 'Soil_Type26', 'Soil_Type27',
       'Soil_Type28', 'Soil_Type29', 'Soil_Type30', 'Soil_Type31',
       'Soil_Type32', 'Soil_Type33', 'Soil_Type34', 'Soil_Type35',
       'Soil_Type36', 'Soil_Type37', 'Soil_Type38', 'Soil_Type39',
       'Soil_Type40', 'Wilderness_Area2', 'Wilderness_Area3',
     

## Model Architecture

Keep the number of trainable parameters as low as possible. Define the following neural network:
- Initial Layers: Build an MLP in Keras to process the input features.
- Custom Residual Block:
    - Using the Keras Functional API, create a block with at least two Dense layers with ReLU activations.
    - Implement a residual connection by adding the block’s input to its output (apply a linear projection with an extra Dense layer if the dimensions differ).
- Additional Skip Connection:
    - Implement an extra skip connection that bypasses one or more intermediate layers outside the residual block.
- Final Layers:
    - Add further Dense layers.
    - Include an output layer appropriate for the task (e.g., a single unit with sigmoid activation for binary classification).

In [18]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Add, ReLU, Softmax

# Define input
input_dim = 54
num_classes = 7
inputs = Input(shape=(input_dim,), name='input_layer')

# Initial Dense Layer
# NOTE: `x` is rebound here to a Keras tensor; the data-loading cell used the
# same name for the raw feature frame.
x = Dense(128, activation='relu', name='initial_dense')(inputs)

# Residual Block: two Dense+ReLU layers whose output is added back to the block input.
res = Dense(128, activation='relu', name='res_block_dense1')(x)
res = Dense(128, activation='relu', name='res_block_dense2')(res)

# Project x to match dimensions if needed.
# The projection width must equal the width of `res`, otherwise the Add layer
# below cannot sum the two tensors (the previous hard-coded 64 would not match).
if x.shape[-1] != res.shape[-1]:
    proj = Dense(res.shape[-1], name='res_projection')(x)
else:
    proj = x

# Add residual connection
res_out = Add(name='residual_add')([proj, res])
res_out = ReLU(name='residual_relu')(res_out)

# Additional skip connection outside residual block:
# a separate Dense branch from `x` that bypasses the residual block entirely.
skip = Dense(128, activation='relu', name='skip_connection')(x)

# Combine both outputs
combined = Add(name='combine_skip_res')([res_out, skip])

# Final layers
final = Dense(32, activation='relu', name='final_dense')(combined)
output = Dense(num_classes, activation='softmax', name='output_layer')(final)

# Build and summarize model
model = Model(inputs=inputs, outputs=output, name='Compact_MLP_with_Residuals_and_Skip')
model.summary()


## Visualization:

- Save your complete model (e.g., as a .h5 file or in JSON format).
- Open the saved model in the Netron app (https://netron.app/) and export the network diagram as an image.
- Ensure that the exported image clearly shows all parts of your architecture, including both residual and skip connections.

In [19]:
# Write the complete model to an .h5 file.
model.save('compact_mlp_with_residuals.h5')

# Write the architecture alone, serialized as JSON text.
model_json = model.to_json()
with open('compact_mlp_with_residuals.json', 'w') as fh:
    fh.write(model_json)

# Write the weights alone to their own .weights.h5 file.
model.save_weights('compact_mlp_with_residuals.weights.h5')



## Training & Evaluation:

- Overfitting Experiment:
  - Select a single batch of 128 samples from the training set.
  - Train your model exclusively on this batch until you approach 0 loss.
- Validation Check:
  - Evaluate the overfitted model on the validation set to confirm that it performs poorly, demonstrating a lack of generalization.
- Conclusions:
  - At the end of your code, print the following information:
    - Number of parameters:
    - Final training loss:
    - Final validation loss:

In [20]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

num_classes = 7

# Single-batch experiment: keep only the first `batch_size` training rows.
batch_size = 128
X_train_batch = X_train[:batch_size]
y_train_batch = y_train[:batch_size]

# Adam optimizer with categorical cross-entropy loss; accuracy is tracked as a metric.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Fit on the single batch only. Because validation_data is passed,
# the validation set is also scored after every epoch.
history = model.fit(
    x=X_train_batch,
    y=y_train_batch,
    batch_size=batch_size,
    epochs=100,
    verbose=1,
    validation_data=(X_val, y_val),
)

# Score the trained model once more on the whole validation set.
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_accuracy}")


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - accuracy: 0.4375 - loss: 2.1194 - val_accuracy: 0.4815 - val_loss: 1.8964
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4609 - loss: 1.8886 - val_accuracy: 0.4834 - val_loss: 1.7609
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4688 - loss: 1.7153 - val_accuracy: 0.4861 - val_loss: 1.6555
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4688 - loss: 1.5796 - val_accuracy: 0.4916 - val_loss: 1.5709
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4766 - loss: 1.4641 - val_accuracy: 0.5017 - val_loss: 1.4992
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.5156 - loss: 1.3655 - val_accuracy: 0.5119 - val_loss: 1.4364
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [21]:
# Summary figures requested at the end of the assignment.
num_parameters = model.count_params()
train_loss_curve = history.history['loss']
final_train_loss = train_loss_curve[-1]  # loss recorded for the last epoch

print(f"Number of parameters: {num_parameters}")
print(f"Final training loss: {final_train_loss}")
print(f"Final validation loss: {val_loss}")

Number of parameters: 60935
Final training loss: 0.02207525447010994
Final validation loss: 2.450498342514038
