In [2]:
%pwd

'/home/sarath_kumar/ImagoAI/notebook'

In [None]:
# import os

# os.chdir("../")
# %pwd

'/home/sarath_kumar/ImagoAI'

In [13]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

# Load dataset
df = pd.read_csv("data/TASK-ML-INTERN.csv")  # Replace with actual dataset
df.drop("hsi_id", axis=1, inplace=True)

# Remove outliers using IQR
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]

# Separate features and target
X = df.iloc[:, :-1].values  # All except last column
y = df.iloc[:, -1].values   # Last column (target variable)

# Normalize input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply t-SNE
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Reshape data for CNN and LSTM (samples, time steps, features)
# X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Train-Test Split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)


In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler


# Define the MLP Model
mlp_model = Sequential([
    Dense(512, activation='relu', input_shape=(X_pca.shape[1],)),  
    BatchNormalization(),
    Dropout(0.4),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    Dropout(0.2),

    Dense(1, activation='linear')  # Regression output
])

# Compile the Model
mlp_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='mse', metrics=['mae'])

# Early Stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, mode='min')

# Train the Model
mlp_model.fit(X_train, y_train, 
              epochs=50, batch_size=16, 
              validation_data=(X_test, y_test),
              callbacks=[early_stopping])

# Evaluate Model
test_loss, test_mae = mlp_model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae:.4f}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 1011773.6875 - mae: 642.2027 - val_loss: 933989.7500 - val_mae: 582.2896
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1041879.8750 - mae: 614.7473 - val_loss: 911131.3125 - val_mae: 573.6258
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 1128195.2500 - mae: 623.7294 - val_loss: 882047.8125 - val_mae: 561.8190
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 1125606.0000 - mae: 657.3262 - val_loss: 836159.0000 - val_mae: 542.0676
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 937587.8125 - mae: 581.3951 - val_loss: 778767.1875 - val_mae: 517.8513
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 987492.3750 - mae: 595.1503 - val_loss: 708979.3125 - val_mae: 497.1718
Epoch 7/50
[1m20

In [15]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predictions
y_pred_cnn = mlp_model.predict(X_test)


# Compute metrics
def evaluate_model(y_test, y_pred):
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

evaluate_model(y_test, y_pred_cnn)



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
MAE: 522.6732
RMSE: 735.7736
R² Score: 0.0969


### TSNE Implementation

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

# Load dataset
df = pd.read_csv("data/TASK-ML-INTERN.csv")  # Replace with actual dataset
df.drop("hsi_id", axis=1, inplace=True)

# Remove outliers using IQR
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]

# Separate features and target
X = df.iloc[:, :-1].values  # All except last column
y = df.iloc[:, -1].values   # Last column (target variable)

# Normalize input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply t-SNE
tsne = TSNE(n_components=2, perplexity=30, learning_rate=200, random_state=42)
X_tsne = tsne.fit_transform(X_scaled)

# Reshape data for CNN and LSTM (samples, time steps, features)
# X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Train-Test Split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X_tsne, y, test_size=0.2, random_state=42)


In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler


# Define the MLP Model
mlp_model = Sequential([
    Dense(512, activation='relu', input_shape=(X_tsne.shape[1],)),  
    BatchNormalization(),
    Dropout(0.4),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    Dropout(0.2),

    Dense(1, activation='linear')  # Regression output
])

# Compile the Model
mlp_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='mse', metrics=['mae'])

# Early Stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, mode='min')

# Train the Model
mlp_model.fit(X_train, y_train, 
              epochs=50, batch_size=16, 
              validation_data=(X_test, y_test),
              callbacks=[early_stopping])

# Evaluate Model
test_loss, test_mae = mlp_model.evaluate(X_test, y_test)
print(f"Test MAE: {test_mae:.4f}")


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - loss: 1149574.2500 - mae: 645.1843 - val_loss: 930534.2500 - val_mae: 580.6826
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 1245786.8750 - mae: 692.5543 - val_loss: 908563.8125 - val_mae: 571.9672
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1334143.7500 - mae: 711.0934 - val_loss: 875424.1250 - val_mae: 558.5152
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1340899.8750 - mae: 677.3776 - val_loss: 835190.0625 - val_mae: 540.8154
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1185478.1250 - mae: 633.5578 - val_loss: 780180.1250 - val_mae: 517.7187
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1189517.2500 - mae: 605.3069 - val_loss: 728562.9375 - val_mae: 500.7755
Epoc

In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predictions
y_pred_cnn = mlp_model.predict(X_test)


# Compute metrics
def evaluate_model(y_test, y_pred):
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

evaluate_model(y_test, y_pred_cnn)



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
MAE: 578.0734
RMSE: 784.7893
R² Score: -0.0275
