<a href="https://colab.research.google.com/github/sri976/generative-AI-2025/blob/main/2033_W6_AS6_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on Drive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.pipeline import Pipeline

# Load dataset
dataset_url = "/content/drive/MyDrive/Housing.csv"
df = pd.read_csv(dataset_url)

# The first column is assumed to be the target (likely 'price'); every
# remaining column is used as an input feature.
X = df.drop(columns=df.columns[0])
y = df.iloc[:, 0].values

# Identify categorical and numerical features by dtype
categorical_features = X.select_dtypes(include=['object']).columns.tolist()
numerical_features = X.select_dtypes(exclude=['object']).columns.tolist()

# Preprocessing: standardise numeric columns, one-hot encode categorical ones.
# sparse_output=False yields a dense array that can be fed to Keras directly;
# handle_unknown='ignore' encodes categories unseen during fit as all zeros.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features),
    ])

# Split BEFORE fitting anything, so that no statistic of the test rows
# (means, variances, category sets) influences preprocessing or model sizing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessor on the training data only, then reuse the fitted
# transformer for the test data.
X_train_transformed = preprocessor.fit_transform(X_train, y_train)
X_test_transformed = preprocessor.transform(X_test)

# Size the network from the train-fitted preprocessor output. Deriving the
# width from a fit on the full dataset could disagree with the train-only fit
# when a category is missing from the training split.
# An explicit keras.Input layer replaces the `input_shape=` argument on the
# first Dense layer, which Keras warns against for Sequential models.
model = Sequential([
    keras.Input(shape=(X_train_transformed.shape[1],)),
    Dense(18, activation='swish'),
    Dense(26, activation='swish'),
    Dense(20, activation='swish'),
    Dense(15, activation='swish'),
    Dense(1),  # Output layer: a single regression value
])

# Bundle the fitted preprocessor and the model so both can be saved together.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Compile the model before fitting
pipeline.named_steps['model'].compile(optimizer='rmsprop', loss='mean_squared_error', metrics=['mae'])

# Fit the model using the transformed training data.
# NOTE: the test split doubles as validation data here, so the val_* metrics
# are not an independent estimate of generalisation.
# NOTE(review): the target is trained in raw price units; in the recorded run
# val_mae stays at roughly 5.0e6 over the first epochs, so scaling the target
# or raising the learning rate is worth evaluating.
pipeline.named_steps['model'].fit(X_train_transformed, y_train, epochs=200, batch_size=64, validation_data=(X_test_transformed, y_test))

# Make predictions using the transformed testing data
y_pred = pipeline.named_steps['model'].predict(X_test_transformed)


# Evaluate model performance on the held-out test rows.
# `y_pred` already holds the model's predictions for the transformed test set,
# so it is reused here instead of predicting a second time through the
# Pipeline wrapper, which was never fit as a single unit.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the MSE and is expressed in the target's units.
# The square root of the MAE is not a meaningful error metric.
rmse = np.sqrt(mse)
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")

# Save the model (save the entire pipeline): the preprocessor and the Keras
# model are written together to one file, which predict_house_price() below
# reloads by the same filename.
# NOTE(review): this relies on the installed Keras version supporting pickling
# of the model object - confirm before depending on the saved file.
# !pip install joblib
import joblib
joblib.dump(pipeline, "housing_price_pipeline.pkl")

# Load and deploy the model
def predict_house_price(input_data):
    """Predict a price for one record using the saved pipeline.

    The pipeline is reloaded from "housing_price_pipeline.pkl" on every call.

    Args:
        input_data: A single record that ``pd.DataFrame([input_data])`` can
            turn into a one-row frame - presumably a dict keyed by the
            feature column names used in training (verify against callers).

    Returns:
        The first element of the pipeline's prediction output.
    """
    model_pipeline = joblib.load("housing_price_pipeline.pkl")
    # Wrap the record in a list so it becomes exactly one DataFrame row.
    single_row = pd.DataFrame([input_data])
    return model_pipeline.predict(single_row)[0]

# Example usage:
# sample_input = X_test.iloc[0].to_dict()  # one raw feature row as a {column: value} dict; X_test[0] would be a column lookup on the DataFrame
# predicted_price = predict_house_price(sample_input)
# print("Predicted Price:", predicted_price)

Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - loss: 25413814321152.0000 - mae: 4708655.0000 - val_loss: 30129992499200.0000 - val_mae: 5007536.5000
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 26320283107328.0000 - mae: 4789295.5000 - val_loss: 30129988304896.0000 - val_mae: 5007536.0000
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 24566061596672.0000 - mae: 4651378.5000 - val_loss: 30129988304896.0000 - val_mae: 5007535.5000
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 25449103097856.0000 - mae: 4727315.0000 - val_loss: 30129984110592.0000 - val_mae: 5007535.5000
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 24038837583872.0000 - mae: 4587627.5000 - val_loss: 30129977819136.0000 - val_mae: 5007535.5000
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

