In [None]:
!pip install plotly pandas numpy



In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

In [None]:
# Load the housing data into the module-level frame used by every later cell.
# The path is relative, so the CSV must sit in the current working directory.
df = pd.read_csv('AmesHousing.csv')

In [None]:
# Sale price histograms: raw scale next to natural-log scale
def plot_price_distribution():
    """Show side-by-side histograms of ``SalePrice`` and of its natural log.

    Reads the module-level ``df``. The figure is displayed with
    ``fig.show()``; nothing is returned.
    """
    panel_titles = ('Sale Price Distribution', 'Log Sale Price Distribution')
    fig = make_subplots(rows=1, cols=2, subplot_titles=panel_titles)

    sale_price = df['SalePrice']
    # One entry per panel: (values to bin, legend name, subplot column).
    panels = [
        (sale_price, 'Sale Price', 1),
        (np.log(sale_price), 'Log Sale Price', 2),
    ]
    for values, label, column in panels:
        histogram = go.Histogram(
            x=values,
            name=label,
            nbinsx=50,
            histnorm='probability',  # normalise bar heights to probabilities
        )
        fig.add_trace(histogram, row=1, col=column)

    fig.update_layout(title='House Price Distributions', showlegend=True, height=500)
    fig.show()

plot_price_distribution()

In [None]:
# Pairwise correlation heatmap for a hand-picked set of columns
def plot_correlation_heatmap():
    """Display an annotated heatmap of correlations between selected columns.

    Reads the module-level ``df``, computes ``DataFrame.corr()`` over the
    listed columns and labels every cell with its value rounded to two
    decimals. The figure is displayed with ``fig.show()``; nothing is
    returned.
    """
    features = [
        'SalePrice',
        'Overall Qual',
        'Gr Liv Area',
        'Garage Cars',
        'Total Bsmt SF',
        'Full Bath',
        'Year Built',
        'Lot Area',
    ]

    corr_matrix = df[features].corr()
    cell_labels = corr_matrix.round(2)

    heatmap = go.Heatmap(
        z=corr_matrix,
        x=features,
        y=features,
        colorscale='Viridis',
        text=cell_labels,
        texttemplate='%{text}',  # print the rounded value inside each cell
        textfont=dict(size=10),
        hoverongaps=False,
    )
    fig = go.Figure(data=heatmap)

    layout_options = dict(title='Feature Correlation Heatmap', height=600, width=800)
    fig.update_layout(**layout_options)
    fig.show()

plot_correlation_heatmap()

In [None]:
# Living area vs. sale price, with quality as colour and lot area as marker size
def plot_feature_relationships():
    """Scatter ``Gr Liv Area`` against ``SalePrice`` from the module-level ``df``.

    Points are coloured by ``Overall Qual`` and sized by ``Lot Area``, and
    ``Year Built`` is added to the hover data. The figure is displayed with
    ``fig.show()``; nothing is returned.
    """
    scatter_options = dict(
        x='Gr Liv Area',
        y='SalePrice',
        color='Overall Qual',
        size='Lot Area',
        hover_data=['Year Built'],
        title='House Prices by Living Area and Quality',
    )
    fig = px.scatter(df, **scatter_options)

    fig.update_layout(width=800, height=600)
    fig.show()

plot_feature_relationships()

In [None]:
# Predictions
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np

# Features, target
feature_cols = [
    'Overall Qual', 'Gr Liv Area', 'Garage Cars', 'Total Bsmt SF',
    'Full Bath', 'Year Built', 'Lot Area'
]
X = df[feature_cols].copy()
y = df['SalePrice'].copy()

# Handle missing values: every gap is replaced by its column mean.
# NOTE(review): this also mean-imputes the target, so any row with a missing
# SalePrice would be trained on a made-up label -- confirm that is intended.
X = X.fillna(X.mean())
y = y.fillna(y.mean())

# Log transform target. log1p(y) is log(1 + y), computed without the
# precision loss of forming 1 + y explicitly for small y.
y_log = np.log1p(y)

# Standardise the features, then fit a Lasso regression. alpha and max_iter
# are fixed constants here; no parameter search happens in this cell.
lasso_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', Lasso(alpha=0.001, max_iter=10000))
])

# Fit the model & make predictions.
# The model predicts on the same rows it was fitted on, so y_pred_log holds
# in-sample predictions, not held-out ones.
lasso_pipe.fit(X, y_log)
y_pred_log = lasso_pipe.predict(X)

# Transform predictions back to the price scale; expm1 is the inverse of log1p.
y_pred = np.expm1(y_pred_log)

In [None]:
# Model Performance Plot
def plot_model_performance(y_true, y_pred):
    """Plot predicted against actual values with a y = x reference line.

    Parameters
    ----------
    y_true : array-like of numbers
        Actual values, drawn on the x axis.
    y_pred : array-like of numbers
        Predicted values, drawn on the y axis.

    Raises
    ------
    ValueError
        If either input is empty, since the reference line has no range then.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.size == 0 or predicted.size == 0:
        raise ValueError('y_true and y_pred must both be non-empty')

    fig = go.Figure()

    # Scatter plot
    fig.add_trace(go.Scatter(
        x=y_true,
        y=y_pred,
        mode='markers',
        name='Predictions',
        marker=dict(color='blue', size=8, opacity=0.6)
    ))

    # Perfect prediction line, spanning the joint range of both inputs.
    # nanmin/nanmax ignore NaNs; the builtin min/max give results that depend
    # on where a NaN sits in the sequence, and they loop in Python.
    combined = np.concatenate([actual, predicted])
    min_val = float(np.nanmin(combined))
    max_val = float(np.nanmax(combined))
    fig.add_trace(go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Perfect Prediction',
        line=dict(color='red', dash='dash')
    ))

    fig.update_layout(
        title='Predicted vs Actual House Prices',
        xaxis_title='Actual Price',
        yaxis_title='Predicted Price',
        height=600,
        width=800
    )

    fig.show()

plot_model_performance(y, y_pred)