# Regression Models on Scaled Data

### Correlation of features with the target variable (`Temperature_F`)

In [None]:
import pandas as pd

# Read the dataset from the CSV file in the working directory
df = pd.read_csv("pirvision_transformed.csv")

# Build the correlation matrix over the numeric columns only, then keep the
# 'Temperature_F' column and order it from most positive to most negative.
# The target's own entry (correlation 1.0 with itself) is part of the result.
corr_matrix = df.corr(numeric_only=True)
correlations_with_label = corr_matrix['Temperature_F'].sort_values(ascending=False)

# Show the ranked correlations
print(correlations_with_label)

Temperature_F    1.000000
PIR_4            0.436447
PIR_47           0.423147
PIR_3            0.418510
PIR_48           0.414124
                   ...   
PIR_11          -0.310372
PIR_39          -0.405392
PIR_41          -0.413534
PIR_40          -0.459982
Label           -0.929252
Name: Temperature_F, Length: 61, dtype: float64


### Linear regression (Single variable)

In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error


# Single-feature baseline: predict Temperature_F from PIR_4, the sensor column
# with the largest positive correlation in the table printed above.
X = df[['PIR_4']]
y = df['Temperature_F']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the linear model on the training split only
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Evaluation on the test split
print("R² Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Prepare DataFrame for plotting: one row per test sample with the feature,
# the observed target and the model's prediction.
plot_df = X_test.copy()
plot_df['Actual Temperature'] = y_test.values
plot_df['Predicted Temperature'] = y_pred

# A 'lines' trace joins its points in row order. Sorting by the feature makes the
# regression line a single left-to-right pass instead of a path that doubles back
# through every test point (same approach as the polynomial-regression cell).
plot_df = plot_df.sort_values(by='PIR_4')

# Plot using Plotly: scatter of actual values with the fitted line on top
fig = px.scatter(plot_df, x='PIR_4', y='Actual Temperature',
                 title='Simple Linear Regression: PIR_4 vs Temperature_F',
                 labels={'PIR_4': 'PIR_4 Sensor Reading', 'Actual Temperature': 'Actual Temperature'})
fig.add_scatter(x=plot_df['PIR_4'], y=plot_df['Predicted Temperature'],
                mode='lines', name='Regression Line')
fig.update_layout(width=800, height=500)
fig.show()


R² Score: 0.1944616761909982
Mean Squared Error: 0.8980608306360952


### Linear Regression (Multivariable)

In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error


# Hand-picked PIR columns used as predictors.
# NOTE(review): PIR_46 and PIR_49 fall in the truncated part of the correlation
# printout, so their rank among the top correlations should be confirmed.
top_features = ['PIR_47', 'PIR_3', 'PIR_48', 'PIR_4', 'PIR_46', 'PIR_49']
X, y = df[top_features], df['Temperature_F']

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit on the training split (fit() returns the estimator itself), then score the held-out rows
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report test-set metrics
print("R² Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Actual vs predicted scatter
result_df = pd.DataFrame({'Actual Temperature': y_test, 'Predicted Temperature': y_pred})
fig = px.scatter(
    result_df,
    x='Actual Temperature',
    y='Predicted Temperature',
    title='Multivariable Linear Regression: Actual vs Predicted Temperature',
    labels={'Actual Temperature': 'Actual Temperature', 'Predicted Temperature': 'Predicted Temperature'},
)

# Dashed red y = x diagonal spanning the observed range of actual values;
# points on this line are perfect predictions.
actual_min = result_df['Actual Temperature'].min()
actual_max = result_df['Actual Temperature'].max()
fig.add_shape(
    type="line",
    x0=actual_min, y0=actual_min,
    x1=actual_max, y1=actual_max,
    line=dict(dash="dash", color="red"),
)
fig.update_layout(width=800, height=500)
fig.show()


R² Score: 0.2568057977673276
Mean Squared Error: 0.8285559890248705


### Polynomial Regression (Single Variable)

In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Degree-2 polynomial fit of Temperature_F on the single feature PIR_4
X_single, y = df[['PIR_4']], df['Temperature_F']

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X_single, y, test_size=0.3, random_state=42)

# Expand the feature into its degree-2 polynomial terms. The transformer is
# fitted on the training split and then reused unchanged on the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Linear regression on the expanded features, then predictions for the test rows
poly_model = LinearRegression().fit(X_train_poly, y_train)
y_pred = poly_model.predict(X_test_poly)

# Test-set metrics
print("Polynomial Regression (Single Variable)")
print("R2 Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Plot frame: feature, observed target and prediction per test row, ordered by
# PIR_4 so the fitted curve is drawn left to right.
plot_df = (
    X_test
    .assign(**{
        'Actual Temperature_F': y_test.values,
        'Predicted Temperature_F': y_pred,
    })
    .sort_values(by='PIR_4')
)

# Scatter of actual values with the fitted curve on top
fig = px.scatter(
    plot_df,
    x='PIR_4',
    y='Actual Temperature_F',
    title='Polynomial Regression (Degree 2) - Single Variable: PIR_4',
    labels={'PIR_4': 'PIR_4', 'Actual Temperature_F': 'Actual Temperature_F'},
)
fig.add_scatter(
    x=plot_df['PIR_4'],
    y=plot_df['Predicted Temperature_F'],
    mode='lines',
    name='Polynomial Regression Line',
)
fig.update_layout(width=800, height=500)
fig.show()


Polynomial Regression (Single Variable)
R2 Score: 0.20945620085619432
Mean Squared Error: 0.8813440651169263


### Polynomial Regression (Multivariable)

In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Predictors (same six PIR columns as the multivariable linear model) and target
feature_cols = ['PIR_47', 'PIR_3', 'PIR_48', 'PIR_4', 'PIR_46', 'PIR_49']
X, y = df[feature_cols], df['Temperature_F']

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Degree-2 polynomial expansion, fitted on the training split and reused
# unchanged on the test split
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Linear regression on the expanded features, then predictions for the test rows
model = LinearRegression().fit(X_train_poly, y_train)
y_pred = model.predict(X_test_poly)

# Test-set metrics
print("Multivariable Polynomial Regression (Degree 2)")
print("R2 Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Actual vs predicted frame with a fresh 0..n-1 index
plot_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred}).reset_index(drop=True)

fig = px.scatter(
    plot_df,
    x='Actual',
    y='Predicted',
    title='Actual vs Predicted Temperature_F (Multivariable Polynomial Regression)',
    labels={'Actual': 'Actual Temperature_F', 'Predicted': 'Predicted Temperature_F'},
)

# Dashed red y = x diagonal across the range of actual values
actual_min, actual_max = plot_df['Actual'].min(), plot_df['Actual'].max()
fig.add_shape(
    type='line',
    x0=actual_min, y0=actual_min,
    x1=actual_max, y1=actual_max,
    line=dict(color='red', dash='dash'),
)

fig.update_layout(width=700, height=500)
fig.show()


Multivariable Polynomial Regression (Degree 2)
R2 Score: 0.2550621836726361
Mean Squared Error: 0.8304998711170138


### Decision Tree Regressor

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import plotly.express as px
import plotly.graph_objects as go

# Target column and the six PIR predictor columns
target_col = 'Temperature_F'
feature_cols = ['PIR_47', 'PIR_3', 'PIR_48', 'PIR_4', 'PIR_46', 'PIR_49']

# Feature matrix and target vector
X, y = df[feature_cols], df[target_col]

# 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Seeded decision tree with otherwise default settings, fitted on the training split
regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows (reused by the plotting cell below)
y_pred = regressor.predict(X_test)

# Test-set metrics, reported to two decimals
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")


Mean Squared Error: 0.52
R^2 Score: 0.53


In [None]:
# Actual vs predicted scatter for the decision tree; reads y_test and y_pred
# produced by the training cell directly above.
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

fig = px.scatter(results_df, x='Actual', y='Predicted',
                 title='Decision Tree Regression: Actual vs Predicted',
                 labels={'Actual': 'Actual Values', 'Predicted': 'Predicted Values'})

# The y = x reference line is fully defined by its two endpoints. Passing every
# (unsorted) test value would make the 'lines' trace run back and forth along
# the diagonal once per sample. A trace is used rather than a shape so the
# 'Perfect Prediction' legend entry is kept.
actual_min = results_df['Actual'].min()
actual_max = results_df['Actual'].max()
fig.add_trace(go.Scatter(x=[actual_min, actual_max], y=[actual_min, actual_max],
                         mode='lines', name='Perfect Prediction', line=dict(color='red')))
fig.show()