Linear Regression

In [None]:
# ===========================
# LINEAR REGRESSION FULL CODE
# ===========================

# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------
# 1. DATA LOADING
# ---------------------------
# Read the customer dataset. The CSV must contain a 'Spending_Score' column,
# which later sections use as the regression target; every other column is
# treated as a feature.
data = pd.read_csv('/content/customers.csv')

# Quick preview of the loaded frame as a sanity check.
print("First 5 rows of data:", data.head(), sep="\n")

# ---------------------------
# 2. DATA CLEANING
# ---------------------------
# Remove duplicate rows
data = data.drop_duplicates()

# Report how many values are missing in each column
print("\nMissing values:\n", data.isnull().sum())

# Fill missing numeric values with the column median (more robust to outliers
# than the mean). numeric_only=True restricts the median to numeric columns:
# without it, pandas >= 2.0 raises a TypeError as soon as the frame contains
# a non-numeric column (e.g. strings). Non-numeric columns are left as-is.
data = data.fillna(data.median(numeric_only=True))

# ---------------------------
# 3. FEATURE SELECTION
# ---------------------------
# Dependent variable (y): the column the model learns to predict.
y = data['Spending_Score']

# Independent variables (X): every remaining column of the frame.
# NOTE(review): if any remaining column is non-numeric (IDs, categories),
# scaling/fitting will fail later -- verify against the CSV schema.
X = data.drop(columns='Spending_Score')

# ---------------------------
# 4. TRAIN-TEST SPLIT
# ---------------------------
# Split BEFORE scaling so that the held-out rows never influence the
# statistics learned by the scaler (fitting the scaler on the full dataset
# leaks test-set information into training and inflates the evaluation).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------------
# 5. DATA PREPROCESSING
# ---------------------------
# Standardize features: learn mean/std from the training rows only, then
# apply that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training-set statistics; kept so that
# any later code referring to X_scaled continues to work.
X_scaled = scaler.transform(X)

# ---------------------------
# 6. MODEL TRAINING
# ---------------------------
# Ordinary least-squares fit on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# ---------------------------
# 7. PREDICTION
# ---------------------------
# Predict the target for the held-out test rows.
y_pred = model.predict(X_test)

# ---------------------------
# 8. EVALUATION
# ---------------------------
# Compare test-set predictions against the true targets.
# MSE: average squared error (lower is better).
# R²: coefficient of determination (1.0 is a perfect fit).
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
print("Mean Squared Error (MSE):", mse)
# Label previously printed as mojibake ("RÂ²") due to a UTF-8/Latin-1 mix-up.
print("R² Score:", r2)

# ---------------------------
# 9. SAMPLE OUTPUT
# ---------------------------
# Show true and predicted targets side by side for the first few test rows.
print("\nActual vs Predicted:")
result = pd.DataFrame(
    {
        'Actual': y_test.values,
        'Predicted': y_pred,
    }
)
print(result.head(5))