In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# Fit and evaluate a linear-regression model that predicts `loyalty_score`
# from the remaining columns of the customer purchasing CSV, then plot the
# predicted scores against the actual ones.

# --- Load ---------------------------------------------------------------
DATA_PATH = r'C:\Users\Access\Downloads\Customer Purchasing Behaviors.csv'
data = pd.read_csv(DATA_PATH)

print(data.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()

# --- Clean --------------------------------------------------------------
# Drop every row that has a missing value. The surviving rows keep their
# original labels, so the index may now have gaps. Each frame built from a
# NumPy array below is therefore given `index=data.index`; without it,
# pd.concat(axis=1) would align on mismatched labels, pair rows incorrectly
# and reintroduce NaN rows.
data = data.dropna()

# --- One-hot encode `region` --------------------------------------------
# drop='first' leaves out the indicator column of the first category.
encoder = OneHotEncoder(drop='first')
region_encoded = encoder.fit_transform(data[['region']]).toarray()
region_encoded_df = pd.DataFrame(
    region_encoded,
    columns=encoder.get_feature_names_out(['region']),
    index=data.index,
)
# Replace the raw `region` column with its indicator columns (appended last).
data = pd.concat([data.drop('region', axis=1), region_encoded_df], axis=1)

# --- Standardize the numeric columns ------------------------------------
numeric_columns = ['age', 'annual_income', 'purchase_amount', 'purchase_frequency']

# NOTE(review): the scaler is fit on all rows, before the train/test split.
# That should not affect an ordinary least-squares fit, but it would leak
# test-set statistics if a regularized model were swapped in -- confirm
# before changing the estimator.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data[numeric_columns])
scaled_features_df = pd.DataFrame(
    scaled_features,
    columns=numeric_columns,
    index=data.index,
)
# The scaled columns are appended after the remaining ones, which fixes the
# column order of X (and therefore the order of model.coef_).
data = pd.concat([data.drop(numeric_columns, axis=1), scaled_features_df], axis=1)

# --- Features / target --------------------------------------------------
# NOTE(review): every column other than `loyalty_score` is used as a feature,
# including any identifier column the CSV may contain -- verify the schema.
X = data.drop('loyalty_score', axis=1)
y = data['loyalty_score']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Fit and evaluate ---------------------------------------------------
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# --- Plot ---------------------------------------------------------------
plt.scatter(y_test, y_pred)
plt.xlabel('actual loyalty score')
plt.ylabel('predicted loyalty score')
plt.title('actual vs predicted loyalty score')
plt.show()