In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler


In [None]:
# Load the raw dataset; the path is relative, so the CSV must sit in the
# notebook's current working directory.
df = pd.read_csv("online_shoppers_intention.csv")

In [None]:
# Drop stray leading/trailing whitespace from every column name so later
# lookups by exact name succeed.
df.columns = df.columns.map(str.strip)

In [None]:
# Normalise month labels to a three-letter, title-cased abbreviation
# (e.g. ' june ' -> 'Jun') so they match the keys used for the numeric mapping.
month_text = df['Month'].str.strip()
df['Month'] = month_text.str.slice(stop=3).str.title()

In [None]:
# Three-letter month abbreviation -> calendar month number (1-12).
month_map = dict(zip(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    range(1, 13),
))
# Series.map leaves NaN for any value that is not a key of month_map.
df['Month'] = df['Month'].map(month_map)

In [None]:
# LabelEncoder numbers classes in sorted label order. Assuming the raw labels
# are 'New_Visitor', 'Other' and 'Returning_Visitor' (TODO confirm with
# le.classes_), the codes are 0 = New, 1 = Other, 2 = Returning.
le = LabelEncoder()
df['VisitorType'] = le.fit_transform(df['VisitorType'])

# Cast the flag columns to integers.
for flag_col in ('Weekend', 'Revenue'):
    df[flag_col] = df[flag_col].astype(int)

In [None]:
# F: element-wise sum of the Administrative, Informational and
# ProductRelated columns.
df['F'] = df['Administrative'].add(df['Informational']).add(df['ProductRelated'])

In [None]:
# S: PageValues weighted by the complement of ExitRates.
stay_rate = 1 - df['ExitRates']
df['S'] = df['PageValues'].mul(stay_rate)

In [None]:
# R: with Month as a 1-12 number this runs from 12 (January) down to
# 1 (December); same value as 12 - Month + 1.
df['R'] = 13 - df['Month']

In [None]:
def compute_length(row):
    """Return the L (length) score for a single row.

    ``row['VisitorType']`` is expected to hold the integer code produced by
    ``LabelEncoder``, which numbers classes in sorted label order. Assuming the
    raw labels are ``New_Visitor``, ``Other`` and ``Returning_Visitor``
    (TODO confirm with ``le.classes_``), the codes are 0 = New, 1 = Other,
    2 = Returning.

    Parameters
    ----------
    row : mapping or pandas.Series
        Anything indexable by the key ``'VisitorType'``.

    Returns
    -------
    int
        3 for a returning visitor, 1 for a new visitor, 2 for any other code.
    """
    visitor_code = row['VisitorType']
    if visitor_code == 2:    # Returning visitor (sorted-label code 2, not 1)
        return 3
    if visitor_code == 0:    # New visitor
        return 1
    return 2                 # Other, or any unexpected code

# L: score each row from its encoded VisitorType via compute_length
# (row-wise apply).
df['L'] = df.apply(compute_length, axis='columns')

In [None]:
# Feature columns for the LRFS model; Revenue is carried along as the label.
lrfs_cols = ['L', 'R', 'F', 'S']
df_lrfs = df[[*lrfs_cols, 'Revenue']]

print(df_lrfs.head())


   L   R   F    S  Revenue
0  2  11   1  0.0        0
1  2  11   2  0.0        0
2  2  11   1  0.0        0
3  2  11   2  0.0        0
4  2  11  10  0.0        0


In [None]:
# Standardise the four LRFS features with StandardScaler; Revenue is left
# unscaled.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_lrfs[lrfs_cols])

# Reuse df_lrfs's index for the scaled frame. Assigning the Revenue Series
# aligns on index labels, so a fresh RangeIndex would silently misalign rows
# (or introduce NaN) if df_lrfs ever has a non-default index, e.g. after rows
# were filtered out upstream.
df_scaled = pd.DataFrame(X_scaled, columns=lrfs_cols, index=df_lrfs.index)
df_scaled['Revenue'] = df_lrfs['Revenue']

print(df_scaled.head())

          L         R         F         S  Revenue
0  0.365812  1.665924 -0.721321 -0.316116        0
1  0.365812  1.665924 -0.699821 -0.316116        0
2  0.365812  1.665924 -0.721321 -0.316116        0
3  0.365812  1.665924 -0.699821 -0.316116        0
4  0.365812  1.665924 -0.527823 -0.316116        0


In [None]:
# Write the standardised L/R/F/S features plus Revenue to a CSV in the
# current working directory; index=False leaves the row index out of the file.
df_scaled.to_csv("lrfs_features.csv", index=False)
print("✅ Processed LRFS dataset saved at 'lrfs_features.csv'")

✅ Processed LRFS dataset saved at 'lrfs_features.csv'


In [None]:
# Summary statistics for the standardised features and the (unscaled)
# Revenue column.
print(df_scaled.describe())


                  L             R             F             S       Revenue
count  1.233000e+04  1.233000e+04  1.233000e+04  1.233000e+04  12330.000000
mean   2.137967e-16  2.212882e-16 -1.844069e-17  4.610172e-18      0.154745
std    1.000041e+00  1.000041e+00  1.000041e+00  1.000041e+00      0.361676
min   -2.437460e+00 -1.281578e+00 -7.428208e-01 -3.161159e-01      0.000000
25%    3.658123e-01 -9.868275e-01 -5.708228e-01 -3.161159e-01      0.000000
50%    3.658123e-01  1.921733e-01 -3.128257e-01 -3.161159e-01      0.000000
75%    3.658123e-01  7.816737e-01  1.601688e-01 -3.161159e-01      0.000000
max    3.169085e+00  1.665924e+00  1.529599e+01  1.928668e+01      1.000000
