In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
import pickle

# Train a linear model that predicts 'Customer Count' from the day of the
# week, then persist both the model and the fitted encoder for later reuse.

# Load your data
data_path = 'shopmanage.csv'  # Update the path accordingly
data = pd.read_csv(data_path)

# One-hot encode the 'Day' column with dense output.
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so try the new
# spelling first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded_days = encoder.fit_transform(data[['Day']])

# Create a DataFrame from the encoded days.
# Reusing data's index keeps the column-wise concat below row-aligned even
# if `data` is ever filtered or re-indexed before this point.
encoded_days_df = pd.DataFrame(
    encoded_days,
    columns=encoder.get_feature_names_out(['Day']),
    index=data.index,
)

# Combine the original data with the encoded days
data_with_encoded_days = pd.concat([data, encoded_days_df], axis=1)

# Prepare the data for model training: the one-hot day columns are the only
# features, and the customer count is the regression target.
X = data_with_encoded_days[encoder.get_feature_names_out(['Day'])]
y = data_with_encoded_days['Customer Count']

# Splitting the data into training and testing sets (fixed seed so the split,
# and therefore the reported error, is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predicting on the test set
y_pred = model.predict(X_test)

# Calculating the model performance
mse = mean_squared_error(y_test, y_pred)
print(f"Model's Mean Squared Error (MSE): {mse}")

# Saving the model and the encoder to pickle files.
# Context managers guarantee the files are flushed and closed even if
# pickling raises part-way through.
with open('crowd.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('encoder.pkl', 'wb') as encoder_file:
    pickle.dump(encoder, encoder_file)




Model's Mean Squared Error (MSE): 731.1669999999998


In [4]:
# Predict the customer count for a single day of the week.
day_str = 'Friday'

# Build the feature row with the fitted encoder instead of a hand-written
# list of day names, so the columns (and their order) always match what the
# model was trained on. With the encoder's default settings an unrecognised
# day raises ValueError here rather than silently yielding an all-zero row
# and a meaningless prediction.
day_feature_names = encoder.get_feature_names_out(['Day'])
encoded_day_row = encoder.transform(pd.DataFrame({'Day': [day_str]}))[0]

# Keep the same {'Day_<name>': 0/1} mapping and one-row frame as before.
data1 = {name: int(flag) for name, flag in zip(day_feature_names, encoded_day_row)}
input_df1 = pd.DataFrame([data1])
model.predict(input_df1)

array([37.4])