In [9]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import pickle

# Load the dataset. The columns used below are 'Day', 'Customer Count',
# 'ArticleA', 'ArticleB' and 'ArticleC'.
# NOTE: '/content/...' is an absolute path; adjust it when running elsewhere.
data_articles = pd.read_csv('/content/forarticles.csv')

# One-hot encode the 'Day' column.
# OneHotEncoder returns a SPARSE matrix by default, which is why the result is
# densified with .toarray() below. handle_unknown='ignore' makes categories
# not seen during fit encode as an all-zero row instead of raising.
encoder = OneHotEncoder(handle_unknown='ignore')
day_encoded = encoder.fit_transform(data_articles[['Day']])

# Convert the encoded matrix back to a DataFrame with readable column names
# (Day_Friday, Day_Monday, ...). Reusing data_articles' index keeps the rows
# aligned in the concat below even if the source frame does not carry a
# default 0..n-1 index (e.g. after filtering rows).
day_encoded_df = pd.DataFrame(
    day_encoded.toarray(),
    columns=encoder.get_feature_names_out(['Day']),
    index=data_articles.index,
)

# Features shared by all articles: the one-hot day flags plus the customer count.
X = pd.concat([day_encoded_df, data_articles[['Customer Count']]], axis=1)

# Targets for each article, directly using the count.
y_A = data_articles['ArticleA']
y_B = data_articles['ArticleB']
y_C = data_articles['ArticleC']

# Split into training and testing sets for each article. The same X, test_size
# and random_state are used every time, so the three splits select the same rows.
X_train_A, X_test_A, y_train_A, y_test_A = train_test_split(X, y_A, test_size=0.2, random_state=42)
X_train_B, X_test_B, y_train_B, y_test_B = train_test_split(X, y_B, test_size=0.2, random_state=42)
X_train_C, X_test_C, y_train_C, y_test_C = train_test_split(X, y_C, test_size=0.2, random_state=42)

# One regression model per article; the fixed seed keeps training reproducible.
model_A = RandomForestRegressor(random_state=42)
model_B = RandomForestRegressor(random_state=42)
model_C = RandomForestRegressor(random_state=42)

# Train the models.
model_A.fit(X_train_A, y_train_A)
model_B.fit(X_train_B, y_train_B)
model_C.fit(X_train_C, y_train_C)

# Save the models and the fitted encoder. Each file is opened in a `with`
# block so the handle is flushed and closed even if pickling fails.
for pickle_path, artifact in (
    ('modela.pkl', model_A),
    ('modelb.pkl', model_B),
    ('modelc.pkl', model_C),
    ('encoder.pkl', encoder),
):
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)


In [13]:
# Preview the first five held-out feature rows to confirm the column layout
# (one-hot day flags followed by 'Customer Count') that the models were fit on.
X_test_A.head(n=5)


Unnamed: 0,Day_Friday,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,Customer Count
13,1.0,0.0,0.0,0.0,0.0,0.0,0.0,29
39,0.0,0.0,0.0,0.0,0.0,0.0,1.0,23
30,0.0,1.0,0.0,0.0,0.0,0.0,0.0,68
45,0.0,0.0,0.0,0.0,0.0,1.0,0.0,47
17,0.0,0.0,0.0,0.0,0.0,1.0,0.0,71


In [14]:
import pandas as pd
import pickle

# Load the trained model for Article A.
# Only unpickle files you produced yourself: pickle.load can execute arbitrary
# code embedded in an untrusted file.
with open('modela.pkl', 'rb') as f:
    model_a = pickle.load(f)

# Scenario to predict: a Friday with 100 customers.
# Change `day` to predict for another weekday; the one-hot flags are derived
# from it below so the chosen day and the flags cannot drift apart.
day = 'Friday'
customer_count = 100

# One-hot day columns, in the same order as the training features.
day_columns = [
    'Day_Friday',
    'Day_Monday',
    'Day_Saturday',
    'Day_Sunday',
    'Day_Thursday',
    'Day_Tuesday',
    'Day_Wednesday',
]

# Fail loudly on a misspelled day instead of silently predicting with all flags at 0.
active_column = 'Day_' + day
if active_column not in day_columns:
    raise ValueError(
        "Unknown day %r; expected one of %s"
        % (day, [name[len('Day_'):] for name in day_columns])
    )

# Build the single-row feature dict: 1 for the selected day, 0 for every other day.
data = {name: [1 if name == active_column else 0] for name in day_columns}
data['Customer Count'] = [customer_count]

features_df = pd.DataFrame(data)

# Now use the model to predict
prediction = model_a.predict(features_df)
print("Prediction for Article A:", prediction)


Prediction for Article A: [48.24]
