In [None]:
# Import necessary libraries for processing
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the dataset (replace with your actual dataset path or DataFrame)
data = pd.read_csv('/content/Walmart_Sales.csv')

# Preprocessing
# 'Date' is not used as a feature by the classifiers in this cell, so it is
# dropped directly; parsing it to datetime first would be discarded work.
data = data.drop(columns=['Date'])

# Forward-fill missing values (each gap takes the previous row's value).
# `fillna(method='ffill')` is deprecated in recent pandas releases and emits a
# FutureWarning; `DataFrame.ffill()` is the supported spelling. The result is
# reassigned instead of mutating in place.
data = data.ffill()

# Separating features and target ('Weekly_Sales')
X = data.drop(columns=['Weekly_Sales'])  # Features: every remaining column
y = data['Weekly_Sales']  # Target (continuous at this point)

# Binarize the target for classification: sales strictly above the median are
# "high" (1), everything else is "low" (0).
median_sales = np.median(y)
y = np.where(y > median_sales, 1, 0)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# List of (display name, estimator) pairs to fit and compare.
#
# KNN (distance-based) and the linear SVM (margin-based) are sensitive to the
# scale of each feature, so both are wrapped in a pipeline that standardizes
# the inputs first. The scaler is fitted inside the pipeline, i.e. only on the
# data passed to `fit`, so no test-set statistics leak into training.
#
# The tree-based models are randomized; a fixed `random_state` makes their
# reported metrics repeatable across runs.
models = [
    ('Naive Bayes', GaussianNB()),
    ('KNN', make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=8))),
    ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
    ('DecisionTreeClassifier', DecisionTreeClassifier(random_state=42)),
    ('SVM', make_pipeline(StandardScaler(), SVC(kernel='linear'))),
]

# Fit every candidate on the training split, score it on the held-out split,
# and collect one record per model.
results = []

for name, model in models:
    # Train, then predict on the test features in one chained call
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Confusion matrix plus the four scalar metrics, all against y_test
    cm = confusion_matrix(y_test, y_pred)
    accuracy, precision, recall, f1 = (
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )

    # One row of the comparison table
    results.append(dict(zip(
        ('Model', 'Confusion Matrix', 'Accuracy', 'Precision', 'Recall', 'F1 Score'),
        (name, cm, accuracy, precision, recall, f1),
    )))

# Tabulate the per-model records and show them
results_df = pd.DataFrame(results)
print(results_df)


  data.fillna(method='ffill', inplace=True)


                    Model          Confusion Matrix  Accuracy  Precision  \
0             Naive Bayes  [[369, 270], [193, 455]]  0.640249   0.627586   
1                     KNN   [[582, 57], [142, 506]]  0.845377   0.898757   
2  RandomForestClassifier    [[605, 34], [31, 617]]  0.949495   0.947773   
3  DecisionTreeClassifier    [[594, 45], [33, 615]]  0.939394   0.931818   
4                     SVM  [[380, 259], [174, 474]]  0.663559   0.646658   

     Recall  F1 Score  
0  0.702160  0.662782  
1  0.780864  0.835673  
2  0.952160  0.949962  
3  0.949074  0.940367  
4  0.731481  0.686459  


In [None]:
# Import necessary libraries for processing
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import datetime

# Read the sales CSV (point this at your own file or DataFrame as needed)
data = pd.read_csv('/content/Walmart_Sales.csv')

# Preprocessing: parse 'Date' as day-month-year, then forward-fill any gaps
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')
data = data.ffill()

# Target is 'Weekly_Sales'; features are every other column except 'Date'
y = data['Weekly_Sales']
X = data.drop(columns=['Weekly_Sales', 'Date'])

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Random forest regressor (100 trees, fixed seed), fitted on the training split
rf_model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Ask user for the prediction period (next week or month)
prediction_period = input("Would you like to predict for the next 'week' or 'month'? ").strip().lower()

# Supported horizons and the offset each one adds to today's date.
period_offsets = {
    'week': datetime.timedelta(weeks=1),
    'month': pd.DateOffset(months=1),
}

if prediction_period not in period_offsets:
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel, exit()
    # shuts the kernel down, discarding the trained model and all other state.
    raise ValueError("Invalid input. Please enter either 'week' or 'month'.")

# The reported date is computed from the current wall-clock date.
# NOTE: this date is only used in the printed message. The model's features
# exclude 'Date', so the predicted value is the same for 'week' and 'month'.
current_date = datetime.datetime.now()
future_date = current_date + period_offsets[prediction_period]

# Use the most recent feature row as the input for the forecast. Selecting it
# from X with a list index keeps it a one-row DataFrame with the training
# column names and dtypes, so the model sees the same layout as during fit.
last_row_df = X.iloc[[-1]]

# Predict the next period's sales using the RandomForestRegressor
future_sales = rf_model.predict(last_row_df)[0]

# Display the predicted result
print(f"Predicted Weekly Sales for the next {prediction_period}: {future_sales:.2f} on {future_date.strftime('%d-%m-%Y')}")

# Hold-out check: score the fitted regressor on the test split and report
# the mean squared error between actual and predicted sales.
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(
    f"Mean Squared Error on test data: {mse:.2f}"
)


Would you like to predict for the next 'week' or 'month'? week
Predicted Weekly Sales for the next week: 741893.97 on 20-10-2024
Mean Squared Error on test data: 21677879001.43
