In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Read the training data from the CSV file in the working directory
data = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Fill missing values.
# The filled Series is assigned back to its column rather than calling
# fillna(..., inplace=True) on data[...]: the in-place form operates on an
# intermediate Series, which pandas deprecates as chained assignment and which
# leaves `data` unchanged once Copy-on-Write is enabled.
# NOTE: both fill values (mean weight, most frequent outlet size) are computed
# from the whole dataset, i.e. before the train/test split done further down.
data['Item_Weight'] = data['Item_Weight'].fillna(data['Item_Weight'].mean())
data['Outlet_Size'] = data['Outlet_Size'].fillna(data['Outlet_Size'].mode()[0])

In [4]:
# Target: the sales figure to be predicted
y = data['Item_Outlet_Sales']
# Features: every remaining column except the target and the item identifier
X = data.drop(['Item_Outlet_Sales', 'Item_Identifier'], axis='columns')

In [5]:
# Split the feature columns by dtype: object-typed columns are treated as
# categorical, everything else as numerical
numerical_cols = X.select_dtypes(exclude='object').columns
categorical_cols = X.select_dtypes(include='object').columns

In [6]:
# Preprocessing: numerical columns pass through untouched, categorical columns
# are one-hot encoded (categories unseen during fit are ignored at predict time)
categorical_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', categorical_encoder, categorical_cols),
    ]
)

In [7]:
# Full model: preprocessing followed by a 100-tree random forest with a fixed seed
forest_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', forest_regressor),
    ]
)

In [8]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the whole pipeline (preprocessing + regressor) on the training split
model.fit(X_train, y_train)

# Score the held-out split with mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 1164985.3077165987


In [10]:
# Function to predict sales for a new product
def predict_sales(
    item_weight,
    item_fat_content,
    item_visibility,
    item_type,
    item_mrp,
    outlet_identifier,
    outlet_establishment_year,
    outlet_size,
    outlet_location_type,
    outlet_type,
):
    """Predict sales for a single item/outlet combination.

    The ten arguments are placed into a one-row DataFrame whose column names
    are the dataset's feature names, and that frame is passed to the
    module-level ``model``.

    Returns:
        The first (and only) element of ``model.predict`` for that row.
    """
    # One record keyed by feature column name
    record = {
        'Item_Weight': item_weight,
        'Item_Fat_Content': item_fat_content,
        'Item_Visibility': item_visibility,
        'Item_Type': item_type,
        'Item_MRP': item_mrp,
        'Outlet_Identifier': outlet_identifier,
        'Outlet_Establishment_Year': outlet_establishment_year,
        'Outlet_Size': outlet_size,
        'Outlet_Location_Type': outlet_location_type,
        'Outlet_Type': outlet_type,
    }
    single_row = pd.DataFrame([record])

    # The model returns one prediction per row; unwrap the single value
    return model.predict(single_row)[0]

In [11]:
# Example feature values for a single prediction
item_weight = 9.3
item_fat_content = 'Low Fat'
item_visibility = 0.016047
item_type = 'Snack Foods'
item_mrp = 249.8092
outlet_identifier = 'OUT049'
outlet_establishment_year = 1999
outlet_size = 'Medium'
outlet_location_type = 'Tier 1'
outlet_type = 'Supermarket Type1'

# Pass each value by keyword so the call does not rely on argument order
predicted_sales = predict_sales(
    item_weight=item_weight,
    item_fat_content=item_fat_content,
    item_visibility=item_visibility,
    item_type=item_type,
    item_mrp=item_mrp,
    outlet_identifier=outlet_identifier,
    outlet_establishment_year=outlet_establishment_year,
    outlet_size=outlet_size,
    outlet_location_type=outlet_location_type,
    outlet_type=outlet_type,
)
print(f'Predicted Sales: {predicted_sales}')

Predicted Sales: 3965.018766000002


In [12]:
# Take inputs from the user
# Function to predict sales for a new product
def predict_sales(item_weight=None, item_fat_content=None, item_visibility=None,
                  item_type=None, item_mrp=None, outlet_identifier=None,
                  outlet_establishment_year=None, outlet_size=None,
                  outlet_location_type=None, outlet_type=None):
    """Predict sales for one item/outlet, prompting for any value not supplied.

    This definition replaces the earlier ten-argument ``predict_sales``. To
    keep existing calls working, every parameter is optional:

    * ``predict_sales()`` asks for all ten values on stdin, in the order of
      the parameters.
    * ``predict_sales(9.3, 'Low Fat', ...)`` uses the supplied values as-is
      and does not prompt for them.

    Values read from the prompt are converted as follows: weight, visibility
    and MRP with ``float``, establishment year with ``int``, and the
    categorical fields are stripped of surrounding whitespace (so that
    ``' Regular '`` is not treated as a different category from ``'Regular'``).

    Returns:
        The first (and only) element of ``model.predict`` for the one-row
        DataFrame built from the ten values. ``model`` is the module-level
        fitted pipeline.

    Raises:
        ValueError: if a prompted numeric field cannot be parsed.
    """
    def _resolve(value, prompt, convert):
        # Only ask for a field the caller left out.
        if value is None:
            return convert(input(prompt))
        return value

    # Collect user input for whatever was not passed in
    item_weight = _resolve(item_weight, "Enter the Item Weight: ", float)
    item_fat_content = _resolve(item_fat_content, "Enter the Item Fat Content (e.g., Low Fat, Regular): ", str.strip)
    item_visibility = _resolve(item_visibility, "Enter the Item Visibility: ", float)
    item_type = _resolve(item_type, "Enter the Item Type (e.g., Snack Foods, Dairy): ", str.strip)
    item_mrp = _resolve(item_mrp, "Enter the Item MRP: ", float)
    outlet_identifier = _resolve(outlet_identifier, "Enter the Outlet Identifier (e.g., OUT049): ", str.strip)
    outlet_establishment_year = _resolve(outlet_establishment_year, "Enter the Outlet Establishment Year: ", int)
    outlet_size = _resolve(outlet_size, "Enter the Outlet Size (e.g., Small, Medium, High): ", str.strip)
    outlet_location_type = _resolve(outlet_location_type, "Enter the Outlet Location Type (e.g., Tier 1, Tier 2, Tier 3): ", str.strip)
    outlet_type = _resolve(outlet_type, "Enter the Outlet Type (e.g., Supermarket Type1, Grocery Store): ", str.strip)

    # Prepare the input data: one row, columns named after the dataset features
    input_data = pd.DataFrame({
        'Item_Weight': [item_weight],
        'Item_Fat_Content': [item_fat_content],
        'Item_Visibility': [item_visibility],
        'Item_Type': [item_type],
        'Item_MRP': [item_mrp],
        'Outlet_Identifier': [outlet_identifier],
        'Outlet_Establishment_Year': [outlet_establishment_year],
        'Outlet_Size': [outlet_size],
        'Outlet_Location_Type': [outlet_location_type],
        'Outlet_Type': [outlet_type]
    })

    # Predict sales; the model returns one value per row
    prediction = model.predict(input_data)
    return prediction[0]

# Example usage: calling predict_sales() with no arguments prompts for every
# field on stdin, so this cell blocks until all answers have been entered.
predicted_sales = predict_sales()
print(f'Predicted Sales: {predicted_sales}')

Enter the Item Weight: 20
Enter the Item Fat Content (e.g., Low Fat, Regular): Regular
Enter the Item Visibility: 0.1908
Enter the Item Type (e.g., Snack Foods, Dairy): Other
Enter the Item MRP: 245
Enter the Outlet Identifier (e.g., OUT049): OUT048
Enter the Outlet Establishment Year: 2001
Enter the Outlet Size (e.g., Small, Medium, High): High
Enter the Outlet Location Type (e.g., Tier 1, Tier 2, Tier 3): Tier 3
Enter the Outlet Type (e.g., Supermarket Type1, Grocery Store): Supermarket Type3
Predicted Sales: 4816.976445999997
