In [1]:
import pandas as pd

# Read the Big Mart sales CSV into a DataFrame
file_path = '/content/big_mart_data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows to inspect the columns and value formats
data.head(n=5)


Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Type,Item_MRP,Outlet_Size,Item_Outlet_Sales
0,9.3,Low Fat,Dairy,249.8092,Medium,3735.138
1,5.92,Regular,Drinks,48.2692,Medium,443.4228
2,17.5,Low Fat,Dairy,141.618,Medium,2097.27
3,8.93,Low Fat,Others,53.8614,High,994.7052
4,10.395,Regular,Others,51.4008,Medium,556.6088


In [2]:
!pip install pandas
!pip install scikit-learn

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# One-hot encoding the categorical variables
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_data = encoder.fit_transform(data[['Item_Fat_Content', 'Item_Type', 'Outlet_Size']])

# Convert encoded_data to DataFrame with string column names
encoded_df = pd.DataFrame(encoded_data, columns=[str(i) for i in range(encoded_data.shape[1])])

data_encoded = pd.concat([data, encoded_df], axis=1)

# Display the first few rows of the encoded data
data_encoded.head()




Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Type,Item_MRP,Outlet_Size,Item_Outlet_Sales,0,1,2,3,4,5,6,7,8
0,9.3,Low Fat,Dairy,249.8092,Medium,3735.138,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,5.92,Regular,Drinks,48.2692,Medium,443.4228,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,17.5,Low Fat,Dairy,141.618,Medium,2097.27,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
3,8.93,Low Fat,Others,53.8614,High,994.7052,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,10.395,Regular,Others,51.4008,Medium,556.6088,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Prepare the data
target_col = 'Item_Outlet_Sales'

# Columns that must not be used as predictors: the target itself and the raw
# categorical columns (their one-hot encoded versions are kept).
non_feature_cols = [target_col, 'Item_Fat_Content', 'Item_Type', 'Outlet_Size']

# 'Unnamed: 0' appears to be the row index saved into the CSV; it carries no
# information about sales, so keep it out of the feature matrix when present.
if 'Unnamed: 0' in data_encoded.columns:
    non_feature_cols.append('Unnamed: 0')

X = data_encoded.drop(columns=non_feature_cols)
y = data_encoded[target_col]

# Step 2: Split the data into training and testing sets
# (70/30 split; random_state fixes the shuffle so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Make predictions
y_pred = model.predict(X_test)

# Step 5: Evaluate the model on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

mse, r2


(1136094.7351023094, 0.48105806659656525)

In [4]:
pip install streamlit




In [5]:
# creating the script
%%writefile app.py

import streamlit as st

# Title of the app
st.title("Model Input Interface")

# Input field for user to enter data
user_input = st.text_input("Enter your input:")

# Display the input back to the user
st.write("You entered:", user_input)

# Button to submit the input
if st.button("Submit"):
    st.write("Processing your input...")
    # Here you can add the code to process the input with your model


Writing app.py


In [6]:
import pickle

# Serialize the trained `model` object (e.g. the LinearRegression estimator)
# to a pickle file in the current working directory.
with open('sales_prediction_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)


In [8]:
# running the app
!streamlit run app.py &>/dev/null&

In [13]:
!pip install pyngrok # Install the pyngrok module

# making the locally-hosted web application to be publicly accessible
from pyngrok import ngrok

# Set your authtoken
ngrok.set_auth_token("pyngrok") # Replace YOUR_AUTHTOKEN with your actual token

public_url = ngrok.connect('8501')
public_url



<NgrokTunnel: "https://56b3-34-134-248-182.ngrok-free.app" -> "http://localhost:8501">