# In [13]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the training and test datasets (read in this order: train, then test)
train_data, test_data = map(
    pd.read_csv,
    ('/content/train_v9rqX0R.csv', '/content/test_AbJTz2l.csv'),
)

# Data Preprocessing
# Impute missing Item_Weight values with the mean weight of the training set.
# The same training-set statistic is reused for the test set so that both
# frames go through identical preprocessing (the test set's own mean is not
# used).
# The filled column is assigned back instead of calling
# fillna(..., inplace=True) on the selected column: that form is chained
# assignment, which pandas warns about and which, under Copy-on-Write,
# modifies a temporary object and leaves the DataFrame unchanged.
item_weight_mean = train_data['Item_Weight'].mean()
train_data['Item_Weight'] = train_data['Item_Weight'].fillna(item_weight_mean)
test_data['Item_Weight'] = test_data['Item_Weight'].fillna(item_weight_mean)

# Encoding Categorical Variables: one-hot encode the listed categorical
# columns, using the same column list for the training and test frames
categorical_columns = [
    'Item_Fat_Content',
    'Item_Type',
    'Outlet_Size',
    'Outlet_Location_Type',
    'Outlet_Type',
]
train_data = pd.get_dummies(train_data, columns=categorical_columns)
test_data = pd.get_dummies(test_data, columns=categorical_columns)

# Separate the target from the predictors; the two identifier columns are
# left out of the predictors along with the target itself
target_column = 'Item_Outlet_Sales'
excluded_columns = ['Item_Identifier', 'Outlet_Identifier', target_column]
y = train_data[target_column]
X = train_data.drop(columns=excluded_columns)

# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training portion
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the Mean Squared Error
y_pred = model.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
print(f'Mean Squared Error: {mse}')

# Train the model on the entire training dataset
model.fit(X, y)

# Build the test feature matrix. The training and test frames are one-hot
# encoded independently, so a category that appears in only one of them gives
# the two frames different dummy columns. Reindexing to the training feature
# columns puts the columns in the order the model was fitted on, adds any
# column missing from the test set as all zeros, and drops columns the model
# never saw (including an 'Item_Outlet_Sales' column left over from a previous
# run of this cell).
test_features = test_data.drop(columns=['Item_Identifier', 'Outlet_Identifier'])
test_features = test_features.reindex(columns=X.columns, fill_value=0)

# Make predictions on the test dataset
test_predictions = model.predict(test_features)

# Add the predictions to the test dataset
test_data['Item_Outlet_Sales'] = test_predictions

# Save the results to a CSV file for submission (identifiers plus the
# predicted sales, without the DataFrame index)
submission = test_data[['Item_Identifier', 'Outlet_Identifier', 'Item_Outlet_Sales']]
submission.to_csv('sales_predictions.csv', index=False)


# Out: Mean Squared Error: 1143863.1251237593
