In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_log_error

In [32]:
# Read all CSV inputs up front, in a fixed order, and bind each frame to its
# own name. Only `train`, `test` and `stores` are referenced by the cells
# visible below; `oil` and `holidays` are loaded but not used there.
train, test, stores, oil, holidays = (
    pd.read_csv(csv_name)
    for csv_name in (
        'train.csv',
        'test.csv',
        'stores.csv',
        'oil.csv',
        'holidays_events.csv',
    )
)

In [33]:
# Create date-related features on the training frame.
# Parse the date column once, then expose day / month / year as separate
# columns so they can be used as model inputs.
train['date'] = pd.to_datetime(train['date'])
train['day'] = train['date'].dt.day
train['month'] = train['date'].dt.month
train['year'] = train['date'].dt.year

# Fill missing promotion values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on train['onpromotion']: that chained in-place form
# operates on a temporary object under pandas Copy-on-Write and would leave
# `train` unchanged.
train['onpromotion'] = train['onpromotion'].fillna(train['onpromotion'].mean())

In [34]:
# Merge training data with store metadata.
# `stores` is already loaded by the data-loading cell, so the file is not
# read a second time here.
# validate='many_to_one' makes pandas raise a MergeError if stores ever
# repeats a store_nbr, which would otherwise silently duplicate training rows.
merged_train = pd.merge(
    train, stores, how='left', on='store_nbr', validate='many_to_one'
)

# Encode categorical variables as one indicator column per category.
# dtype='uint8' pins the indicator dtype: pandas >= 2.0 defaults to bool,
# and a dtype filter listing only float64 / int64 / uint8 would then drop
# every indicator column.
merged_train = pd.get_dummies(
    merged_train, columns=['type', 'family'], dtype='uint8'
)

In [35]:
# Build the training feature matrix and the target vector.
# The target ('sales') and the row identifier ('id') are dropped BEFORE the
# dtype filter: both are numeric, so they would otherwise be selected as
# features, and training on 'sales' itself is target leakage.
# errors='ignore' keeps the cell working if either column is already absent.
# 'number' covers every numeric width (the .dt accessors return int32 on
# newer pandas) and 'bool' covers indicator columns produced as booleans;
# non-numeric columns such as dates and strings are still excluded.
X_train = (
    merged_train
    .drop(columns=['sales', 'id'], errors='ignore')
    .select_dtypes(include=['number', 'bool'])
)
y_train = merged_train['sales']

In [40]:
# Load and preprocess the test data the same way as the training data.
# The file is re-read here on purpose: `test` is overwritten by the merge and
# the one-hot encoding below, so starting from the raw file keeps this cell
# safe to re-run.
test = pd.read_csv('test.csv')
test['date'] = pd.to_datetime(test['date'])
test['day'] = test['date'].dt.day
test['month'] = test['date'].dt.month
test['year'] = test['date'].dt.year
# Assign the filled column back instead of fillna(inplace=True) on a column
# selection, which acts on a temporary under pandas Copy-on-Write.
test['onpromotion'] = test['onpromotion'].fillna(test['onpromotion'].mean())
# validate='many_to_one' raises if a store_nbr is repeated in `stores`, which
# would otherwise duplicate test rows and break the id/prediction pairing.
test = pd.merge(test, stores, how='left', on='store_nbr', validate='many_to_one')
# dtype='uint8' keeps indicator columns numeric on every pandas version.
test = pd.get_dummies(test, columns=['type', 'family'], dtype='uint8')
X_test = test.select_dtypes(include=['number', 'bool'])

# Ensure the columns in X_train and X_test are aligned.
# The training matrix defines the feature schema. The target and the row id
# must never be model inputs, so they are stripped from X_train first (a no-op
# when they are already absent). X_test is then reindexed to exactly those
# columns in the same order: columns missing from the test data are filled
# with 0, and test-only columns are dropped. An outer join would instead copy
# train-only columns such as 'sales' into X_test as zeros.
X_train = X_train.drop(columns=['sales', 'id'], errors='ignore')
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
assert list(X_test.columns) == list(X_train.columns), "train/test feature mismatch"

In [42]:
# Model Training
# random_state fixes the forest's randomness so that a fresh
# Restart-and-Run-All reproduces the same model and the same submission.
# n_jobs=-1 builds the trees on all available cores; it affects only run
# time, not the fitted model.
final_model = RandomForestRegressor(random_state=42, n_jobs=-1)
final_model.fit(X_train, y_train)

RandomForestRegressor()

In [43]:
# Make Predictions: score the prepared test feature matrix with the fitted
# forest; the result holds one predicted sales value per row of X_test.
final_predictions = final_model.predict(X=X_test)

In [44]:
# Save predictions to a CSV file.
# Start from the test ids and attach the predictions as a 'sales' column,
# then write the two-column frame without the DataFrame index.
submission = test[['id']].assign(sales=final_predictions)
submission.to_csv('submission.csv', index=False)