In [1]:
# Importing useful libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# load data and display head
data = pd.read_excel('Train.xlsx')
data.head()

In [None]:
print(f'data has {data.shape[0]} rows and {data.shape[1]} columns')

In [None]:
# Check statistics of data
data.describe(include='all')

In [None]:
data = data.rename(columns={'Weekday (Mon = 1; Sun = 7)':'Weekday', 'Public Holiday?':'Public Holiday',
                           'Special Event?':'Special Event', 'Max. Wind (km/h)':'Max. Wind'})

In [None]:
data.info()

In [None]:
# Column names grouped by kind: categorical vs. numerical
categorical_features = [
    'Weekday',
    'Public Holiday',
    'Special Event',
    'Store open?',
]

numerical_features = [
    'Min. Temp',
    'Max. Temp',
    'Avg. Temp',
    'Precipitation',
    'Sunshine Hours',
    'Max. Wind',
]

In [None]:
data.isna().sum()

In [None]:
data['Public Holiday'].unique()

In [None]:
data['Public Holiday']=data['Public Holiday'].fillna('No')

In [None]:
data['Public Holiday'].nunique()

In [None]:
data['Public Holiday'].unique()

In [None]:
data['Special Event'].nunique()

In [None]:
data['Special Event'].unique()

In [None]:
data['Special Event'] = data['Special Event'].fillna('No')

In [None]:
data['Special Event'].unique()

In [None]:
data['Store open?'].unique()

In [None]:
data['Store open?'].nunique()

In [None]:
data['Food sales'] = np.where((data['Store open?'] == 'No'), 0, data['Food sales'])

In [None]:
data.isna().sum()

In [None]:
data.loc[data["Max. Wind"]=="-"]

In [None]:
data['Max. Wind'] = data['Max. Wind'].replace("-", 0).astype('float64')

In [None]:

sns.histplot(x=data['Max. Wind'], binwidth=2)


In [None]:
data['Max. Wind'].median()

In [None]:
mode = data['Max. Wind'].mode()
mode

In [None]:
data['Max. Wind'] = data['Max. Wind'].replace(0, 35)

In [None]:
sns.histplot(x=data['Max. Wind'], binwidth=2)

In [None]:
sns.regplot(x='Food sales', y='Weekday', data=data)

In [None]:
sns.pairplot(data)

In [None]:
corr_matrix = data.corr()
corr_matrix

In [None]:
fig,ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(corr_matrix, annot=True, fmt='.2f')

### Data Modelling

In [None]:
from sklearn.preprocessing import LabelEncoder

# Text-valued categorical columns to convert to integer codes
cat_feat = ['Public Holiday', 'Special Event', 'Store open?']

# Fit a separate encoder for each column and keep it. Re-fitting one shared
# encoder would discard the label -> code mapping of every column but the
# last, making it impossible to decode the codes (inverse_transform) or to
# apply the same mapping to another dataset later.
encoders = {}
for col in cat_feat:
    labelencoder = LabelEncoder()
    data[col] = labelencoder.fit_transform(data[col])
    encoders[col] = labelencoder

data.head()

In [None]:
# Correlations after encoding. 'Date' has not been dropped yet at this point,
# so restrict the computation to numeric columns; pandas >= 2.0 does not skip
# non-numeric columns in corr() by default.
corr_matrix = data.select_dtypes(include='number').corr()
corr_matrix

In [None]:
data['Special Event'].nunique()

In [None]:
data['Special Event'] = np.where((data['Special Event'] == 'NaN'), 0, data['Special Event'])

In [None]:
data['Special Event'].head()

In [None]:
data['Special Event']=data['Special Event'].fillna('No')

In [None]:
data['Special Event'].head()

In [None]:
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
cat_feat = ['Public Holiday', 'Special Event', 'Store open?']

for i in cat_feat:
    data[i] = labelencoder.fit_transform(data[i])
    
data.head()

In [None]:
data = data.drop('Date', axis=1)

In [None]:
data.head()

In [None]:
#Split data into X and y
X = data.drop('Food sales', axis=1)
y = data['Food sales']

In [None]:
from sklearn.model_selection import train_test_split
np.random.seed(42)
X_train, X_test, y_train, y_test =train_test_split(X,y, test_size=0.2)

In [None]:
X_train.head()

In [None]:
from sklearn import tree
from sklearn.linear_model import LinearRegression
from sklearn import svm

# Candidate regressors keyed by the label used when reporting their scores
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Decision Tree', tree.DecisionTreeRegressor()),
    ('Support Vector Machine', svm.SVR()),
])

def fit_and_score(models, X_train, X_test, y_train, y_test):
    """Fit every model on the training split and score it on the test split.

    Parameters
    ----------
    models : dict
        Maps a display name to an object exposing ``fit(X, y)`` and
        ``score(X, y)``. Each object is fitted in place.
    X_train, y_train
        Data passed to ``fit``.
    X_test, y_test
        Data passed to ``score``.

    Returns
    -------
    dict
        Maps each name in ``models`` to the value returned by that model's
        ``score`` call, in the iteration order of ``models``.
    """
    # Reset NumPy's global RNG so repeated calls start from the same state
    np.random.seed(42)

    def _evaluate(label, estimator):
        # Announce the model, train it, then return its test-split score
        print(label, ":", estimator)
        estimator.fit(X_train, y_train)
        return estimator.score(X_test, y_test)

    return {label: _evaluate(label, estimator) for label, estimator in models.items()}
    


In [None]:
# Fit each candidate model on the training split and collect its test-split score
model_scores = fit_and_score(models, X_train, X_test, y_train, y_test)
model_scores