In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [12]:
data = pd.read_csv('data.csv')

In [13]:
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [14]:
print(x)

[['East' 2012 'Q1']
 ['East' 2012 'Q1']
 ['West' 2012 'Q1']
 ...
 ['West' 2012 nan]
 ['West' 2014 'Q3']
 ['West' 2015 'Q4']]


In [15]:
print(y)

[-111.8  -342.91 -193.08 ... -135.16  -73.83  192.06]


In [16]:
data.isna().sum()

Region                    0
Year of Order Date        0
Quarter of Order Date    14
Profit                    7
dtype: int64

In [17]:
print(data.dtypes)

Region                    object
Year of Order Date         int64
Quarter of Order Date     object
Profit                   float64
dtype: object


In [18]:
rata_profit = data['Profit'].mean()
print(rata_profit)

181.2837540015492


In [19]:
data['Profit'] = data['Profit'].fillna('rata_profit')

In [20]:
data.isna().sum()

Region                    0
Year of Order Date        0
Quarter of Order Date    14
Profit                    0
dtype: int64

In [21]:
modus = data['Quarter of Order Date'].mode()
print(modus)

0    Q3
Name: Quarter of Order Date, dtype: object


In [22]:
data['Quarter of Order Date'] = data['Quarter of Order Date'].fillna('modus')

In [23]:
data.isna().sum()

Region                   0
Year of Order Date       0
Quarter of Order Date    0
Profit                   0
dtype: int64

In [24]:
print(data.dtypes)

Region                   object
Year of Order Date        int64
Quarter of Order Date    object
Profit                   object
dtype: object


In [25]:
print(x)

[['East' 2012 'Q1']
 ['East' 2012 'Q1']
 ['West' 2012 'Q1']
 ...
 ['West' 2012 nan]
 ['West' 2014 'Q3']
 ['West' 2015 'Q4']]


In [26]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(),[0,2])], remainder='passthrough')
x = np.array(ct.fit_transform(x))

In [27]:
print(x)

[[0.0 1.0 0.0 ... 0.0 0.0 2012]
 [0.0 1.0 0.0 ... 0.0 0.0 2012]
 [0.0 0.0 0.0 ... 0.0 0.0 2012]
 ...
 [0.0 0.0 0.0 ... 0.0 1.0 2012]
 [0.0 0.0 0.0 ... 0.0 0.0 2014]
 [0.0 0.0 0.0 ... 1.0 0.0 2015]]


In [28]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [29]:
print(y)

[1672  645 1046 ... 1426 2148 6006]


In [30]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)

In [31]:
print(x_train)

[[0.0 1.0 0.0 ... 1.0 0.0 2013]
 [0.0 1.0 0.0 ... 0.0 0.0 2015]
 [1.0 0.0 0.0 ... 1.0 0.0 2013]
 ...
 [0.0 1.0 0.0 ... 0.0 0.0 2012]
 [1.0 0.0 0.0 ... 1.0 0.0 2015]
 [0.0 0.0 1.0 ... 0.0 0.0 2012]]


In [32]:
print(x_test)

[[0.0 0.0 0.0 ... 0.0 0.0 2014]
 [0.0 0.0 0.0 ... 0.0 0.0 2013]
 [0.0 0.0 1.0 ... 0.0 0.0 2014]
 ...
 [0.0 1.0 0.0 ... 0.0 0.0 2013]
 [1.0 0.0 0.0 ... 0.0 0.0 2013]
 [0.0 1.0 0.0 ... 0.0 0.0 2014]]


In [33]:
print(y_train)

[ 494 6673  831 ...  209 4445   60]


In [34]:
print(y_test)

[2452 2493 1258 ... 1616 6188 2825]


In [36]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train[:,-1:] = sc.fit_transform(x_train[:,-1:])
x_test[:,-1:] = sc.fit_transform(x_test[:,-1:])
y_train = sc.fit_transform(y_train.reshape(-1,1))
y_test = sc.fit_transform(y_test.reshape(-1,1))

In [37]:
print(x_train)

[[0.0 1.0 0.0 ... 1.0 0.0 -0.42506261514329724]
 [0.0 1.0 0.0 ... 0.0 0.0 1.3488477644516923]
 [1.0 0.0 0.0 ... 1.0 0.0 -0.42506261514329724]
 ...
 [0.0 1.0 0.0 ... 0.0 0.0 -1.312017804940792]
 [1.0 0.0 0.0 ... 1.0 0.0 1.3488477644516923]
 [0.0 0.0 1.0 ... 0.0 0.0 -1.312017804940792]]


In [38]:
print(x_test)

[[0.0 0.0 0.0 ... 0.0 0.0 0.4546389881476175]
 [0.0 0.0 0.0 ... 0.0 0.0 -0.4471242941287571]
 [0.0 0.0 1.0 ... 0.0 0.0 0.4546389881476175]
 ...
 [0.0 1.0 0.0 ... 0.0 0.0 -0.4471242941287571]
 [1.0 0.0 0.0 ... 0.0 0.0 -0.4471242941287571]
 [0.0 1.0 0.0 ... 0.0 0.0 0.4546389881476175]]


In [39]:
print(y_train)

[[-1.53646174]
 [ 1.19419762]
 [-1.38753275]
 ...
 [-1.66241058]
 [ 0.209587  ]
 [-1.72825752]]


In [40]:
print(y_test)

[[-0.62288106]
 [-0.604924  ]
 [-1.14582589]
 ...
 [-0.98903003]
 [ 1.0134019 ]
 [-0.45951555]]
