In [None]:
# Dataset used in this notebook: Data.csv

**Step 1: Importing the libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


**Step 2: Importing the dataset**

In [2]:
# Read the raw CSV into a DataFrame and preview its first rows.
data_path = '/content/Data.csv'
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes


**Step 3: Handling the missing data**

In [3]:
# Tally the missing (NaN) entries in each column before imputation.
missing_per_column = df.isnull().sum()
missing_per_column

Country      0
Age          1
Salary       1
Purchased    0
dtype: int64

In [4]:
# Impute missing numeric values with the mean of their own column.
# The filled Series is assigned back to the column rather than using a chained
# `df[col].fillna(..., inplace=True)`: that form operates on an intermediate
# Series, triggers a FutureWarning in pandas 2.x and leaves `df` unchanged once
# Copy-on-Write is enabled.
# NOTE: the means are computed over the whole dataset, i.e. before the
# train/test split performed in Step 6.
for column in ("Age", "Salary"):
    df[column] = df[column].fillna(df[column].mean())

In [5]:
# Re-count the missing entries per column to confirm the imputation filled them.
remaining_missing = df.isnull().sum()
remaining_missing

Country      0
Age          0
Salary       0
Purchased    0
dtype: int64

**Step 4: Encoding categorical data**

In [6]:
# Encode the binary target as integers: 'No' -> 0, 'Yes' -> 1.
# Values not present in the mapping become NaN, so re-running this cell on the
# already-encoded column would blank it out.
purchased_codes = {'No': 0, 'Yes': 1}
df['Purchased'] = df['Purchased'].map(purchased_codes)
df.head()

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,0
1,Spain,27.0,48000.0,1
2,Germany,30.0,54000.0,0
3,Spain,38.0,61000.0,0
4,Germany,40.0,63777.777778,1


**Step 5: Creating dummy variables**

In [7]:
# One-hot encode `Country` into one indicator column per category
# (Country_France, Country_Germany, Country_Spain).
# `dtype=int` keeps the indicators as 0/1 integers on every pandas version;
# since pandas 2.0 the default is bool, which would make the mixed float/bool
# feature matrix extracted with `.values` fall back to an object array.
df = pd.get_dummies(df, columns=["Country"], dtype=int)
df.head()

Unnamed: 0,Age,Salary,Purchased,Country_France,Country_Germany,Country_Spain
0,44.0,72000.0,0,1,0,0
1,27.0,48000.0,1,0,0,1
2,30.0,54000.0,0,0,1,0
3,38.0,61000.0,0,0,0,1
4,40.0,63777.777778,1,0,1,0


**Step 6: Splitting the dataset into a training set and a test set**

In [8]:
# Feature matrix: the two numeric columns followed by the country indicators.
feature_columns = ['Age', 'Salary', 'Country_France', 'Country_Germany', 'Country_Spain']
X = df[feature_columns].values
# Target vector: the encoded `Purchased` label.
y = df['Purchased'].values

In [9]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

**Step 7: Feature Scaling**

In [10]:
# Standardise every feature column. The scaler learns its statistics from the
# training split only and then applies that same transform to the test split.
# StandardScaler is unsupervised, so no labels are passed when fitting.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Display the standardised training features.
X_train_scaled

array([[ 0.26306757,  0.12381479, -1.        ,  2.64575131, -0.77459667],
       [-0.25350148,  0.46175632,  1.        , -0.37796447, -0.77459667],
       [-1.97539832, -1.53093341, -1.        , -0.37796447,  1.29099445],
       [ 0.05261351, -1.11141978, -1.        , -0.37796447,  1.29099445],
       [ 1.64058505,  1.7202972 ,  1.        , -0.37796447, -0.77459667],
       [-0.0813118 , -0.16751412, -1.        , -0.37796447,  1.29099445],
       [ 0.95182631,  0.98614835,  1.        , -0.37796447, -0.77459667],
       [-0.59788085, -0.48214934,  1.        , -0.37796447, -0.77459667]])

In [12]:
# Display the test features after applying the scaler fitted on the training split.
X_test_scaled

array([[-1.45882927, -0.90166297, -1.        ,  2.64575131, -0.77459667],
       [ 1.98496442,  2.13981082, -1.        ,  2.64575131, -0.77459667]])