In [1]:
#Social_Network_Ads.csv

**Importing the libraries**

In [14]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn import datasets, neighbors
from mlxtend.plotting import plot_decision_regions 
from sklearn.model_selection import cross_val_score 

**Importing the dataset**

In [None]:
ds = pd.read_csv('Social_Network_Ads.csv')
ds.head()
ds = ds.drop('User ID', axis=1)
ds

In [16]:
ds['Purchased'].value_counts()

0    257
1    143
Name: Purchased, dtype: int64

In [17]:
ds.isnull().sum()

Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

**Splitting the dataset into the Training set and Test set**

In [18]:
feature = ds.drop('Purchased', axis = 1)

target = ds['Purchased']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(feature, target, shuffle = True, test_size=0.2, random_state=1)

print('Shape of training feature:', X_train.shape)
print('Shape of testing feature:', X_test.shape)
print('Shape of training label:', y_train.shape)
print('Shape of testing label:', y_test.shape)

Shape of training feature: (320, 3)
Shape of testing feature: (80, 3)
Shape of training label: (320,)
Shape of testing label: (80,)


**Feature Scaling**

In [19]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder(sparse=False)
cat_cols = ['Gender']

# Encode Categorical Data
df_encoded = pd.DataFrame(encoder.fit_transform(ds[cat_cols]))
df_encoded.columns = encoder.get_feature_names(cat_cols)

# Replace Categorical Data with Encoded Data
ds = ds.drop(cat_cols ,axis=1)
ds = pd.concat([df_encoded, ds], axis=1)



**Fitting Random Forest to the Training set**

In [20]:
from sklearn.ensemble import RandomForestClassifier
classifier= RandomForestClassifier(n_estimators= 10, criterion="entropy")
classifier.fit(X_train, y_train)

ValueError: ignored

**Predicting the Test set results**

In [None]:
y_pred= classifier.predict(x_test)  

**Making the Confusion Matrix**

In [9]:
def evaluate_model(model, x_test, y_test):
    from sklearn import metrics

    # Predict Test Data 
    y_pred = model.predict(x_test)

    # Calculate accuracy, precision, recall, f1-score, and kappa score
    acc = metrics.accuracy_score(y_test, y_pred)
    prec = metrics.precision_score(y_test, y_pred)
    rec = metrics.recall_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)

    # Display confussion matrix
    cm = metrics.confusion_matrix(y_test, y_pred)

    return {'acc': acc, 'prec': prec, 'rec': rec, 'f1': f1, 
             'cm': cm}

In [None]:
dtc = RandomForestClassifier(n_estimators= 10, criterion="entropy")
dtc.fit(X_train, y_train)
dtc_eval = evaluate_model(dtc, X_test, y_test)
print('Confusion Matrix:\n', dtc_eval['cm'])

**Visualising the Training set results**

In [None]:
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, dtc.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Random Forest (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

**Visualising the Test set results**

In [None]:
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, dtc.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Random Forest (Testing set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()