## Random Forest Classification Algorithm


## Step 1: Importing Necessary Libraries


In [1]:
import pandas as pd

In [2]:
# Load the Social Network Ads dataset from the working directory.
csv_path = "Social_Network_Ads.csv"
data = pd.read_csv(csv_path)

In [3]:
data

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


## Step 2: Encoding the Categorical Column and Dropping the Unwanted Column

In [4]:
# One-hot encode the non-numeric columns as 0/1 integers; drop_first keeps a
# single indicator per category (Gender -> Gender_Male).
data = pd.get_dummies(
    data,
    drop_first=True,
    dtype=int,
)

In [5]:
data

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1
...,...,...,...,...,...
395,15691863,46,41000,1,0
396,15706071,51,23000,1,1
397,15654296,50,20000,1,0
398,15755018,36,33000,0,1


In [6]:

# Remove the "User ID" column; `columns=` is the column-wise form of
# `axis=1` (`axis=0` / `index=` would delete rows instead).
data = data.drop(columns=["User ID"])

In [7]:
data

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
0,19,19000,0,1
1,35,20000,0,1
2,26,43000,0,0
3,27,57000,0,0
4,19,76000,0,1
...,...,...,...,...
395,46,41000,1,0
396,51,23000,1,1
397,50,20000,1,0
398,36,33000,0,1


In [8]:
# Count the rows in each target class to check how balanced the labels are.
data["Purchased"].value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [9]:
data.shape  #rows, columns

(400, 4)

### Functions (called with parentheses) --> value_counts(), head(), tail()


### Attributes (accessed without parentheses) --> shape, columns

In [10]:
data.columns

Index(['Age', 'EstimatedSalary', 'Purchased', 'Gender_Male'], dtype='object')

## Step 3: To Split Input and Output Variables

In [11]:
# Model inputs, in the column order the classifier will be trained on.
feature_columns = ['Gender_Male', 'Age', 'EstimatedSalary']
Independent = data[feature_columns]

In [12]:
# Target column; the double brackets keep it as a one-column DataFrame
# rather than a Series.
Dependent = data[['Purchased']]

In [13]:
Independent


Unnamed: 0,Gender_Male,Age,EstimatedSalary
0,1,19,19000
1,1,35,20000
2,0,26,43000
3,0,27,57000
4,1,19,76000
...,...,...,...
395,0,46,41000
396,1,51,23000
397,0,50,20000
398,1,36,33000


In [14]:
Dependent

Unnamed: 0,Purchased
0,0
1,0
2,0
3,0
4,0
...,...
395,1
396,1
397,1
398,0


## Step 4: To Split Train and Test Values

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    Independent,
    Dependent,
    test_size=0.30,
    random_state=42,
)

In [17]:
X_train #.shape

Unnamed: 0,Gender_Male,Age,EstimatedSalary
157,1,29,75000
109,0,38,80000
17,1,45,26000
347,0,54,108000
24,1,46,23000
...,...,...,...
71,0,24,27000
106,0,26,35000
270,0,43,133000
348,1,39,77000


In [18]:
X_test

Unnamed: 0,Gender_Male,Age,EstimatedSalary
209,0,46,22000
280,0,59,88000
33,0,28,44000
210,0,48,96000
93,0,29,28000
...,...,...,...
60,1,27,20000
79,0,26,17000
285,0,37,93000
305,1,42,54000


In [19]:
y_train

Unnamed: 0,Purchased
157,0
109,0
17,1
347,1
24,1
...,...
71,0
106,0
270,0
348,0


In [21]:
y_test

Unnamed: 0,Purchased
209,0
280,1
33,0
210,1
93,0
...,...
60,0
79,0
285,1
305,0


## Step 5: To Choose the Algorithm and Learn

In [22]:
from sklearn.ensemble import RandomForestClassifier

In [23]:
# Forest of 150 trees using entropy as the split criterion, seeded so that
# training is repeatable.
Classifier = RandomForestClassifier(
    n_estimators=150,
    criterion='entropy',
    random_state=0,
)

In [24]:
Classifier

In [25]:
# y_train is a one-column DataFrame; flatten it to a 1-D array of shape
# (n_samples,) so scikit-learn does not emit a DataConversionWarning about a
# column-vector target.
Classifier.fit(X_train, y_train.to_numpy().ravel())

  Classifier.fit(X_train, y_train)


In [26]:
# Predict the class label for every row of the held-out test set.
y_pred = Classifier.predict(X_test)

## Step 6: To Find the Confusion matrix of the Model

In [27]:
from sklearn.metrics import confusion_matrix

In [28]:
# Rows are the actual classes and columns the predicted classes (labels 0, 1).
confusion_matrix(y_test,y_pred)

array([[67,  6],
       [ 7, 40]], dtype=int64)

## Step 7: To Find the Classification_Report 

In [29]:
from sklearn.metrics import classification_report

In [30]:
# Per-class precision, recall and F1 for the test set, returned as one
# formatted string.
Clf_report = classification_report(y_test, y_pred)

In [31]:
# The report is a multi-line string: print it so the newlines render as a
# table instead of showing up as "\n" in the string repr.
print(Clf_report)

'              precision    recall  f1-score   support\n\n           0       0.91      0.92      0.91        73\n           1       0.87      0.85      0.86        47\n\n    accuracy                           0.89       120\n   macro avg       0.89      0.88      0.89       120\nweighted avg       0.89      0.89      0.89       120\n'

## Step 8: Test the model using values

In [32]:
# Collect one sample interactively; int() raises ValueError on non-numeric input.
Gender = int(input("Enter you gender (Male: 1, Female: 0): "))
Age = int(input("Enter you Age: "))
Estimated_Salary = int(input("Enter your Estimated Salary: "))

# Wrap the sample in a DataFrame with the training column names (same order as
# the fitted features) so predict() does not warn about missing feature names.
user_sample = pd.DataFrame(
    [[Gender, Age, Estimated_Salary]],
    columns=['Gender_Male', 'Age', 'EstimatedSalary'],
)
Classifier.predict(user_sample)

Enter you gender (Male: 1, Female: 0):  0
Enter you Age:  45
Enter your Estimated Salary:  75000




array([0], dtype=int64)

## Step 9: Saving the Final Model

In [33]:
import pickle

In [34]:
# File name the trained model is saved under (pickle format, .pkl extension).
fname="final_profit_pred.pkl"

In [36]:
# Serialize the trained classifier to `fname`; the `with` block guarantees the
# file handle is flushed and closed even if pickling fails.
with open(fname, 'wb') as model_file:
    pickle.dump(Classifier, model_file)

In [37]:
# Reload the model that was just saved. NOTE: unpickling executes arbitrary
# code, so only load pickle files from a trusted source.
with open(fname, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
loaded_model

In [38]:
# Score the same user-entered sample with the reloaded model, passing the
# training column names so predict() does not warn about missing feature names.
result = loaded_model.predict(
    pd.DataFrame(
        [[Gender, Age, Estimated_Salary]],
        columns=['Gender_Male', 'Age', 'EstimatedSalary'],
    )
)



In [39]:
result

array([0], dtype=int64)