In [1]:
pip install pandas
pip install numpy
pip install -U scikit-learn



In [59]:
# Run this program on your local python
# interpreter, provided you have installed
# the required libraries.

# Importing the required packages
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
#for encoding
from sklearn.preprocessing import LabelEncoder

In [73]:
# Function importing Dataset
def importdata(path="/content/Cruise Director Analysis.xlsx"):
    """Load the cruise spreadsheet and return the label-encoded modelling frame.

    Parameters
    ----------
    path : str, optional
        Location of the Excel workbook. Defaults to the path the notebook
        originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The columns left after removing the unused ones, with rows containing
        missing values removed and the 'PurchasedPackage', 'Economic Class',
        'Sex' and 'Age' columns replaced by integer codes.

    Side effects
    ------------
    Prints the row count, the shape and the first rows of the result.
    """
    unused_columns = [
        'ID',
        'Port of Embarkation',
        'No of Siblings or Spouses on Board',
        'No of Parents or Children on Board',
        'Passenger Fare',
        'ChildFare?',
    ]
    encoded_columns = ['PurchasedPackage', 'Economic Class', 'Sex', 'Age']

    # read_excel already returns a DataFrame, so no extra wrapping is needed.
    balance_data_actual = pd.read_excel(path)

    # Remove the unused columns BEFORE dropping missing values; otherwise a
    # row is thrown away just because a column the model never sees is empty.
    # The trailing copy() gives an independent frame, so the column
    # assignments below cannot trigger pandas' SettingWithCopyWarning.
    balance_data = (
        balance_data_actual
        .drop(columns=unused_columns)
        .dropna()
        .copy()
    )

    # A fresh encoder per column: each one maps the column's sorted distinct
    # values to 0..n-1.
    # NOTE(review): 'Age' is encoded the same way as the categorical columns,
    # so every age is replaced by its rank among the distinct ages present.
    # Kept so downstream output stays the same; consider leaving it numeric.
    for column in encoded_columns:
        balance_data[column] = LabelEncoder().fit_transform(balance_data[column])

    # Summary of the prepared dataset
    print ("Dataset Length: ", len(balance_data))
    print ("Dataset Shape: ", balance_data.shape)
    print ("Dataset: ",balance_data.head())
    return balance_data

In [81]:
# Function to split the dataset
def splitdataset(balance_data, test_size=0.3, random_state=100):
    """Separate features from the target and split both into train/test sets.

    Parameters
    ----------
    balance_data : pandas.DataFrame
        Frame whose first three columns are the features and whose fourth
        column (position 3) is the target.
    test_size : float, optional
        Fraction of rows held out for testing. Defaults to 0.3, the value the
        notebook originally hard-coded.
    random_state : int, optional
        Seed passed to ``train_test_split``. Defaults to 100, the value the
        notebook originally hard-coded.

    Returns
    -------
    tuple
        ``(X, Y, X_train, X_test, y_train, y_test)``.
    """
    # Select by position on the frame itself rather than on the frame-wide
    # ``.values`` array: that array has one common dtype for every column, so
    # a float feature would silently turn the integer target into floats.
    X = balance_data.iloc[:, 0:3].to_numpy()
    Y = balance_data.iloc[:, 3].to_numpy()

    # Splitting the dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)

    return X, Y, X_train, X_test, y_train, y_test

In [6]:
# Function to perform training with giniIndex.
def train_using_gini(X_train, X_test, y_train):
    """Fit and return a decision tree that splits on the Gini criterion.

    ``X_test`` is part of the signature but is not used by the training step.
    """
    tree_settings = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    # fit() hands back the estimator itself, so build-and-train can be chained.
    return DecisionTreeClassifier(**tree_settings).fit(X_train, y_train)

In [7]:
# Function to perform training with entropy.
def tarin_using_entropy(X_train, X_test, y_train):
    """Fit and return a decision tree that splits on the entropy criterion.

    The function name is misspelled ("tarin"); it is kept because existing
    callers use it. New code should use the ``train_using_entropy`` alias
    defined right after this function.

    Parameters
    ----------
    X_train : array-like
        Training features.
    X_test : array-like
        Not used by the training step; present in the signature only.
    y_train : array-like
        Training labels.

    Returns
    -------
    DecisionTreeClassifier
        The fitted classifier (max depth 3, at least 5 samples per leaf,
        random_state 100).
    """
    # Decision tree with entropy
    clf_entropy = DecisionTreeClassifier(
        criterion="entropy",
        random_state=100,
        max_depth=3,
        min_samples_leaf=5,
    )

    # Performing training
    clf_entropy.fit(X_train, y_train)
    return clf_entropy


# Correctly spelled name, consistent with train_using_gini. The misspelled
# original stays available so existing calls keep working.
train_using_entropy = tarin_using_entropy

In [8]:
# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with ``clf_object``, print and return them.

    Works with any object exposing a ``predict`` method, not only the
    Gini-based tree.
    """
    predicted = clf_object.predict(X_test)
    # Header and values on separate lines, exactly as two print calls would.
    print("Predicted values:", predicted, sep="\n")
    return predicted

In [9]:
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, the accuracy in percent and the
    classification report for ``y_pred`` against ``y_test``.

    Nothing is returned; the three results are written to stdout in order.
    """
    # Each metric is computed only when its turn to be printed comes, so the
    # compute/print sequence is the same as three separate print statements.
    sections = (
        ("Confusion Matrix: ", lambda: confusion_matrix(y_test, y_pred)),
        ("Accuracy : ", lambda: accuracy_score(y_test, y_pred) * 100),
        ("Report : ", lambda: classification_report(y_test, y_pred)),
    )
    for label, compute in sections:
        print(label, compute())

In [38]:
# Driver code
def main():
    """Load the data, train both decision trees and report each one's
    test-set predictions and metrics."""
    # Building phase: load, split, then fit both classifiers up front.
    dataset = importdata()
    _, _, X_train, X_test, y_train, y_test = splitdataset(dataset)

    fitted_models = (
        ("Results Using Gini Index:", train_using_gini(X_train, X_test, y_train)),
        ("Results Using Entropy:", tarin_using_entropy(X_train, X_test, y_train)),
    )

    # Operational phase: one heading, one prediction run and one metrics
    # report per classifier, Gini first.
    for heading, classifier in fitted_models:
        print(heading)
        predicted = prediction(X_test, classifier)
        cal_accuracy(y_test, predicted)

In [82]:
# Entry point: run the full pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Dataset Length:  1046
Dataset Shape:  (1046, 4)
Dataset:     Economic Class  Sex  Age  PurchasedPackage
0               2    1   49                 0
1               0    1   38                 1
2               0    1   41                 0
3               2    0   50                 1
4               0    1   56                 0
Results Using Gini Index:
Predicted values:
[1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 1 0 1 0
 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0
 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 1
 0 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 1 1 0 0 0 0 1 1
 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0
 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0
 0 1 0 0 0 0 0 0 0 0 0