In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Build the full paths for this directory first, then echo them one per line.
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn import svm
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [None]:
# Read the placement CSV from the Kaggle input mount and preview its first three rows.
DATASET_CSV = '/kaggle/input/engineering-placements-prediction/collegePlace.csv'
engineering_dataset = pd.read_csv(DATASET_CSV)
engineering_dataset.head(n=3)

In [None]:
# Checking out the data: (number of rows, number of columns) of the loaded frame.
engineering_dataset.shape

In [None]:
# Count the missing entries in each column (a column of zeros means nothing to impute).
engineering_dataset.isna().sum()

In [None]:
# Print pandas' structural summary of the frame (its columns, dtypes and non-null counts).
engineering_dataset.info()

In [None]:
# Statistical summaries: descriptive statistics of the dataset via pandas' describe().
engineering_dataset.describe()

In [None]:
# Checking the class distribution of the target column 'PlacedOrNot':
#   1 --> placed
#   0 --> not placed
engineering_dataset['PlacedOrNot'].value_counts()

In [None]:
# Encode the 'Gender' column numerically: Female -> 0, Male -> 1.
# The result is re-bound instead of mutating the frame with inplace=True, so the
# cell reads as a plain assignment. Re-running it is harmless: values that are
# already 0/1 match neither key and are left untouched.
engineering_dataset = engineering_dataset.replace({'Gender': {'Female': 0, 'Male': 1}})
engineering_dataset.head()

In [None]:
# Feature matrix: every column except 'Stream' and the target itself.
# NOTE(review): 'Stream' is presumably dropped because it is non-numeric — confirm.
X = engineering_dataset.drop(columns=['Stream', 'PlacedOrNot'])

# Target vector: the placement label.
Y = engineering_dataset['PlacedOrNot']

print(X.shape, Y.shape)

In [None]:
# Hold out 10% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both partitions, and random_state pins the shuffle
# so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    stratify=Y,
    random_state=2,
)

print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

Logistic Regression Model

In [None]:
# Fit a logistic-regression classifier (default hyper-parameters) on the training split.
model = LogisticRegression()
model.fit(X=X_train, y=Y_train)

In [None]:
# Accuracy of the fitted logistic-regression model on the data it was trained on,
# reported as a percentage.
training_pred = model.predict(X_train)
score_1 = accuracy_score(Y_train, training_pred)
# The label names the estimator that was actually fitted (LogisticRegression).
print('Accuracy score on training data using Logistic Regression:', score_1 * 100)

In [None]:
# Accuracy of the logistic-regression model on the held-out test split,
# reported as a percentage.
testing_pred = model.predict(X_test)
score_2 = accuracy_score(Y_test, testing_pred)
# This score comes from X_test / Y_test, so the label says "testing data".
print('Accuracy score on testing data using Logistic Regression:', score_2 * 100)

Support Vector Machine Model

In [None]:
# Re-bind `model` to a linear-kernel support vector classifier and fit it on the training split.
model = svm.SVC(kernel='linear')
model.fit(X=X_train, y=Y_train)

In [None]:
# In-sample accuracy of the SVM, reported as a percentage.
training_pred = model.predict(X_train)
score_1 = accuracy_score(y_true=Y_train, y_pred=training_pred)
print('Accuracy score on training data using SVM:', score_1 * 100)

In [None]:
# Accuracy of the SVM on the held-out test split, reported as a percentage.
testing_pred = model.predict(X_test)
score_2 = accuracy_score(Y_test, testing_pred)
# This score comes from X_test / Y_test, so the label says "testing data".
print('Accuracy score on testing data using SVM:', score_2 * 100)

**XGBoost Model**

In [None]:
# Gradient-boosted tree classifier. The hyper-parameters are gathered in one
# mapping so they can be read (and tuned) at a glance.
# NOTE(review): scale_pos_weight=0.47 is presumably meant to offset the class
# imbalance of the target — confirm how the value was derived.
xgb_params = {
    'learning_rate': 0.3,
    'n_estimators': 190,
    'max_depth': 6,
    'objective': 'binary:logistic',
    'scale_pos_weight': 0.47,
    'seed': 27,
    'use_label_encoder': False,
}
model_xgb = xgb.XGBClassifier(**xgb_params)
model_xgb.fit(X_train, Y_train)

In [None]:
# In-sample accuracy of the XGBoost classifier, reported as a percentage.
training_pred = model_xgb.predict(X_train)
score_1 = accuracy_score(y_true=Y_train, y_pred=training_pred)
print('Accuracy score on training data using XGB:', score_1 * 100)

In [None]:
# Accuracy of the XGBoost classifier on the held-out test split, reported as a percentage.
testing_pred = model_xgb.predict(X_test)
score_2 = accuracy_score(Y_test, testing_pred)
# This score comes from X_test / Y_test, so the label says "testing data".
print('Accuracy score on testing data using XGB:', score_2 * 100)

**Making a prediction system**

In [None]:
# One applicant's feature values. They must follow the column order of X
# (the training features), because the row is matched to those columns below.
input_data = [22, 1, 1, 8, 1, 1]

# Wrap the values in a single-row DataFrame that carries the training column
# names. The model was fitted on a DataFrame; a bare NumPy array drops the
# names, which some XGBoost versions reject as a feature-name mismatch.
# A wrong number of values also fails loudly here instead of at predict time.
reshaped_input_data = pd.DataFrame([input_data], columns=X.columns)

# Predict the placement label for that single row.
pred = model_xgb.predict(reshaped_input_data)

# Label 1 means placed, anything else means not placed.
if pred[0] == 1:
    print('Congratulations you have been Placed')
else:
    print('Unfortunately you need a retake')