In [None]:
# Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
from scipy import stats

In [None]:
# Read the placement CSV into a DataFrame and preview its first rows.

csv_path = '../input/placement-data-eda-randomforest-beginner/Placement_Data_Full_Class.csv'
df1 = pd.read_csv(filepath_or_buffer=csv_path)
df1.head()

In [None]:
# number of features & records

df1.shape 

# *EXPLORATORY DATA ANALYSIS*

In [None]:
#Checking the features provided in the dataset

df1.columns

# *Feature description*

- `sl_no`: Serial number
- `gender`: Gender (Male = 'M', Female = 'F')
- `ssc_p`: Secondary education percentage (10th grade)
- `ssc_b`: Board of education for 10th grade (Central / Others)
- `hsc_p`: Higher secondary education percentage (12th grade)
- `hsc_b`: Board of education for 12th grade (Central / Others)
- `hsc_s`: Specialization in higher secondary education
- `degree_p`: Degree percentage
- `degree_t`: Undergraduate degree type (field of degree education)
- `workex`: Work experience
- `etest_p`: Employability test percentage (conducted by the college)
- `specialisation`: Postgraduate (MBA) specialization
- `mba_p`: MBA percentage
- `status`: Placement status (Placed / Not Placed)
- `salary`: Salary offered by the corporate to the candidate

In [None]:
# Drop the serial-number column (sl_no); it is only a row identifier.
# Dropping by name (instead of the positional slice df1.iloc[:,1:]) does not
# depend on the column order and returns a new DataFrame, so the later
# assignment to df.status does not write into a slice of df1.

df = df1.drop(columns=['sl_no'])
df.head()

In [None]:
# checking Dtypes of columns

df.info()

In [None]:
#Checking for null values in the dataset

df.isnull().sum()

## *Columnwise Evaluation*

### *1.Gender*

In [None]:
# How many Male and Female candidates

DG=df.gender.value_counts()
DG

In [None]:
# Pie diagram for gender evaluation

rcParams['figure.figsize'] = 5,5

# Wedge labels are taken from DG's index so every wedge is named after the
# value it really counts, whatever order value_counts() returned them in.
# Only the first (largest) wedge is pulled out of the pie.
plt.pie(DG, explode=[0.1] + [0]*(len(DG)-1), colors=['blue','pink'], labels=DG.index, autopct='%1.1f%%')
plt.title("Gender")
plt.show()

### *2.ssc_p: Secondary Education percentage- 10th Grade*

In [None]:
# What is the maximum secondary Education Percentage

df.ssc_p.max()

In [None]:
# What is the minimum secondary Education Percentage


df.ssc_p.min()

In [None]:
df.ssc_p.value_counts()

In [None]:
# Frequency distribution of the secondary-education percentage (ssc_p)

ssc_ax = sns.histplot(x='ssc_p', data=df[['ssc_p']])
ssc_ax.set_title('Secondary Education Percentage (ssc_p)')
plt.show()


In [None]:
# ssc_p of Female  & Male candidates

In [None]:
# gender and ssc_p of the female candidates, highest percentage first.
# Columns are selected by name rather than by position. The former
# `& (df.ssc_p)` term was not a real condition (it only coerced the float
# column to booleans), so the row filter is now the gender test alone.
F_ssc_p = df.loc[df.gender == 'F', ['gender', 'ssc_p']].sort_values(by='ssc_p', ascending=False)
F_ssc_p

In [None]:
# gender and ssc_p of the male candidates, highest percentage first.
# Columns are selected by name rather than by position. The former
# `& (df.ssc_p)` term was not a real condition (it only coerced the float
# column to booleans), so the row filter is now the gender test alone.
M_ssc_p = df.loc[df.gender == 'M', ['gender', 'ssc_p']].sort_values(by='ssc_p', ascending=False)
M_ssc_p

In [None]:
F_ssc_p.value_counts().head(10)

In [None]:
M_ssc_p.value_counts().head(10)

In [None]:
# ssc_p histograms side by side: female candidates (left), male candidates (right)

fig, axes = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(10, 5))
ax1, ax2 = axes

# Left panel: female candidates, drawn in red.
sns.histplot(x='ssc_p', data=F_ssc_p, color='r', ax=ax1)
ax1.set_title('(ssc_p) female candidates')

# Right panel: male candidates, default colour.
sns.histplot(x='ssc_p', data=M_ssc_p, ax=ax2)
ax2.set_title('(ssc_p) male candidates')

plt.show()

### *3.ssc_b: Board of Education- Central/ Others*

In [None]:
# how many candidates from central & others board of education

df.ssc_b.value_counts()

In [None]:
# Bar chart of the number of candidates per 10th-grade board of education.
ssc_b_counts = df['ssc_b'].value_counts()
ssc_b_ax = ssc_b_counts.plot(kind='bar', color='#19A992')

# Keep the category names horizontal.
plt.xticks(rotation=0)

ssc_b_ax.set_title('ssc_b: Board of Education- Central/ Others')

In [None]:
def countplot_of_2(x,hue,title=None,figsize=(6,5),data=None):
    """Draw a count plot of column ``x`` with the bars split by column ``hue``.

    Parameters
    ----------
    x : str
        Name of the column placed on the x-axis.
    hue : str
        Name of the column used to split/colour the bars.
    title : str, optional
        Title of the figure.
    figsize : tuple, optional
        Figure size handed to ``plt.figure``.
    data : pandas.DataFrame, optional
        Frame to plot. When omitted, the notebook-level ``df`` is used, so
        existing calls behave exactly as before.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Fall back to the notebook-level frame only when no frame is supplied.
    frame = df if data is None else data
    plt.figure(figsize=figsize)
    # Only the two columns involved are handed to seaborn.
    sns.countplot(data=frame[[x,hue]],x=x,hue=hue,palette="Set3")
    plt.title(title)
    plt.show()

In [None]:
# How many female/male candidates from central/ other board of education

In [None]:
df[(df.gender=='F')&(df.ssc_b=='Central')].shape

In [None]:
df[(df.gender=='M')&(df.ssc_b=='Central')].shape

In [None]:
df[(df.gender=='M')&(df.ssc_b=='Others')].shape

In [None]:
df[(df.gender=='F')&(df.ssc_b=='Others')].shape

In [None]:
#graphical representation of Male/Female candidates from central/others board of education

countplot_of_2('ssc_b','gender','Male/Female candidates from central/others board of education')


### *4.hsc_p: Higher Secondary Education percentage- 12th Grade*

In [None]:
df.hsc_p

In [None]:
df.hsc_p.value_counts()

In [None]:
#HSE-Percentage Frequency graph

plt.figure(figsize=(12,6))

plt.title('Higher Secondary Education percentage- 12th Grade')

plt.xlabel('hsc_p')

# Bin edges run 5, 10, ..., 100. The previous np.arange(5,100,5) stopped at
# 95, and plt.hist leaves out values that fall outside the outermost edges,
# so any percentage above 95 would have been missing from the chart.
plt.hist(df.hsc_p, bins = np.arange(5,105,5), color = 'blue', edgecolor ='orange')
plt.show()

In [None]:
# HSE percentage of male/female candidates

In [None]:
# gender and hsc_p of the male candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.hsc_p)` term only coerced the float column to booleans and is dropped.
M_hsc_p = df.loc[df.gender == 'M', ['gender', 'hsc_p']].sort_values(by='hsc_p', ascending=False)

In [None]:
M_hsc_p.value_counts().head()

In [None]:
# gender and hsc_p of the female candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.hsc_p)` term only coerced the float column to booleans and is dropped.
F_hsc_p = df.loc[df.gender == 'F', ['gender', 'hsc_p']].sort_values(by='hsc_p', ascending=False)


In [None]:
F_hsc_p.value_counts().head()

In [None]:
# hsc_p histograms side by side: female candidates (left), male candidates (right)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(15, 5))

# (frame, bar colour, panel title) for each panel, in left-to-right order.
hsc_panels = [
    (F_hsc_p, 'violet', ' hsc perc.of female candidates'),
    (M_hsc_p, 'purple', 'hsc perc.of male candidates'),
]
for panel_ax, (panel_data, panel_color, panel_title) in zip((ax1, ax2), hsc_panels):
    sns.histplot(data=panel_data, x='hsc_p', ax=panel_ax, color=panel_color)
    panel_ax.set_title(panel_title)

plt.show()

In [None]:
# Scatter plot of ssc_p against hsc_p, with points coloured by gender.

sns.scatterplot(x='ssc_p', y='hsc_p', hue='gender', data=df)


### *5.hsc_b: Board of Education- Central/ Others*

In [None]:
#how many candidates from central/other board of education

df.hsc_b.value_counts()

In [None]:
# pie-plot for hsc_b

hsc_b_counts = df.hsc_b.value_counts()

# Labels come from the counts' own index, so each wedge is always named after
# the board it represents rather than after a hard-coded order.
plt.pie(hsc_b_counts, explode=[0.1] + [0]*(len(hsc_b_counts)-1), colors=['orange','yellow'], labels=hsc_b_counts.index, autopct="%1.1f%%")
plt.title("HSC_b")
plt.show()

In [None]:
#graphical representation of Male/Female candidates from central/others board of education-12th

countplot_of_2('hsc_b','gender','Male/Female candidates from central/others board of education-12th')


### *6.hsc_s: Specialization in Higher Secondary Education*

In [None]:
df.hsc_s.value_counts()

In [None]:
# Horizontal bar chart: number of candidates per 12th-grade specialization.
# The counts are computed once and reused for both axes.
hsc_s_counts = df.hsc_s.value_counts()

plt.figure(figsize=(7,4))

sns.barplot(x=hsc_s_counts.values, y=hsc_s_counts.index, palette = "rocket")

# Counts are on the x-axis and the specializations on the y-axis, so each axis
# gets its own label (the old x-label named the y-axis variable and carried a
# stray double quote).
plt.xlabel('Number of candidates',fontsize=18)
plt.ylabel('12th-specialization',fontsize=18)
plt.show()



In [None]:
countplot_of_2('gender','hsc_s','male/female-12thspecialization')


### *7.degree_p: Degree Percentage*

In [None]:
df.degree_p.value_counts()

In [None]:
# Histogram of degree percentages, split into 8 bins

fig, degree_ax = plt.subplots()
degree_ax.hist(df['degree_p'], bins=8, color='aqua', edgecolor='green')
degree_ax.set_title('Degree percentage')

degree_ax.set_xlabel('degree_p')


In [None]:
# gender and degree_p of the male candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.degree_p)` term only coerced the float column to booleans and is dropped.
M_Degree_p = df.loc[df.gender == 'M', ['gender', 'degree_p']].sort_values(by='degree_p', ascending=False)
M_Degree_p

In [None]:
M_Degree_p.value_counts()

In [None]:
# gender and degree_p of the female candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.degree_p)` term only coerced the float column to booleans and is dropped.
F_Degree_p = df.loc[df.gender == 'F', ['gender', 'degree_p']].sort_values(by='degree_p', ascending=False)
F_Degree_p

In [None]:
F_Degree_p.value_counts().head()

In [None]:
# degree_p histograms side by side: female candidates (left), male candidates (right)

fig, axes = plt.subplots(1, 2, figsize=(15, 5), sharex=True)
ax1, ax2 = axes

# Draw both distributions first, then title the two panels.
sns.histplot(F_Degree_p, x='degree_p', color='green', ax=ax1)
sns.histplot(M_Degree_p, x='degree_p', color='blue', ax=ax2)

ax1.set_title(' Degree perc.of female candidates')
ax2.set_title('Degree perc.of male candidates')

plt.show()

### *8.degree_t: Under Graduation(Degree type)- Field of degree education*

In [None]:
df.degree_t.value_counts()

In [None]:
# pie-plot for degree_stream

degree_t_counts = df.degree_t.value_counts()

# Labels are read from the counts' index instead of being typed by hand, so
# they always match the wedge they sit on and are spelled as in the data.
# explode keeps the original offsets for the first two wedges and 0 for the rest.
degree_t_explode = ([0.1, .05] + [0]*len(degree_t_counts))[:len(degree_t_counts)]
plt.pie(degree_t_counts, explode=degree_t_explode, colors=['red','yellow','green'], labels=degree_t_counts.index, autopct="%1.1f%%")
plt.title("Degree_Stream")
plt.show()

In [None]:
countplot_of_2('degree_t','gender',"Male/Female candidates from different degree's")


### *9.workex: Work Experience*

In [None]:
df.workex.value_counts()

In [None]:
sns.displot(df.workex,color='violet')

In [None]:
countplot_of_2('workex','gender',"Male/Female candidates_work experience")


### *10.etest_p: Employability test percentage ( conducted by college)*

In [None]:
df.etest_p.value_counts()

In [None]:
# Employability test percentage histogram (8 bins)
# (the previous comment was copied from the degree_p cell and named the wrong column)

plt.hist(df.etest_p,bins=8,color='pink',edgecolor='green')
plt.title('Employability test percentage ( conducted by college)')

plt.xlabel('etest_p')
# Render the figure explicitly so the cell does not echo a Text(...) repr.
plt.show()


In [None]:
# gender and etest_p of the female candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.etest_p)` term only coerced the float column to booleans and is dropped.
F_etest_p = df.loc[df.gender == 'F', ['gender', 'etest_p']].sort_values(by='etest_p', ascending=False)
F_etest_p

In [None]:
# gender and etest_p of the male candidates, highest percentage first.
# Columns are selected by name rather than by position; the former
# `& (df.etest_p)` term only coerced the float column to booleans and is dropped.
M_etest_p = df.loc[df.gender == 'M', ['gender', 'etest_p']].sort_values(by='etest_p', ascending=False)
M_etest_p

In [None]:
# etest_p histograms side by side: female candidates (left), male candidates (right)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(15, 5))

# Keyword arguments shared by both panels.
shared_kwargs = {'x': 'etest_p'}

sns.histplot(data=F_etest_p, ax=ax1, color='yellow', **shared_kwargs)
ax1.set_title('Employability test percentage of female candidates')

sns.histplot(data=M_etest_p, ax=ax2, color='orange', **shared_kwargs)
ax2.set_title('Employability test percentage of male candidates')

plt.show()

### *11.specialisation: Post Graduation(MBA)- Specialization*

In [None]:
df.specialisation.value_counts()

In [None]:
sns.displot(df.specialisation,bins=15,color='green')

In [None]:
countplot_of_2('specialisation','gender',"Male/Female candidates-specialisation")

### *12.mba_p: MBA percentage*

In [None]:
df.mba_p.value_counts()

In [None]:
# Histogram of MBA percentages, split into 8 bins
fig, mba_ax = plt.subplots()
mba_ax.hist(df['mba_p'], bins=8, color='indigo', edgecolor='cyan')
mba_ax.set_title('MBA Percentage')

mba_ax.set_xlabel('mba_p')


In [None]:
# Scatterplot of employability test percentage vs. MBA percentage, coloured by gender

sns.scatterplot(data=df, x="etest_p", y="mba_p", hue="gender")

### *13.status: Status of placement- Placed/Not placed*


In [None]:
df.status.value_counts()

In [None]:
# Pie chart of placement status.
status_counts = df.status.value_counts()

# Labels are taken from the counts' index so a wedge can never be captioned
# with the other class when the order of value_counts() changes.
plt.pie(status_counts, explode=[0.2] + [0]*(len(status_counts)-1), colors=['orange','green'], labels=status_counts.index, autopct="%1.1f%%")
plt.title("placement")
plt.show()

In [None]:
countplot_of_2('status','gender',"Male/Female candidates-Placement")

In [None]:
countplot_of_2('specialisation','status',"specialisation-Placement")

In [None]:
df.head()

In [None]:
# what is the salary of most of the candidates.

df.salary.value_counts().head(10)

In [None]:
df.salary.max()

In [None]:
df.salary.min()

## *ML Algorithm-RandomForestClassifier*

### *Selecting features (X) and target (y)*

In [None]:
# Feature matrix: every column except the target (status) and salary, which
# are the two trailing columns that the positional slice df.iloc[:,:-2] removed.
# NOTE(review): salary is presumably left out because it is only known once a
# candidate has been placed — confirm.
# drop() returns a new DataFrame, so the encoding cell can assign to X's
# columns without writing into a slice of df.
X = df.drop(columns=['status', 'salary'])
X.head()

In [None]:
y=df.status
y

### *LabelEncoding*

In [None]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

In [None]:
# Encode every categorical feature as integers. Each column gets its own
# LabelEncoder, stored in feature_encoders, so the integer codes can be mapped
# back to the original labels later. A single shared encoder only remembers
# the classes of the last column it was fitted on.
categorical_columns = ['gender', 'ssc_b', 'hsc_b', 'hsc_s', 'degree_t', 'workex', 'specialisation']

# Work on an independent frame so the assignments below never hit a slice of df.
X = X.copy()

feature_encoders = {}
for column in categorical_columns:
    feature_encoders[column] = LabelEncoder()
    X[column] = feature_encoders[column].fit_transform(X[column])

# Encode the target without writing it back into df, so the original string
# labels stay available to the EDA cells when they are re-run.
# `le` ends up fitted on status, exactly as before.
y = le.fit_transform(df.status)

# Preview only; showing the whole frame adds nothing.
X.head()



In [None]:
y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
# stratify=y keeps the class proportions of the target the same in the train
# and test parts, so the evaluation stays representative if the classes are
# imbalanced.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.30,random_state=36,stratify=y)

In [None]:
from sklearn.metrics import accuracy_score,classification_report


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fix the forest's seed (same value as the train/test split) so the fitted
# model, and therefore the reported metrics, are identical on every
# Restart & Run All.
model=RandomForestClassifier(random_state=36)
model.fit(X_train,y_train)

# Predictions on the held-out rows, used by the metric cells below.
y_predict=model.predict(X_test)


In [None]:
accuracy_score(y_test,y_predict)


In [None]:
print(classification_report(y_test,y_predict))