In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the campus-placement dataset and preview its first rows.
csv_path = '/kaggle/input/factors-affecting-campus-placement/Placement_Data_Full_Class.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Summarise column dtypes and non-null counts to spot columns with missing values.
data.info()

* Salary has missing data values for students who have not been placed
* All other columns have no missing values

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Global plot styling shared by every figure below.
plot_theme = {
    'style': 'whitegrid',
    'palette': 'muted',
    'font_scale': 1.1,
}
sns.set(**plot_theme)

In [None]:
# Split the students by placement outcome.
placed_mask = data['status'] == 'Placed'
not_placed_mask = data['status'] == 'Not Placed'
data_p = data[placed_mask]
data_np = data.loc[not_placed_mask]

# Numeric copy of the frame: status encoded as 1 (Placed) / 0 (Not Placed)
# so that group means can be read as placement rates.
status_codes = {'Placed': 1, 'Not Placed': 0}
data2 = data.assign(status=data['status'].map(status_codes).astype(int))

# **Heat Map**

In [None]:
# Correlation heatmap of the numeric features against the encoded placement status.
# data2 still contains string-valued categorical columns (gender, boards, streams, ...).
# DataFrame.corr() raises on those in pandas >= 2.0 (older versions silently dropped
# them), so restrict the frame to numeric columns explicitly before correlating.
numeric_features = (
    data2
    .drop(['salary', 'sl_no'], axis=1)
    .select_dtypes(include='number')
)
plt.figure(figsize=(14,7))
plt.title('Heatmap')
sns.heatmap(data=numeric_features.corr(), annot=True)

* High correlation between status and (ssc_p, hsc_p, degree_p)
* Low correlation between status and (etest_p, mba_p)

# **Relation between different marks**

In [None]:
# Pairwise regression plots of all percentage-score columns, coloured by placement status.
score_columns = ['ssc_p', 'hsc_p', 'degree_p', 'etest_p', 'mba_p']
sns.pairplot(
    data,
    vars=score_columns,
    hue='status',
    kind='reg',
)

* For pairs of (ssc_p v/s hsc_p), (ssc_p v/s degree_p), (hsc_p v/s degree_p) it can be estabilished that good performance in all three fronts relate to higher chances of getting placed
* The other two columns do not point to any such correlation. The marks obtained in them seem to have no influence on the placement.

# **Gender**

In [None]:
# Placement rate per gender (mean of the 0/1 status column).
data2.groupby('gender', as_index=False)['status'].mean()

* Males have a slighly higher probabilty of getting placed when compared to females
* 71.9% males get placed compared to 63% females

# **Senior Secondary**

In [None]:
# ssc_p distribution for each placement status, coloured by ssc board.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='ssc_p', hue='ssc_b', ax=ax)

In [None]:
# Placement rate per ssc board (mean of the 0/1 status column).
data2.groupby('ssc_b', as_index=False)['status'].mean()

In [None]:
# Median salary per ssc board.
data.groupby('ssc_b', as_index=False)['salary'].median()

* No major diffrence in centrer board or other board w.r.t placement probability
* On an average, more percentage score in ssc increases chances of placement
* salary for students of both ssc_b boards is almost same
* Students with ssc_p < 50 are not placed
* Students with ssc_p > 80 are always placed

# **High School**

In [None]:
# hsc_p distribution for each placement status, coloured by hsc board.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='hsc_p', hue='hsc_b', ax=ax)

In [None]:
# Placement rate per hsc board (mean of the 0/1 status column).
data2.groupby('hsc_b', as_index=False)['status'].mean()

In [None]:
# Median salary per hsc board.
data.groupby('hsc_b', as_index=False)['salary'].median()

* No major diffrence in centrer board or other board w.r.t placement probability
* On an average, more percentage score in hsc increases chances of placement
* Median salary for students of both hsc_b boards is almost same

In [None]:
# hsc_p distribution for each placement status, coloured by hsc stream.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='hsc_p', hue='hsc_s', ax=ax)

In [None]:
# Placement rate per hsc stream (mean of the 0/1 status column).
data2.groupby('hsc_s', as_index=False)['status'].mean()

In [None]:
# Median salary per hsc stream.
data.groupby('hsc_s', as_index=False)['salary'].median()

* No major diffrence in Science and Commerce w.r.t placement probability. For Arts probability is low.
* Median salary for students in Science and Commerce is almost same. For Arts, the salary is lower.
* We should note that, data points for Arts are less. So, inference w.r.t Art might not be accurate.
* Students with hsc_p < 50 are not placed
* Students with hsc_p > 80 are always placed

# **Bachelor's Degree**

In [None]:
# degree_p distribution for each placement status, coloured by degree type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='degree_p', hue='degree_t', ax=ax)

In [None]:
# Placement rate per degree type (mean of the 0/1 status column).
data2.groupby('degree_t', as_index=False)['status'].mean()

In [None]:
# Median salary per degree type.
data.groupby('degree_t', as_index=False)['salary'].median()

* No major diffrence in Comm&Mgmt and Sci&Tech w.r.t placement probability. For Others probability is lower.
* On an average, more percentage score in bachelors degree increases chances of placement
* In terms of salary there is only a slight diffrence with Sci&Tech being the best paymaster and Others lowest.
* Datapoints for Others is less, so inference from its data might not be accurate
* Students with degree_p < 55 are not placed
* Students with degree_p > 80 are always placed

# **Work Experiece**

In [None]:
# Placement rate with and without prior work experience (mean of the 0/1 status column).
data2.groupby('workex', as_index=False)['status'].mean()

In [None]:
# Bar height is the mean of the 0/1 status column, i.e. the placement rate per workex group.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=data2, x='workex', y='status', ax=ax)

In [None]:
# Median salary with and without prior work experience.
data.groupby('workex', as_index=False)['salary'].median()

* People with prior work experience have a very high chance of placement.
* There is quite a big difference in percentage of people with work experience being placed as compared to freshers.
* Median salary for freshers and experinced professionals is almost similar.

# **Employability Test Score**

In [None]:
# etest_p distribution for each placement status.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='etest_p', ax=ax)

In [None]:
# Scatter plot with fitted regression line: salary on the x-axis, etest_p on the y-axis.
fig, ax = plt.subplots(figsize=(12, 6))
sns.regplot(data=data, x='salary', y='etest_p', ax=ax)

* employability test score does not impact placement. 
* Salary seems to increase with increase in employability test score.

# **MBA**

In [None]:
# mba_p distribution for each placement status, coloured by MBA specialisation.
fig, ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=data, x='status', y='mba_p', hue='specialisation', ax=ax)

In [None]:
# Placement rate per MBA specialisation (mean of the 0/1 status column).
data2.groupby('specialisation', as_index=False)['status'].mean()

In [None]:
# Median salary per MBA specialisation.
data2.groupby('specialisation', as_index=False)['salary'].median()

* No conclusive relation between mba_p and placement
* Mkt&Fin professionals have a higher chance in getting a placement than Mkt&Hr. Infact, 79% of Mkt&Fin get placed compared to just 55% of Mkt&HR
* Mkt&Fin have a slighly higher median salary than Mkt&Hr

In [None]:
# Placement rate by degree type, split by specialisation (hue) and work experience (column).
# Only the Sci&Tech and Comm&Mgmt degree types are kept in the plotted frame.
main_degree_rows = data2['degree_t'].isin(['Sci&Tech', 'Comm&Mgmt'])

# x and y must be passed by keyword: seaborn >= 0.12 makes every catplot argument
# after `data` keyword-only, so the old positional form raises a TypeError there.
sns.catplot(x='degree_t', y='status', hue='specialisation', col='workex',
            data=data2.loc[main_degree_rows],
            kind='point')

* Mkt&Fin has better placements
* Students with workex have better placements
* Mkt&Fin coupled with Comm&Mgmt degree and prior work experience has almost 100% placement probability
* For others, data points are we can not really analyze anything from it.