In [4]:
import pandas as pd
from warnings import filterwarnings
data = pd.read_csv("Final_Train_Dataset.csv")
df= data[['company_name_encoded','experience', 'location', 'salary']]
filterwarnings('ignore')

In [5]:
df.shape

(19802, 4)

In [6]:
df.head()

Unnamed: 0,company_name_encoded,experience,location,salary
0,3687,5-7 yrs,Delhi NCR(Vikas Puri),6to10
1,458,10-17 yrs,Sonepat,10to15
2,4195,5-9 yrs,Delhi NCR,15to25
3,313,7-10 yrs,Bengaluru,10to15
4,1305,1-3 yrs,Gurgaon,3to6


In [7]:
df.isna().sum().sum()

0

In [8]:
#Cleaning the experience
exp = list(data.experience)
min_ex = []
max_ex = []

for i in range(len(exp)):
   exp[i] = exp[i].replace("yrs","").strip()
   min_ex.append(int(exp[i].split("-")[0].strip()))
   max_ex.append(int(exp[i].split("-")[1].strip()))

In [9]:
#Attaching the new experiences to the original dataset
df["minimum_exp"] = min_ex
df["maximum_exp"] = max_ex

In [14]:
df.head()

Unnamed: 0,company_name_encoded,experience,location,salary,minimum_exp,maximum_exp
0,3687,5-7 yrs,Delhi NCR(Vikas Puri),6to10,5,7
1,458,10-17 yrs,Sonepat,10to15,10,17
2,4195,5-9 yrs,Delhi NCR,15to25,5,9
3,313,7-10 yrs,Bengaluru,10to15,7,10
4,1305,1-3 yrs,Gurgaon,3to6,1,3


In [10]:
#Label encoding location and salary
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['location'] = le.fit_transform(df['location'])
df['salary'] = le.fit_transform(df['salary'])

In [11]:
df['salary'].unique()

array([5, 1, 2, 4, 3, 0], dtype=int64)

In [12]:
#Deleting the original experience column and reordering
df.drop(['experience'], inplace = True, axis = 1)
df = df[['company_name_encoded', 'location','minimum_exp', 'maximum_exp', 'salary']]

In [13]:
df.head().style.highlight_max(axis=0)

Unnamed: 0,company_name_encoded,location,minimum_exp,maximum_exp,salary
0,3687,597,5,7,5
1,458,1412,10,17,1
2,4195,525,5,9,2
3,313,114,7,10,1
4,1305,811,1,3,4


In [14]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
df[['company_name_encoded', 'location', 'minimum_exp', 'maximum_exp']] = sc.fit_transform(df[['company_name_encoded', 'location', 'minimum_exp', 'maximum_exp']])

In [15]:
df.head()

Unnamed: 0,company_name_encoded,location,minimum_exp,maximum_exp,salary
0,0.886106,-0.211248,0.197617,-0.25593,5
1,-1.302594,1.687695,1.6895,2.100694,1
2,1.230441,-0.379007,0.197617,0.215395,2
3,-1.400879,-1.336633,0.79437,0.451058,1
4,-0.728476,0.28737,-0.99589,-1.198579,4


In [16]:
#Splitting the dataset into  training and validation sets
from sklearn.model_selection import train_test_split
training_set, validation_set = train_test_split(df, test_size = 0.2, random_state = 21)

#classifying the predictors and target variables as X and Y
X_train = training_set.iloc[:,0:-1].values
Y_train = training_set.iloc[:,-1].values
X_val = validation_set.iloc[:,0:-1].values
y_val = validation_set.iloc[:,-1].values

In [17]:
def accuracy(confusion_matrix):
   diagonal_sum = confusion_matrix.trace()
   sum_of_all_elements = confusion_matrix.sum()
   return diagonal_sum / sum_of_all_elements

In [18]:
#Importing MLPClassifier
from sklearn.neural_network import MLPClassifier

#Initializing the MLPClassifier
classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=300,activation = 'relu',solver='adam',random_state=1)

In [19]:
#Fitting the training data to the network
classifier.fit(X_train, Y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(150, 100, 50), learning_rate='constant',
              learning_rate_init=0.001, max_iter=300, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=1, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [20]:
#Predicting y for X_val
y_pred = classifier.predict(X_val)

In [21]:
#Importing Confusion Matrix
from sklearn.metrics import confusion_matrix
#Comparing the predictions against the actual observations in y_val
cm = confusion_matrix(y_pred, y_val)

#Printing the accuracy
print("Accuracy of MLPClassifier : ", accuracy(cm))

Accuracy of MLPClassifier :  0.4036859378944711


In [1]:
a=2
a

2