In [1]:
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import numpy
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
train.tail(40)

Unnamed: 0,MMM-YY,Emp_ID,Age,Gender,City,Education_Level,Salary,Dateofjoining,LastWorkingDate,Joining Designation,Designation,Total Business Value,Quarterly Rating
19064,2016-10-01,2784,33,Male,C24,College,82815,2012-10-15,,2,3,990000,3
19065,2016-11-01,2784,33,Male,C24,College,82815,2012-10-15,,2,3,591710,3
19066,2016-12-01,2784,33,Male,C24,College,82815,2012-10-15,,2,3,194010,3
19067,2017-01-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,1309620,3
19068,2017-02-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,850050,3
19069,2017-03-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,4128460,3
19070,2017-04-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,150260,3
19071,2017-05-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,153800,3
19072,2017-06-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,979270,3
19073,2017-07-01,2784,34,Male,C24,College,82815,2012-10-15,,2,3,252000,3


In [3]:
train['LastWorkingDate'] = train['LastWorkingDate'].fillna(0)
train.head()

Unnamed: 0,MMM-YY,Emp_ID,Age,Gender,City,Education_Level,Salary,Dateofjoining,LastWorkingDate,Joining Designation,Designation,Total Business Value,Quarterly Rating
0,2016-01-01,1,28,Male,C23,Master,57387,2015-12-24,0,1,1,2381060,2
1,2016-02-01,1,28,Male,C23,Master,57387,2015-12-24,0,1,1,-665480,2
2,2016-03-01,1,28,Male,C23,Master,57387,2015-12-24,2016-03-11,1,1,0,2
3,2017-11-01,2,31,Male,C7,Master,67016,2017-11-06,0,2,2,0,1
4,2017-12-01,2,31,Male,C7,Master,67016,2017-11-06,0,2,2,0,1


In [4]:
df = pd.DataFrame({'Emp_ID':sorted(train.Emp_ID.unique())})
df.head()

Unnamed: 0,Emp_ID
0,1
1,2
2,4
3,5
4,6


In [5]:
age_col = train.groupby(['Emp_ID']).Age.agg(['min'])
age_col = age_col.reset_index()
age_col.columns = ['Emp_ID','Age']

def get_age(x):
  return age_col[age_col.Emp_ID==x].Age.iloc[0]

df["Age"] = age_col.Emp_ID.apply(get_age)
df

Unnamed: 0,Emp_ID,Age
0,1,28
1,2,31
2,4,43
3,5,29
4,6,31
...,...,...
2376,2784,33
2377,2785,34
2378,2786,44
2379,2787,28


In [6]:
gen_col = train.groupby(['Emp_ID','Gender']).Gender.agg('count').to_frame()
gen_col.columns = ['gen_count']
gen_col = gen_col.reset_index()

def get_gen(x):
  return gen_col[gen_col.Emp_ID==x].Gender.iloc[0]

df["Gender"] = gen_col.Emp_ID.apply(get_gen)
df

Unnamed: 0,Emp_ID,Age,Gender
0,1,28,Male
1,2,31,Male
2,4,43,Male
3,5,29,Male
4,6,31,Female
...,...,...,...
2376,2784,33,Male
2377,2785,34,Female
2378,2786,44,Male
2379,2787,28,Female


In [7]:
city_col = train.groupby(['Emp_ID','City']).City.agg('count').to_frame()
city_col.columns = ['city_count']
city_col = city_col.reset_index()

def get_city(x):
  return city_col[city_col.Emp_ID==x].City.iloc[0]

df["City"] = city_col.Emp_ID.apply(get_city)
df

Unnamed: 0,Emp_ID,Age,Gender,City
0,1,28,Male,C23
1,2,31,Male,C7
2,4,43,Male,C13
3,5,29,Male,C9
4,6,31,Female,C11
...,...,...,...,...
2376,2784,33,Male,C24
2377,2785,34,Female,C9
2378,2786,44,Male,C19
2379,2787,28,Female,C20


In [8]:
edu_col = train.groupby(['Emp_ID','Education_Level']).Education_Level.agg('count').to_frame()
edu_col.columns = ['edu_count']
edu_col = edu_col.reset_index()

def get_edu(x):
  return edu_col[edu_col.Emp_ID==x].Education_Level.iloc[0]

df["Education_Level"] = edu_col.Emp_ID.apply(get_edu)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level
0,1,28,Male,C23,Master
1,2,31,Male,C7,Master
2,4,43,Male,C13,Master
3,5,29,Male,C9,College
4,6,31,Female,C11,Bachelor
...,...,...,...,...,...
2376,2784,33,Male,C24,College
2377,2785,34,Female,C9,College
2378,2786,44,Male,C19,College
2379,2787,28,Female,C20,Master


In [9]:
sal_col = train.groupby(['Emp_ID']).Salary.agg(['max'])
sal_col = sal_col.reset_index()
sal_col.columns = ['Emp_ID','Salary']

def get_sal(x):
  return sal_col[sal_col.Emp_ID==x].Salary.iloc[0]

df["Salary"] = sal_col.Emp_ID.apply(get_sal)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary
0,1,28,Male,C23,Master,57387
1,2,31,Male,C7,Master,67016
2,4,43,Male,C13,Master,65603
3,5,29,Male,C9,College,46368
4,6,31,Female,C11,Bachelor,78728
...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815
2377,2785,34,Female,C9,College,12105
2378,2786,44,Male,C19,College,35370
2379,2787,28,Female,C20,Master,69498


In [10]:
all_occur = train.groupby(['Emp_ID']).size().to_frame()
all_occur = all_occur.reset_index()
all_occur.columns = ['Emp_ID','months_worked']
def months_worked_baby(x):
  return all_occur[all_occur.Emp_ID==x].months_worked.iloc[0]

df["months_worked"] = all_occur.Emp_ID.apply(months_worked_baby)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked
0,1,28,Male,C23,Master,57387,3
1,2,31,Male,C7,Master,67016,2
2,4,43,Male,C13,Master,65603,5
3,5,29,Male,C9,College,46368,3
4,6,31,Female,C11,Bachelor,78728,5
...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24
2377,2785,34,Female,C9,College,12105,3
2378,2786,44,Male,C19,College,35370,9
2379,2787,28,Female,C20,Master,69498,6


In [11]:
join_des = train.groupby(['Emp_ID'])['Joining Designation'].agg(['max'])
join_des = join_des.reset_index()
join_des.columns = ['Emp_ID','Joining_Designation']
join_des

def get_jdes(x):
  return join_des[join_des.Emp_ID==x].Joining_Designation.iloc[0]

df["Joining_Designation"] = join_des.Emp_ID.apply(get_jdes)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation
0,1,28,Male,C23,Master,57387,3,1
1,2,31,Male,C7,Master,67016,2,2
2,4,43,Male,C13,Master,65603,5,2
3,5,29,Male,C9,College,46368,3,1
4,6,31,Female,C11,Bachelor,78728,5,3
...,...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24,2
2377,2785,34,Female,C9,College,12105,3,1
2378,2786,44,Male,C19,College,35370,9,2
2379,2787,28,Female,C20,Master,69498,6,1


In [12]:
cur_des = train.groupby(['Emp_ID'])['Designation'].agg(['max'])
cur_des = cur_des.reset_index()
cur_des.columns = ['Emp_ID','Designation']
cur_des
def get_cdes(x):
  return cur_des[cur_des.Emp_ID==x].Designation.iloc[0]

df["Designation"] = cur_des.Emp_ID.apply(get_cdes)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation,Designation
0,1,28,Male,C23,Master,57387,3,1,1
1,2,31,Male,C7,Master,67016,2,2,2
2,4,43,Male,C13,Master,65603,5,2,2
3,5,29,Male,C9,College,46368,3,1,1
4,6,31,Female,C11,Bachelor,78728,5,3,3
...,...,...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24,2,3
2377,2785,34,Female,C9,College,12105,3,1,1
2378,2786,44,Male,C19,College,35370,9,2,2
2379,2787,28,Female,C20,Master,69498,6,1,1


In [13]:
qua_rat = train.groupby(['Emp_ID'])['Quarterly Rating'].agg(['min'])
qua_rat = qua_rat.reset_index()
qua_rat.columns = ['Emp_ID','Minimum_Quarterly_Rating']
qua_rat

def get_rat(x):
  return qua_rat[qua_rat.Emp_ID==x].Minimum_Quarterly_Rating.iloc[0]

df["Minimum_Quarterly_Rating"] = qua_rat.Emp_ID.apply(get_rat)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation,Designation,Minimum_Quarterly_Rating
0,1,28,Male,C23,Master,57387,3,1,1,2
1,2,31,Male,C7,Master,67016,2,2,2,1
2,4,43,Male,C13,Master,65603,5,2,2,1
3,5,29,Male,C9,College,46368,3,1,1,1
4,6,31,Female,C11,Bachelor,78728,5,3,3,1
...,...,...,...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24,2,3,1
2377,2785,34,Female,C9,College,12105,3,1,1,1
2378,2786,44,Male,C19,College,35370,9,2,2,1
2379,2787,28,Female,C20,Master,69498,6,1,1,1


In [14]:
qua_rat = train.groupby(['Emp_ID'])['Quarterly Rating'].agg(['max'])
qua_rat = qua_rat.reset_index()
qua_rat.columns = ['Emp_ID','Maximum_Quarterly_Rating']
qua_rat

def get_rat(x):
  return qua_rat[qua_rat.Emp_ID==x].Maximum_Quarterly_Rating.iloc[0]

df["Maximum_Quarterly_Rating"] = qua_rat.Emp_ID.apply(get_rat)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation,Designation,Minimum_Quarterly_Rating,Maximum_Quarterly_Rating
0,1,28,Male,C23,Master,57387,3,1,1,2,2
1,2,31,Male,C7,Master,67016,2,2,2,1,1
2,4,43,Male,C13,Master,65603,5,2,2,1,1
3,5,29,Male,C9,College,46368,3,1,1,1,1
4,6,31,Female,C11,Bachelor,78728,5,3,3,1,2
...,...,...,...,...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24,2,3,1,4
2377,2785,34,Female,C9,College,12105,3,1,1,1,1
2378,2786,44,Male,C19,College,35370,9,2,2,1,2
2379,2787,28,Female,C20,Master,69498,6,1,1,1,2


In [15]:
doj = train.groupby(['Emp_ID','Dateofjoining']).Dateofjoining.agg('count').to_frame()
doj.columns = ['doj_count']
doj = doj.reset_index()

def get_doj(x):
  return doj[doj.Emp_ID==x].Dateofjoining.iloc[0]

df["Dateofjoining"] = doj.Emp_ID.apply(get_doj)
df

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation,Designation,Minimum_Quarterly_Rating,Maximum_Quarterly_Rating,Dateofjoining
0,1,28,Male,C23,Master,57387,3,1,1,2,2,2015-12-24
1,2,31,Male,C7,Master,67016,2,2,2,1,1,2017-11-06
2,4,43,Male,C13,Master,65603,5,2,2,1,1,2016-12-07
3,5,29,Male,C9,College,46368,3,1,1,1,1,2016-01-09
4,6,31,Female,C11,Bachelor,78728,5,3,3,1,2,2017-07-31
...,...,...,...,...,...,...,...,...,...,...,...,...
2376,2784,33,Male,C24,College,82815,24,2,3,1,4,2012-10-15
2377,2785,34,Female,C9,College,12105,3,1,1,1,1,2017-08-28
2378,2786,44,Male,C19,College,35370,9,2,2,1,2,2015-07-31
2379,2787,28,Female,C20,Master,69498,6,1,1,1,2,2015-07-21


In [17]:
X_train,X_val, y_train,y_val = train_test_split(X,y,random_state=42,test_size=0.2)

In [27]:
model = CatBoostRegressor(loss_function='RMSE')
model.fit(X_train,y_train,cat_features=['Gender','City','Education_Level','Dateofjoining'])

Learning rate set to 0.04377
0:	learn: 6.6102314	total: 12.4ms	remaining: 12.4s
1:	learn: 6.4288079	total: 40ms	remaining: 20s
2:	learn: 6.2719022	total: 51.5ms	remaining: 17.1s
3:	learn: 6.1260937	total: 62.5ms	remaining: 15.6s
4:	learn: 6.0025147	total: 70.3ms	remaining: 14s
5:	learn: 5.8598481	total: 81.1ms	remaining: 13.4s
6:	learn: 5.7602189	total: 85ms	remaining: 12.1s
7:	learn: 5.6325447	total: 103ms	remaining: 12.8s
8:	learn: 5.5329750	total: 114ms	remaining: 12.6s
9:	learn: 5.4316741	total: 122ms	remaining: 12.1s
10:	learn: 5.3339449	total: 131ms	remaining: 11.7s
11:	learn: 5.2340067	total: 142ms	remaining: 11.7s
12:	learn: 5.1343744	total: 154ms	remaining: 11.7s
13:	learn: 5.0428565	total: 165ms	remaining: 11.6s
14:	learn: 4.9567524	total: 177ms	remaining: 11.6s
15:	learn: 4.8806230	total: 186ms	remaining: 11.4s
16:	learn: 4.8222398	total: 192ms	remaining: 11.1s
17:	learn: 4.7577971	total: 201ms	remaining: 11s
18:	learn: 4.6905396	total: 213ms	remaining: 11s
19:	learn: 4.6316

161:	learn: 3.2789703	total: 1.99s	remaining: 10.3s
162:	learn: 3.2774331	total: 2s	remaining: 10.3s
163:	learn: 3.2752154	total: 2.01s	remaining: 10.3s
164:	learn: 3.2734884	total: 2.03s	remaining: 10.3s
165:	learn: 3.2715465	total: 2.04s	remaining: 10.2s
166:	learn: 3.2671765	total: 2.05s	remaining: 10.2s
167:	learn: 3.2655651	total: 2.08s	remaining: 10.3s
168:	learn: 3.2643869	total: 2.09s	remaining: 10.3s
169:	learn: 3.2633759	total: 2.1s	remaining: 10.3s
170:	learn: 3.2633694	total: 2.11s	remaining: 10.2s
171:	learn: 3.2625635	total: 2.12s	remaining: 10.2s
172:	learn: 3.2613617	total: 2.13s	remaining: 10.2s
173:	learn: 3.2570438	total: 2.14s	remaining: 10.2s
174:	learn: 3.2550176	total: 2.17s	remaining: 10.2s
175:	learn: 3.2534695	total: 2.18s	remaining: 10.2s
176:	learn: 3.2516093	total: 2.2s	remaining: 10.2s
177:	learn: 3.2493483	total: 2.21s	remaining: 10.2s
178:	learn: 3.2452587	total: 2.22s	remaining: 10.2s
179:	learn: 3.2434957	total: 2.23s	remaining: 10.2s
180:	learn: 3.241

331:	learn: 2.9836537	total: 4.19s	remaining: 8.44s
332:	learn: 2.9821047	total: 4.24s	remaining: 8.48s
333:	learn: 2.9804611	total: 4.25s	remaining: 8.47s
334:	learn: 2.9784255	total: 4.26s	remaining: 8.46s
335:	learn: 2.9768597	total: 4.27s	remaining: 8.45s
336:	learn: 2.9763427	total: 4.3s	remaining: 8.45s
337:	learn: 2.9731091	total: 4.31s	remaining: 8.44s
338:	learn: 2.9726340	total: 4.33s	remaining: 8.44s
339:	learn: 2.9711258	total: 4.34s	remaining: 8.43s
340:	learn: 2.9680776	total: 4.35s	remaining: 8.41s
341:	learn: 2.9665951	total: 4.37s	remaining: 8.4s
342:	learn: 2.9653547	total: 4.38s	remaining: 8.39s
343:	learn: 2.9620944	total: 4.39s	remaining: 8.38s
344:	learn: 2.9595968	total: 4.42s	remaining: 8.39s
345:	learn: 2.9584850	total: 4.43s	remaining: 8.37s
346:	learn: 2.9580287	total: 4.44s	remaining: 8.36s
347:	learn: 2.9569900	total: 4.46s	remaining: 8.36s
348:	learn: 2.9564400	total: 4.47s	remaining: 8.34s
349:	learn: 2.9536923	total: 4.48s	remaining: 8.33s
350:	learn: 2.

502:	learn: 2.7504485	total: 6.41s	remaining: 6.33s
503:	learn: 2.7487951	total: 6.42s	remaining: 6.32s
504:	learn: 2.7483400	total: 6.43s	remaining: 6.3s
505:	learn: 2.7472169	total: 6.44s	remaining: 6.29s
506:	learn: 2.7470657	total: 6.45s	remaining: 6.27s
507:	learn: 2.7468184	total: 6.46s	remaining: 6.26s
508:	learn: 2.7453815	total: 6.47s	remaining: 6.24s
509:	learn: 2.7440491	total: 6.49s	remaining: 6.23s
510:	learn: 2.7432219	total: 6.5s	remaining: 6.22s
511:	learn: 2.7427010	total: 6.51s	remaining: 6.2s
512:	learn: 2.7423380	total: 6.52s	remaining: 6.19s
513:	learn: 2.7406359	total: 6.53s	remaining: 6.17s
514:	learn: 2.7393106	total: 6.54s	remaining: 6.16s
515:	learn: 2.7383237	total: 6.55s	remaining: 6.15s
516:	learn: 2.7380809	total: 6.57s	remaining: 6.13s
517:	learn: 2.7379997	total: 6.58s	remaining: 6.12s
518:	learn: 2.7373916	total: 6.59s	remaining: 6.11s
519:	learn: 2.7363929	total: 6.6s	remaining: 6.09s
520:	learn: 2.7339462	total: 6.61s	remaining: 6.08s
521:	learn: 2.73

676:	learn: 2.5782478	total: 8.58s	remaining: 4.09s
677:	learn: 2.5773538	total: 8.59s	remaining: 4.08s
678:	learn: 2.5767466	total: 8.6s	remaining: 4.07s
679:	learn: 2.5758252	total: 8.62s	remaining: 4.05s
680:	learn: 2.5735224	total: 8.63s	remaining: 4.04s
681:	learn: 2.5720585	total: 8.65s	remaining: 4.04s
682:	learn: 2.5716047	total: 8.67s	remaining: 4.02s
683:	learn: 2.5707242	total: 8.68s	remaining: 4.01s
684:	learn: 2.5696163	total: 8.69s	remaining: 4s
685:	learn: 2.5683298	total: 8.72s	remaining: 3.99s
686:	learn: 2.5674756	total: 8.73s	remaining: 3.98s
687:	learn: 2.5659628	total: 8.75s	remaining: 3.97s
688:	learn: 2.5646996	total: 8.76s	remaining: 3.95s
689:	learn: 2.5641974	total: 8.77s	remaining: 3.94s
690:	learn: 2.5628486	total: 8.8s	remaining: 3.93s
691:	learn: 2.5614840	total: 8.81s	remaining: 3.92s
692:	learn: 2.5603410	total: 8.82s	remaining: 3.91s
693:	learn: 2.5588220	total: 8.83s	remaining: 3.9s
694:	learn: 2.5582942	total: 8.85s	remaining: 3.88s
695:	learn: 2.5575

848:	learn: 2.4138015	total: 10.8s	remaining: 1.92s
849:	learn: 2.4129681	total: 10.8s	remaining: 1.91s
850:	learn: 2.4121085	total: 10.8s	remaining: 1.89s
851:	learn: 2.4115139	total: 10.8s	remaining: 1.88s
852:	learn: 2.4109882	total: 10.8s	remaining: 1.87s
853:	learn: 2.4106154	total: 10.9s	remaining: 1.85s
854:	learn: 2.4104304	total: 10.9s	remaining: 1.84s
855:	learn: 2.4102906	total: 10.9s	remaining: 1.83s
856:	learn: 2.4100124	total: 10.9s	remaining: 1.82s
857:	learn: 2.4093098	total: 10.9s	remaining: 1.8s
858:	learn: 2.4089428	total: 10.9s	remaining: 1.79s
859:	learn: 2.4065732	total: 11s	remaining: 1.78s
860:	learn: 2.4062307	total: 11s	remaining: 1.77s
861:	learn: 2.4056008	total: 11s	remaining: 1.76s
862:	learn: 2.4050882	total: 11s	remaining: 1.75s
863:	learn: 2.4048459	total: 11s	remaining: 1.73s
864:	learn: 2.4045165	total: 11s	remaining: 1.72s
865:	learn: 2.4041634	total: 11s	remaining: 1.71s
866:	learn: 2.4030699	total: 11s	remaining: 1.69s
867:	learn: 2.4016540	total: 

<catboost.core.CatBoostRegressor at 0x14dd826b550>

In [51]:
y_pred_val = model.predict(X_val)

In [56]:
r2_score(y_pred_val,y_val)

0.8263408163975153

In [60]:
# df = df.drop('Dateofjoining',axis=1)


In [63]:
df
X = df.drop('months_worked',axis=1)
y = df['months_worked']

In [64]:
model.fit(X,y,cat_features=['Gender','City','Education_Level'])

Learning rate set to 0.045547
0:	learn: 6.5894424	total: 12.1ms	remaining: 12.1s
1:	learn: 6.4097736	total: 23.9ms	remaining: 11.9s
2:	learn: 6.2407225	total: 35.3ms	remaining: 11.7s
3:	learn: 6.0876786	total: 62.1ms	remaining: 15.5s
4:	learn: 5.9353770	total: 72.7ms	remaining: 14.5s
5:	learn: 5.7948122	total: 82.9ms	remaining: 13.7s
6:	learn: 5.6640716	total: 94.4ms	remaining: 13.4s
7:	learn: 5.5369636	total: 105ms	remaining: 13s
8:	learn: 5.4172900	total: 123ms	remaining: 13.5s
9:	learn: 5.3021201	total: 131ms	remaining: 13s
10:	learn: 5.1912358	total: 141ms	remaining: 12.7s
11:	learn: 5.0936174	total: 154ms	remaining: 12.7s
12:	learn: 5.0064458	total: 165ms	remaining: 12.6s
13:	learn: 4.9195349	total: 174ms	remaining: 12.2s
14:	learn: 4.8285895	total: 199ms	remaining: 13.1s
15:	learn: 4.7514596	total: 212ms	remaining: 13s
16:	learn: 4.6707595	total: 223ms	remaining: 12.9s
17:	learn: 4.6058973	total: 244ms	remaining: 13.3s
18:	learn: 4.5477375	total: 250ms	remaining: 12.9s
19:	learn:

170:	learn: 3.2945457	total: 2.17s	remaining: 10.5s
171:	learn: 3.2942770	total: 2.18s	remaining: 10.5s
172:	learn: 3.2927223	total: 2.19s	remaining: 10.5s
173:	learn: 3.2887356	total: 2.21s	remaining: 10.5s
174:	learn: 3.2876685	total: 2.22s	remaining: 10.5s
175:	learn: 3.2861446	total: 2.24s	remaining: 10.5s
176:	learn: 3.2824777	total: 2.25s	remaining: 10.5s
177:	learn: 3.2824758	total: 2.25s	remaining: 10.4s
178:	learn: 3.2811132	total: 2.27s	remaining: 10.4s
179:	learn: 3.2795315	total: 2.28s	remaining: 10.4s
180:	learn: 3.2794694	total: 2.29s	remaining: 10.4s
181:	learn: 3.2776126	total: 2.31s	remaining: 10.4s
182:	learn: 3.2769697	total: 2.32s	remaining: 10.3s
183:	learn: 3.2749829	total: 2.33s	remaining: 10.3s
184:	learn: 3.2733830	total: 2.34s	remaining: 10.3s
185:	learn: 3.2728801	total: 2.36s	remaining: 10.3s
186:	learn: 3.2706524	total: 2.37s	remaining: 10.3s
187:	learn: 3.2697276	total: 2.38s	remaining: 10.3s
188:	learn: 3.2697270	total: 2.39s	remaining: 10.2s
189:	learn: 

335:	learn: 3.0546638	total: 4.58s	remaining: 9.04s
336:	learn: 3.0539321	total: 4.59s	remaining: 9.03s
337:	learn: 3.0516227	total: 4.6s	remaining: 9.01s
338:	learn: 3.0513034	total: 4.61s	remaining: 8.99s
339:	learn: 3.0487835	total: 4.62s	remaining: 8.97s
340:	learn: 3.0479753	total: 4.63s	remaining: 8.95s
341:	learn: 3.0474775	total: 4.64s	remaining: 8.93s
342:	learn: 3.0463576	total: 4.65s	remaining: 8.91s
343:	learn: 3.0460637	total: 4.67s	remaining: 8.9s
344:	learn: 3.0455853	total: 4.68s	remaining: 8.89s
345:	learn: 3.0426669	total: 4.69s	remaining: 8.87s
346:	learn: 3.0403612	total: 4.7s	remaining: 8.85s
347:	learn: 3.0397168	total: 4.71s	remaining: 8.83s
348:	learn: 3.0382808	total: 4.73s	remaining: 8.82s
349:	learn: 3.0372394	total: 4.74s	remaining: 8.8s
350:	learn: 3.0351040	total: 4.75s	remaining: 8.79s
351:	learn: 3.0342025	total: 4.76s	remaining: 8.77s
352:	learn: 3.0335761	total: 4.79s	remaining: 8.78s
353:	learn: 3.0328760	total: 4.8s	remaining: 8.77s
354:	learn: 3.031

508:	learn: 2.8359700	total: 6.95s	remaining: 6.71s
509:	learn: 2.8345054	total: 6.96s	remaining: 6.69s
510:	learn: 2.8336927	total: 6.97s	remaining: 6.67s
511:	learn: 2.8321723	total: 6.98s	remaining: 6.65s
512:	learn: 2.8305809	total: 6.99s	remaining: 6.64s
513:	learn: 2.8287985	total: 7.01s	remaining: 6.63s
514:	learn: 2.8265679	total: 7.02s	remaining: 6.62s
515:	learn: 2.8261242	total: 7.04s	remaining: 6.6s
516:	learn: 2.8243404	total: 7.05s	remaining: 6.59s
517:	learn: 2.8231952	total: 7.07s	remaining: 6.58s
518:	learn: 2.8223792	total: 7.09s	remaining: 6.57s
519:	learn: 2.8212820	total: 7.1s	remaining: 6.55s
520:	learn: 2.8188056	total: 7.11s	remaining: 6.53s
521:	learn: 2.8160824	total: 7.12s	remaining: 6.52s
522:	learn: 2.8142526	total: 7.13s	remaining: 6.5s
523:	learn: 2.8127111	total: 7.15s	remaining: 6.49s
524:	learn: 2.8107538	total: 7.16s	remaining: 6.48s
525:	learn: 2.8102938	total: 7.18s	remaining: 6.47s
526:	learn: 2.8084609	total: 7.2s	remaining: 6.46s
527:	learn: 2.80

671:	learn: 2.6701978	total: 9.17s	remaining: 4.48s
672:	learn: 2.6693341	total: 9.18s	remaining: 4.46s
673:	learn: 2.6691060	total: 9.19s	remaining: 4.45s
674:	learn: 2.6674127	total: 9.21s	remaining: 4.43s
675:	learn: 2.6667041	total: 9.22s	remaining: 4.42s
676:	learn: 2.6656916	total: 9.24s	remaining: 4.41s
677:	learn: 2.6652088	total: 9.26s	remaining: 4.4s
678:	learn: 2.6645976	total: 9.27s	remaining: 4.38s
679:	learn: 2.6639724	total: 9.28s	remaining: 4.37s
680:	learn: 2.6635223	total: 9.29s	remaining: 4.35s
681:	learn: 2.6624257	total: 9.31s	remaining: 4.34s
682:	learn: 2.6620566	total: 9.32s	remaining: 4.32s
683:	learn: 2.6617626	total: 9.33s	remaining: 4.31s
684:	learn: 2.6602411	total: 9.34s	remaining: 4.29s
685:	learn: 2.6579682	total: 9.35s	remaining: 4.28s
686:	learn: 2.6575756	total: 9.36s	remaining: 4.26s
687:	learn: 2.6563032	total: 9.39s	remaining: 4.26s
688:	learn: 2.6560638	total: 9.4s	remaining: 4.24s
689:	learn: 2.6550384	total: 9.41s	remaining: 4.23s
690:	learn: 2.

830:	learn: 2.5418683	total: 11.4s	remaining: 2.31s
831:	learn: 2.5408294	total: 11.4s	remaining: 2.3s
832:	learn: 2.5396816	total: 11.4s	remaining: 2.28s
833:	learn: 2.5388226	total: 11.4s	remaining: 2.27s
834:	learn: 2.5378257	total: 11.4s	remaining: 2.26s
835:	learn: 2.5370958	total: 11.4s	remaining: 2.25s
836:	learn: 2.5367997	total: 11.5s	remaining: 2.23s
837:	learn: 2.5352276	total: 11.5s	remaining: 2.22s
838:	learn: 2.5346884	total: 11.5s	remaining: 2.21s
839:	learn: 2.5343733	total: 11.5s	remaining: 2.19s
840:	learn: 2.5331365	total: 11.5s	remaining: 2.18s
841:	learn: 2.5319709	total: 11.5s	remaining: 2.16s
842:	learn: 2.5302880	total: 11.5s	remaining: 2.15s
843:	learn: 2.5295666	total: 11.5s	remaining: 2.13s
844:	learn: 2.5289543	total: 11.6s	remaining: 2.12s
845:	learn: 2.5282141	total: 11.6s	remaining: 2.11s
846:	learn: 2.5275829	total: 11.6s	remaining: 2.09s
847:	learn: 2.5255226	total: 11.6s	remaining: 2.08s
848:	learn: 2.5247840	total: 11.6s	remaining: 2.06s
849:	learn: 2

999:	learn: 2.4001612	total: 13.7s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x14dd826b550>

In [65]:
merged_right = pd.merge(left=df, right=test, how='right', left_on='Emp_ID', right_on='Emp_ID')
merged_right

Unnamed: 0,Emp_ID,Age,Gender,City,Education_Level,Salary,months_worked,Joining_Designation,Designation,Minimum_Quarterly_Rating,Maximum_Quarterly_Rating
0,394,32,Female,C20,Master,97722,24,2,4,1,3
1,173,37,Male,C28,College,56174,24,1,3,2,4
2,1090,37,Male,C13,College,96750,24,2,4,2,4
3,840,39,Female,C8,College,88813,24,1,4,1,2
4,308,30,Male,C5,Master,188418,24,2,5,2,4
...,...,...,...,...,...,...,...,...,...,...,...
736,2134,38,Male,C29,College,116006,24,2,5,1,4
737,2255,38,Male,C25,College,133489,24,3,4,2,4
738,448,35,Male,C10,Bachelor,65389,24,2,4,1,3
739,1644,46,Female,C9,Bachelor,105513,24,2,4,1,3


In [66]:
y_pred = model.predict(merged_right)

In [68]:
mysub = pd.read_csv('sample_submission_znWiLZ4.csv')

In [71]:
mysub.Target=y_pred

In [72]:
mysub.to_csv('mysub.csv',index=False)