# Adopt A Buddy
## HackerEarth Machine Learning Challenge

In [2]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split


The following versions of the libraries are used:

In [18]:
print('Pandas Version: {}\nNumpy Version: {}\nSklearn Version: {}'.format(pd.__version__,np.__version__,sklearn.__version__))

Pandas Version: 1.0.5
Numpy Version: 1.19.1
Sklearn Version: 0.23.1


###### Loading Datasets

In [19]:
train=pd.read_csv('pettrain.csv')
train.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


In [20]:
test=pd.read_csv('pettest.csv')
test.set_index('pet_id',inplace=True)
test.head()

Unnamed: 0_level_0,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ANSL_75005,2005-08-17 00:00:00,2017-09-07 15:35:00,0.0,Black,0.87,42.73,0,7
ANSL_76663,2018-11-15 00:00:00,2019-05-08 17:24:00,1.0,Orange Tabby,0.06,6.71,0,1
ANSL_58259,2012-10-11 00:00:00,2018-04-02 16:51:00,1.0,Black,0.24,41.21,0,7
ANSL_67171,2015-02-13 00:00:00,2018-04-06 07:25:00,1.0,Black,0.29,8.46,7,1
ANSL_72871,2017-01-18 00:00:00,2018-04-26 13:42:00,1.0,Brown,0.71,30.92,0,7


Let's have a look at datasets

In [21]:
# Feature engineering: number of calendar days between issue_date and listing_date.
# More features could be extracted, e.g. issue/listing month, issue/listing year, quarter of the year.
listing_day = pd.to_datetime(train['listing_date']).dt.date
issue_day = pd.to_datetime(train['issue_date']).dt.date
elapsed = (listing_day - issue_day).fillna(pd.Timedelta('-1 days'))  # a missing difference is encoded as -1 day
train['date_diff'] = elapsed.dt.days

In [22]:
# The raw timestamps are no longer needed once date_diff has been derived from them
raw_date_cols = ['issue_date', 'listing_date']
train.drop(raw_date_cols, axis='columns', inplace=True)

In [23]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18834 entries, 0 to 18833
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   pet_id          18834 non-null  object 
 1   condition       17357 non-null  float64
 2   color_type      18834 non-null  object 
 3   length(m)       18834 non-null  float64
 4   height(cm)      18834 non-null  float64
 5   X1              18834 non-null  int64  
 6   X2              18834 non-null  int64  
 7   breed_category  18834 non-null  float64
 8   pet_category    18834 non-null  int64  
 9   date_diff       18834 non-null  int64  
dtypes: float64(4), int64(4), object(2)
memory usage: 1.4+ MB


So we see there are some null (missing) values in the condition column.
Let's look at those rows.

In [47]:
train['condition'].unique()

array([ 2.,  1., nan,  0.])

In [44]:
train[train['breed_category']==2]

Unnamed: 0,pet_id,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category,date_diff
2,ANSL_69750,,Brown,0.15,40.90,15,4,2.0,4,752
21,ANSL_50488,,Brown,0.45,14.00,11,4,2.0,4,202
35,ANSL_54000,,White,0.03,21.90,13,9,2.0,1,28
38,ANSL_50330,,Brown,0.73,8.22,0,6,2.0,4,206
43,ANSL_58673,,Black,0.08,30.40,9,2,2.0,1,69
...,...,...,...,...,...,...,...,...,...,...
18761,ANSL_62681,,Orange Tabby,0.28,13.09,13,9,2.0,1,21
18777,ANSL_69550,,Brown Tabby,0.96,8.97,13,9,2.0,1,43
18801,ANSL_65823,,Brown Tabby,0.18,23.49,18,4,2.0,1,3309
18812,ANSL_63707,,Brown,0.58,40.07,15,4,2.0,4,752


All NaN values correspond to one particular breed_category, so we can replace NaN with a dedicated value (say 3).

In [48]:


# `condition` is the only column with missing values (see train.info() above).
# Fill that column explicitly with the sentinel category 3 rather than calling
# fillna on the whole frame, so a NaN that later appears in any other column is
# not silently turned into a meaningless 3.
train['condition'] = train['condition'].fillna(3)

In [49]:
train.shape

(18834, 10)

In [50]:
# Relabel the length column as centimetres (only the label changes here; the values are scaled separately)
train.rename({'length(m)': 'length(cm)'}, axis='columns', inplace=True)

In [51]:
# Metres -> centimetres, so that length and height share the same unit
train['length(cm)'] = train['length(cm)'] * 100

In [57]:
# Check the distinct values (and how many there are) of length and height
for col in ('length(cm)', 'height(cm)'):
    print(train[col].unique())
    print(train[col].nunique())

[ 80.  72.  15.  62.  50.  92.  14.   5.  55.  30.  44.  20.  45.  48.
  18.  88.  93.  76.  96.  66.  31.  51.  53.  32.  98.  68.  16.  60.
  83.   3.  91.  73.  90.  65.  79.   8.  52.  42.   7.  41.  54.  19.
  67.  70.  33.   9.   4.  38.  10.  49.  24.  17.  27.  58.   1.  56.
  46.  64.   0.  89.  43.   2.  21.  11.  84.   6.  61.  81.  82.  99.
  95.  37.  69.  63.  36.  97.  40.  77. 100.  47.  87.  29.  12.  26.
  85.  28.  78.  86.  71.  74.  75.  22.  35.  57.  39.  94.  25.  13.
  23.  59.  34.]
101
[ 7.78 14.19 40.9  ... 17.28 12.35 42.87]
4425


>We see that there are also 0-length values, so we should handle them.
> Also, height takes a very large number of distinct values (4425), so we could drop that column. For now I am keeping it.

In [58]:
# Replace every 0 length with the column mean.
# The mean is taken before the replacement (zeros included) and kept in `val`.
val = train['length(cm)'].mean()
is_zero_length = train['length(cm)'] == 0
train['length(cm)'] = train['length(cm)'].mask(is_zero_length, val)

In [59]:
# Height-to-length ratio (both columns are in centimetres at this point)
train['ratio'] = train['height(cm)'].div(train['length(cm)'])

In [60]:
train.head()

Unnamed: 0,pet_id,condition,color_type,length(cm),height(cm),X1,X2,breed_category,pet_category,date_diff,ratio
0,ANSL_69903,2.0,Brown Tabby,80.0,7.78,13,9,0.0,1,73,0.09725
1,ANSL_66892,1.0,White,72.0,14.19,13,9,0.0,2,1862,0.197083
2,ANSL_69750,3.0,Brown,15.0,40.9,15,4,2.0,4,752,2.726667
3,ANSL_71623,1.0,White,62.0,17.82,0,1,0.0,2,755,0.287419
4,ANSL_57969,2.0,Black,50.0,11.06,18,4,0.0,1,52,0.2212


Now let's deal with Categorical Values..(color_type)

In [61]:
train['color_type'].nunique()

56

Since the cardinality of color_type is not very high, we can use one-hot encoding.
I'm using the get_dummies() method for this.

In [62]:
# One indicator column per distinct color_type value; rows keep train's index
train2=pd.get_dummies(train['color_type'])

In [63]:
train2.head()

Unnamed: 0,Agouti,Apricot,Black,Black Brindle,Black Smoke,Black Tabby,Black Tiger,Blue,Blue Cream,Blue Merle,...,Silver Lynx Point,Silver Tabby,Tan,Torbie,Tortie,Tortie Point,Tricolor,White,Yellow,Yellow Brindle
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [64]:
# Append the one-hot colour columns to the cleaned training frame (aligned on the row index)
train3 = pd.concat((train, train2), axis='columns')
train3.head()

Unnamed: 0,pet_id,condition,color_type,length(cm),height(cm),X1,X2,breed_category,pet_category,date_diff,...,Silver Lynx Point,Silver Tabby,Tan,Torbie,Tortie,Tortie Point,Tricolor,White,Yellow,Yellow Brindle
0,ANSL_69903,2.0,Brown Tabby,80.0,7.78,13,9,0.0,1,73,...,0,0,0,0,0,0,0,0,0,0
1,ANSL_66892,1.0,White,72.0,14.19,13,9,0.0,2,1862,...,0,0,0,0,0,0,0,1,0,0
2,ANSL_69750,3.0,Brown,15.0,40.9,15,4,2.0,4,752,...,0,0,0,0,0,0,0,0,0,0
3,ANSL_71623,1.0,White,62.0,17.82,0,1,0.0,2,755,...,0,0,0,0,0,0,0,1,0,0
4,ANSL_57969,2.0,Black,50.0,11.06,18,4,0.0,1,52,...,0,0,0,0,0,0,0,0,0,0


In [65]:
# Build the feature matrix X by removing everything that is not a model input:
#   - pet_id: identifier
#   - breed_category / pet_category: the two targets
#   - color_type: replaced by its one-hot columns
#   - height(cm) / length(cm): represented by `ratio`
#   - 'Black Tiger' / 'Brown Tiger': NOTE(review) presumably colours that do not
#     occur in the test set -- confirm against test['color_type']
non_feature_cols = [
    'pet_id',
    'breed_category',
    'pet_category',
    'Black Tiger',
    'Brown Tiger',
    'color_type',
    'height(cm)',
    'length(cm)',
]
X = train3.drop(columns=non_feature_cols).copy()
X.head()

Unnamed: 0,condition,X1,X2,date_diff,ratio,Agouti,Apricot,Black,Black Brindle,Black Smoke,...,Silver Lynx Point,Silver Tabby,Tan,Torbie,Tortie,Tortie Point,Tricolor,White,Yellow,Yellow Brindle
0,2.0,13,9,73,0.09725,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.0,13,9,1862,0.197083,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3.0,15,4,752,2.726667,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1.0,0,1,755,0.287419,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2.0,18,4,52,0.2212,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [66]:
y1=train3['breed_category']

We have to apply the same steps to our test data.

In [67]:
# Same date_diff feature as for the training data: calendar days from issue to listing
listing_day = pd.to_datetime(test['listing_date']).dt.date
issue_day = pd.to_datetime(test['issue_date']).dt.date
elapsed = (listing_day - issue_day).fillna(pd.Timedelta('-1 days'))  # a missing difference is encoded as -1 day
test['date_diff'] = elapsed.dt.days

In [68]:
# The raw timestamps are no longer needed once date_diff has been derived from them
raw_date_cols = ['issue_date', 'listing_date']
test.drop(raw_date_cols, axis='columns', inplace=True)

In [69]:
# Mirror the training step: missing values are replaced with the sentinel 3.
# NOTE(review): this fills NaN in *every* column of test, not only `condition` --
# confirm that `condition` is the only test column with missing values.
test.fillna(value=3,inplace=True)

In [70]:
# Relabel the length column as centimetres (only the label changes here; the values are scaled separately)
test.rename({'length(m)': 'length(cm)'}, axis='columns', inplace=True)

In [71]:
# Metres -> centimetres, matching the training data
test['length(cm)'] = test['length(cm)'] * 100

In [72]:
# Impute zero lengths with the TRAINING-set mean that is still held in `val`
# (computed when the training lengths were cleaned) instead of re-deriving a
# mean from the test set. Preprocessing statistics must come from the training
# data only, so that train and test rows go through the same transformation.
test['length(cm)'] = test['length(cm)'].replace(to_replace=0, value=val)

In [73]:
# Height-to-length ratio, same definition as in the training data
test['ratio'] = test['height(cm)'].div(test['length(cm)'])

In [74]:
test.head()

Unnamed: 0_level_0,condition,color_type,length(cm),height(cm),X1,X2,date_diff,ratio
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ANSL_75005,0.0,Black,87.0,42.73,0,7,4404,0.491149
ANSL_76663,1.0,Orange Tabby,6.0,6.71,0,1,174,1.118333
ANSL_58259,1.0,Black,24.0,41.21,0,7,1999,1.717083
ANSL_67171,1.0,Black,29.0,8.46,7,1,1148,0.291724
ANSL_72871,1.0,Brown,71.0,30.92,0,7,463,0.435493


In [75]:
test2=pd.get_dummies(test['color_type'])

In [76]:

# Attach the one-hot colour columns, then discard the raw columns the model does not consume
test_with_dummies = pd.concat((test, test2), axis='columns')
test3 = test_with_dummies.drop(columns=['color_type', 'height(cm)', 'length(cm)'])
test3

Unnamed: 0_level_0,condition,X1,X2,date_diff,ratio,Agouti,Apricot,Black,Black Brindle,Black Smoke,...,Silver Lynx Point,Silver Tabby,Tan,Torbie,Tortie,Tortie Point,Tricolor,White,Yellow,Yellow Brindle
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ANSL_75005,0.0,0,7,4404,0.491149,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_76663,1.0,0,1,174,1.118333,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_58259,1.0,0,7,1999,1.717083,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_67171,1.0,7,1,1148,0.291724,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_72871,1.0,0,7,463,0.435493,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ANSL_66809,2.0,13,9,394,0.440000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_59041,0.0,13,9,798,0.562041,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
ANSL_60034,0.0,0,7,393,0.379490,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_58066,3.0,0,2,387,0.301646,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
# Align the test features with the training feature matrix: same columns, same order.
# reindex (instead of test3[X.columns]) keeps this from raising KeyError when a
# colour seen in training never occurs in the test set; such a one-hot column is
# created and filled with 0 ("colour not present"). Test-only colours are dropped,
# exactly as plain column selection would do.
X_test = test3.reindex(columns=X.columns, fill_value=0)
X_test.head()

Unnamed: 0_level_0,condition,X1,X2,date_diff,ratio,Agouti,Apricot,Black,Black Brindle,Black Smoke,...,Silver Lynx Point,Silver Tabby,Tan,Torbie,Tortie,Tortie Point,Tricolor,White,Yellow,Yellow Brindle
pet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ANSL_75005,0.0,0,7,4404,0.491149,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_76663,1.0,0,1,174,1.118333,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_58259,1.0,0,7,1999,1.717083,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_67171,1.0,7,1,1148,0.291724,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
ANSL_72871,1.0,0,7,463,0.435493,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [78]:
y = train3[['breed_category','pet_category']]

In [79]:
y1=train3['breed_category']
y1

0        0.0
1        0.0
2        2.0
3        0.0
4        0.0
        ... 
18829    0.0
18830    2.0
18831    1.0
18832    1.0
18833    1.0
Name: breed_category, Length: 18834, dtype: float64

### Creating And Evaluating our Model

In [80]:
# Split the data for the breed_category target only: 20% of the rows are held out
# as a validation set (Xt, yt); random_state=0 keeps the split reproducible.
X_train,Xt,y_train,yt=train_test_split(X,y1,test_size=0.2,random_state=0)

In [81]:
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.metrics import f1_score,make_scorer
from sklearn.model_selection import GridSearchCV
#from sklearn import cross_validation, metrics 

# scikit-learn scorer (weighted-average F1) passed as `scoring=` to the grid searches below
my_sc = make_scorer(f1_score , average='weighted')

In [None]:
#clf1= GradientBoostingClassifier(learning_rate=0.3, n_estimators=500,max_depth=3, min_samples_split=10, min_samples_leaf=2,max_features='sqrt',random_state=0)

In [None]:
#clf1.fit(X_train,y_train)

In [82]:
# Baseline XGBoost model for breed_category, with untuned starting hyper-parameters.
clf1 = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=100,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=0,
)
# No eval_metric is passed to fit(): `my_sc` is a scikit-learn scorer, which is not
# a valid XGBoost evaluation metric (XGBoost expects a metric name or a callable
# with its own signature). Without an eval_set it was never evaluated anyway, and
# newer XGBoost releases reject the `eval_metric` argument of fit() altogether.
# The weighted F1 is computed explicitly with f1_score on the hold-out split instead.
clf1.fit(X_train, y_train)

XGBClassifier(colsample_bytree=0.8, max_depth=5, objective='multi:softprob',
              subsample=0.8)

In [None]:
#modelfit(clf1, X_train,y_train)

In [84]:
clf1.score(X_train,y_train)

0.926860025220681

In [85]:
pred1=clf1.predict(Xt)

In [86]:
f1_score(yt,pred1,average='weighted')

0.9049692232278427

In [83]:
##Parameter Tuning for 1st Model

In [88]:
# First tuning pass: search tree count, tree depth and minimum child weight together
param_test1 = dict(
    n_estimators=[120, 140, 150, 300, 500],
    max_depth=[3, 4, 5],
    min_child_weight=[3, 4, 5, 6],
)

In [None]:
gs1=GridSearchCV(clf1,param_grid=param_test1,scoring=my_sc,cv=3,n_jobs=-1)

In [None]:
gs1.fit(X_train,y_train)

In [None]:
#Get best parameters
gs1.best_params_,gs1.best_score_

In [None]:
# Second tuning pass (breed_category): search gamma only, all other hyper-parameters held fixed
param_test3 = {'gamma': [0.01, 0.1, 0.25, 0.3, 0.35]}
gamma_search_base = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=120,
    max_depth=4,
    min_child_weight=5,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=0,
)
gs2 = GridSearchCV(
    estimator=gamma_search_base,
    param_grid=param_test3,
    scoring=my_sc,
    n_jobs=-1,
    cv=3,
)

In [None]:
gs2.fit(X_train,y_train)

In [None]:
gs2.best_params_,gs2.best_score_

In [None]:
# Third tuning pass (breed_category): search the L1 regularisation term reg_alpha.
# The remaining hyper-parameters are held at the values used by the gamma search
# and by the final model1. min_child_weight is 5 here (it was 2), so that
# reg_alpha is tuned for the same configuration that is actually trained.
param_test4 = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}
gs3 = GridSearchCV(
    estimator=xgb.XGBClassifier(
        learning_rate=0.1,
        n_estimators=120,
        max_depth=4,
        min_child_weight=5,
        gamma=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=0,
    ),
    param_grid=param_test4,
    scoring=my_sc,
    n_jobs=-1,
    cv=3,
)
gs3.fit(X_train, y_train)

In [None]:
gs3.best_params_,gs3.best_score_

In [89]:
# Final breed_category model, built with the tuned hyper-parameters.
model1 = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=120,
    max_depth=4,
    min_child_weight=5,
    gamma=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=1e-05,
    random_state=0,
)
# No eval_metric is passed to fit(): `my_sc` is a scikit-learn scorer, not a valid
# XGBoost evaluation metric. It was never evaluated without an eval_set, and newer
# XGBoost releases reject the `eval_metric` argument of fit().
model1.fit(X_train, y_train)

XGBClassifier(colsample_bytree=0.8, gamma=0.1, max_depth=4, min_child_weight=5,
              n_estimators=120, objective='multi:softprob', reg_alpha=1e-05,
              subsample=0.8)

In [90]:
model1.score(X_train,y_train)

0.924271586911794

In [91]:
p1=model1.predict(Xt)

In [92]:
f1_score(yt,p1,average='weighted')

0.9049869626426201

In [93]:
#For pet_category Only

In [94]:
y2=train3['pet_category']

In [95]:
X_train2,Xt2,y_train2,yt2=train_test_split(X,y2,test_size=0.2,random_state=0)

In [96]:
# Baseline XGBoost model for pet_category, with the same starting hyper-parameters as the breed baseline
baseline_params = dict(
    learning_rate=0.1,
    n_estimators=100,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=0,
)
clf2 = xgb.XGBClassifier(**baseline_params)
clf2.fit(X_train2, y_train2)

XGBClassifier(colsample_bytree=0.8, max_depth=5, objective='multi:softprob',
              subsample=0.8)

In [97]:
clf2.score(X_train2,y_train2)

0.9019048251144887

In [98]:
pred2=clf2.predict(Xt2)

In [99]:
f1_score(yt2,pred2,average='weighted')

0.8999622732318409

In [None]:
#Parameter tuning for 2nd Model

In [None]:
# First tuning pass for pet_category, using the same param_test1 grid as for breed_category.
# NOTE: this rebinds gs1, so the breed_category search results are no longer reachable under that name.
gs1=GridSearchCV(clf2,param_grid=param_test1,scoring=my_sc,cv=3,n_jobs=-1)
gs1.fit(X_train2,y_train2)

In [None]:
gs1.best_params_,gs1.best_score_

In [None]:
# Second tuning pass (pet_category): search gamma only, all other hyper-parameters held fixed
param_test3 = {'gamma': [0.01, 0.1, 0.25, 0.3, 0.35]}
gamma_search_base = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=300,
    max_depth=4,
    min_child_weight=3,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=0,
)
gs2 = GridSearchCV(
    estimator=gamma_search_base,
    param_grid=param_test3,
    scoring=my_sc,
    n_jobs=-1,
    cv=3,
)

In [None]:
gs2.fit(X_train2,y_train2)

In [None]:
gs2.best_params_,gs2.best_score_

In [None]:
# Third tuning pass (pet_category): search the L1 regularisation term reg_alpha,
# with the remaining hyper-parameters held fixed.
param_test4 = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}
gs3 = GridSearchCV(
    estimator=xgb.XGBClassifier(
        learning_rate=0.1,
        n_estimators=300,
        max_depth=4,
        min_child_weight=3,
        gamma=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=0,
    ),
    param_grid=param_test4,
    scoring=my_sc,
    n_jobs=-1,
    cv=3,
)
# Fit on the pet_category split (X_train2 / y_train2). This search previously ran
# on X_train / y_train, i.e. against the breed_category target, so the selected
# reg_alpha was tuned for the wrong model.
gs3.fit(X_train2, y_train2)

In [None]:
gs3.best_params_,gs3.best_score_

In [100]:
#Define Model with tuned Parameters
# Final pet_category model, built with the tuned hyper-parameters.
model2 = xgb.XGBClassifier(
    learning_rate=0.1,
    n_estimators=300,
    max_depth=4,
    min_child_weight=3,
    gamma=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.1,
    random_state=0,
)
# No eval_metric is passed to fit(): `my_sc` is a scikit-learn scorer, not a valid
# XGBoost evaluation metric. It was never evaluated without an eval_set, and newer
# XGBoost releases reject the `eval_metric` argument of fit().
model2.fit(X_train2, y_train2)

XGBClassifier(colsample_bytree=0.8, gamma=0.1, max_depth=4, min_child_weight=3,
              n_estimators=300, objective='multi:softprob', reg_alpha=0.1,
              subsample=0.8)

In [101]:
model2.score(X_train2,y_train2)


0.9107320634499236

In [102]:
p2=model2.predict(Xt2)
f1_score(yt2,p2,average='weighted')

0.9033840534629174

### Predictions

In [103]:
y_pred1=model1.predict(X_test)

In [104]:
y_pred2=model2.predict(X_test)

In [107]:
# Put the predictions of the two single-target models side by side
preddf = pd.DataFrame(dict(breed=y_pred1, pet=y_pred2))
preddf

Unnamed: 0,breed,pet
0,1.0,2
1,0.0,1
2,0.0,2
3,0.0,2
4,0.0,2
...,...,...
8067,0.0,2
8068,1.0,2
8069,1.0,2
8070,2.0,4


Alternatively, we can wrap one of the two models in a MultiOutputClassifier to predict both targets at once.

In [108]:
from sklearn.multioutput import MultiOutputClassifier
# Wrap model2 so that both target columns of y (breed_category, pet_category) are
# predicted at once; the wrapper is fitted on the full training data X.
# NOTE: this rebinds clf1, which previously held the baseline breed_category XGBClassifier.
clf1=MultiOutputClassifier(model2).fit(X,y)

In [109]:
y_pred=clf1.predict(X_test)

In [110]:
preddf=pd.DataFrame(y_pred,columns=['breed','pet'])
preddf

Unnamed: 0,breed,pet
0,1.0,2.0
1,0.0,1.0
2,0.0,2.0
3,0.0,2.0
4,0.0,2.0
...,...,...
8067,1.0,2.0
8068,1.0,2.0
8069,1.0,2.0
8070,2.0,4.0


In [111]:
# Turn the pet_id index back into a column and line it up with the predictions
# (both frames then share the same default 0..n-1 row index)
t1 = X_test.reset_index()
final = pd.concat((t1, preddf), axis='columns')

In [112]:
# Write the submission file (header: pet_id,breed_category,pet_category) in a
# single vectorised call. The previous version formatted every row through three
# chained `final.loc[i][...]` lookups and special-cased the last row with
# identical code; to_csv produces the same rows without the per-row overhead.
submission = final[['pet_id', 'breed', 'pet']].rename(
    columns={'breed': 'breed_category', 'pet': 'pet_category'}
)
submission.to_csv('res.csv', index=False)
        
    