In [None]:
import numpy as np 
import pandas as pd
import PIL.Image as Image
import os
import matplotlib.pyplot as plt
import lightgbm as lgb
import xgboost as xgb
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor,StackingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Read the competition's training table from the attached input dataset.
train_csv_path = '../input/petfinder-pawpularity-score/train.csv'
df1 = pd.read_csv(train_csv_path)

In [None]:
# Drop the twelve tabular metadata columns; the models below are trained on
# CNN image features only.
# The frame is rebound (no inplace=True) and errors='ignore' is set so that
# re-running this cell does not raise KeyError on already-removed columns.
METADATA_COLUMNS = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
df1 = df1.drop(columns=METADATA_COLUMNS, errors='ignore')

In [None]:
# Show df1 as this cell's rich output.
df1

In [None]:
# Series of training image identifiers, used later when assembling features.
Id = df1['Id']

In [None]:
# InceptionV3 convolutional backbone (classification head removed) on a
# 224x224 RGB input. weights=None builds the architecture without
# downloading anything.
input_ = tf.keras.layers.Input(shape=(224, 224, 3))
model = tf.keras.applications.InceptionV3(
    input_tensor=input_,
    weights=None,
    include_top=False,
)

In [None]:
# Load the no-top InceptionV3 weights from the attached dataset file.
weights_path = '../input/inceptionv3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
model.load_weights(weights_path)

In [None]:
def make_image_array(img):
    """Load an image file as a scaled, single-item batch for the CNN.

    Parameters
    ----------
    img : str or file object
        Anything accepted by ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 224, 224, 3)`` with values scaled to ``[0, 1]``.
    """
    # The context manager closes the underlying file handle. convert('RGB')
    # guarantees exactly three channels, so greyscale, palette or RGBA images
    # no longer make the reshape below fail.
    with Image.open(img) as picture:
        rgb = picture.convert('RGB').resize((224, 224))
    pixels = np.asarray(rgb)
    return pixels.reshape(1, 224, 224, 3) / 255.0

In [None]:
# Preview a 3x3 grid of randomly chosen training images.
preview_dir = '../input/petfinder-pawpularity-score/train/'
# List the directory once instead of once per plotted image.
preview_files = os.listdir(preview_dir)

fig = plt.figure(figsize=(10, 10))
# Sample indices from the real file count rather than a hard-coded 4319,
# which would raise IndexError on a smaller directory and ignore files
# beyond that bound on a larger one.
for i, idx in enumerate(np.random.randint(0, len(preview_files), 9), start=1):
    ax = fig.add_subplot(3, 3, i)
    with Image.open(preview_dir + preview_files[idx]) as img:
        ax.imshow(img)
    # Hide ticks on every panel; a single trailing plt.axis('off') only
    # affects the last subplot.
    ax.axis('off')
plt.show()

# Extracting Features of Dataset.

In [None]:
# Run every training image through the backbone and keep one spatially
# pooled feature array of shape (1, channels) per image, keyed by the file
# name without its extension.
input_dir = '../input/petfinder-pawpularity-score/train/'
# Build the pooling layer once instead of instantiating new Keras layers on
# every iteration. Its output is already 2-D, so the Flatten step that
# followed it was a no-op and is omitted.
pool = tf.keras.layers.GlobalAveragePooling2D()
l = {}
for i in os.listdir(input_dir):
    x_tr = make_image_array(input_dir + i)
    # verbose=0 suppresses the per-call progress bar that would otherwise be
    # printed once per image.
    p = model.predict(x_tr, verbose=0)
    # splitext strips the extension whatever its length ('.jpg', '.jpeg', ...).
    l[os.path.splitext(i)[0]] = pool(p).numpy()

In [None]:
# Assemble the modelling table: one row per extracted image, with the
# feature columns first, then 'Id', then 'Pawpularity'.
image_ids = list(l.keys())
# ravel() flattens each stored (1, n) array, for every extracted image and
# not only for those whose Id appears in the CSV.
df = pd.DataFrame([np.ravel(l[image_id]) for image_id in image_ids])
df['Id'] = image_ids
# Look labels up through an Id-indexed Series. This replaces a full boolean
# scan of df1 per row and int() on a one-element array (deprecated in recent
# NumPy). .loc raises KeyError if an image has no label.
pawpularity_by_id = df1.set_index('Id')['Pawpularity']
df['Pawpularity'] = pawpularity_by_id.loc[image_ids].to_numpy()

## DataFrame with the 2048-dimensional feature vector, Id, and Pawpularity

In [None]:
# Show the assembled feature table as this cell's rich output.
df

In [None]:
# Features are every column except the trailing two; the target is the
# last column.
feature_block = df.iloc[:, :-2]
target_column = df.iloc[:, -1]
X = feature_block.to_numpy()
y = target_column.to_numpy()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for evaluation; the seed is fixed so the split is
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Fit a LightGBM regressor on the training split and report hold-out metrics.
lgb_params = dict(
    objective='regression',
    num_leaves=5,
    learning_rate=0.035,
    n_estimators=2177,
    max_bin=50,
    bagging_fraction=0.65,
    bagging_freq=5,
    bagging_seed=7,
    feature_fraction=0.201,
    feature_fraction_seed=7,
    n_jobs=-1,
)
lgb_regressor = lgb.LGBMRegressor(**lgb_params)
lgb_regressor.fit(X_train, y_train)
y_head = lgb_regressor.predict(X_test)

print('-'*10+'LGBM'+'-'*10)
for label, metric in (
    ('R square Accuracy: ', r2_score),
    ('Mean Absolute Error Accuracy: ', mean_absolute_error),
    ('Mean Squared Error Accuracy: ', mean_squared_error),
):
    print(label, metric(y_test, y_head))

In [None]:
# Fit a scikit-learn gradient-boosting regressor (Huber loss) and report
# hold-out metrics.
gbr_params = dict(
    n_estimators=1992,
    learning_rate=0.03005,
    max_depth=4,
    max_features='sqrt',
    min_samples_leaf=15,
    min_samples_split=14,
    loss='huber',
    random_state=42,
)
gb_reg = GradientBoostingRegressor(**gbr_params)
gb_reg.fit(X_train, y_train)
y_head = gb_reg.predict(X_test)

print('-'*10+'GBR'+'-'*10)
for label, metric in (
    ('R square Accuracy: ', r2_score),
    ('Mean Absolute Error Accuracy: ', mean_absolute_error),
    ('Mean Squared Error Accuracy: ', mean_squared_error),
):
    print(label, metric(y_test, y_head))

In [None]:
# Ridge regression whose penalty is chosen by 10-fold shuffled CV over a
# grid of very small alphas; hold-out metrics are printed afterwards.
kfolds = KFold(n_splits=10, shuffle=True, random_state=42)
alphas = [1e-9, 1e-8, 1e-7, 1e-6]

ridge_step = RidgeCV(alphas=alphas, cv=kfolds)
ridgecv_reg = make_pipeline(ridge_step)
ridgecv_reg.fit(X_train, y_train)
y_head = ridgecv_reg.predict(X_test)

print('-'*10+'RidgeCV'+'-'*10)
for label, metric in (
    ('R square Accuracy: ', r2_score),
    ('Mean Absolute Error Accuracy: ', mean_absolute_error),
    ('Mean Squared Error Accuracy: ', mean_squared_error),
):
    print(label, metric(y_test, y_head))

In [None]:
# Elastic-net regression with alpha and l1_ratio chosen by 8-fold shuffled
# CV; hold-out metrics are printed afterwards.
kfolds = KFold(n_splits=8, shuffle=True, random_state=42)
alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006]
l1ratio = [0.87, 0.9, 0.92, 0.95, 0.97, 0.99, 1]

elastic_step = ElasticNetCV(alphas=alphas, cv=kfolds, l1_ratio=l1ratio)
elasticv_reg = make_pipeline(elastic_step)
elasticv_reg.fit(X_train, y_train)
y_head = elasticv_reg.predict(X_test)

print('-'*10+'ElasticNetCV'+'-'*10)
for label, metric in (
    ('R square Accuracy: ', r2_score),
    ('Mean Absolute Error Accuracy: ', mean_absolute_error),
    ('Mean Squared Error Accuracy: ', mean_squared_error),
):
    print(label, metric(y_test, y_head))

In [None]:
# Lasso regression with alpha chosen by 8-fold shuffled CV; hold-out metrics
# are printed afterwards.
kfolds = KFold(n_splits=8, shuffle=True, random_state=42)

# Spell the alpha grid out in this cell. It was previously taken from
# whichever cell last assigned `alphas`, so re-running the RidgeCV cell
# silently replaced it with the 1e-9..1e-6 grid.
alphas = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006]

lassocv_reg = make_pipeline(LassoCV(alphas=alphas, cv=kfolds))
lassocv_reg.fit(X_train, y_train)
y_head = lassocv_reg.predict(X_test)
print('-'*10+'LassoCV'+'-'*10)
print('R square Accuracy: ', r2_score(y_test, y_head))
print('Mean Absolute Error Accuracy: ', mean_absolute_error(y_test, y_head))
print('Mean Squared Error Accuracy: ', mean_squared_error(y_test, y_head))

In [None]:
# Stack the five base regressors under an ExtraTrees meta-learner and report
# hold-out metrics.
estimators = [('lgbm', lgb_regressor),
              ('gbr', gb_reg),
              ('lasso', lassocv_reg),
              ('ridge', ridgecv_reg),
              ('elasticnet', elasticv_reg)]

# Seed the meta-learner. Without random_state the ExtraTrees forest, and
# therefore the stacked predictions, changed on every run.
stack_reg = StackingRegressor(
    estimators=estimators,
    final_estimator=ExtraTreesRegressor(n_estimators=50, random_state=42),
    n_jobs=-1,
)
stack_reg.fit(X_train, y_train)
y_head = stack_reg.predict(X_test)
print('-'*10+'StackingRegressor'+'-'*10)
print('R square Accuracy: ', r2_score(y_test, y_head))
print('Mean Absolute Error Accuracy: ', mean_absolute_error(y_test, y_head))
print('Mean Squared Error Accuracy: ', mean_squared_error(y_test, y_head))

In [None]:
# Run every test image through the backbone and keep one spatially pooled
# feature array of shape (1, channels) per image, keyed by the file name
# without its extension.
input_dir = '../input/petfinder-pawpularity-score/test/'
# Build the pooling layer once instead of instantiating new Keras layers on
# every iteration. Its output is already 2-D, so the Flatten step that
# followed it was a no-op and is omitted.
pool = tf.keras.layers.GlobalAveragePooling2D()
l_test = {}
for i in os.listdir(input_dir):
    x_tr = make_image_array(input_dir + i)
    # verbose=0 suppresses the per-call progress bar that would otherwise be
    # printed once per image.
    p = model.predict(x_tr, verbose=0)
    # splitext strips the extension whatever its length ('.jpg', '.jpeg', ...).
    l_test[os.path.splitext(i)[0]] = pool(p).numpy()

In [None]:
# Read the test metadata table and keep its identifier column.
test_csv_path = '../input/petfinder-pawpularity-score/test.csv'
df_test = pd.read_csv(test_csv_path)
Id_test = df_test['Id']

In [None]:
# Assemble the test feature table: one row per extracted image, with the
# feature columns first and 'Id' last.
# Every stored (1, n) array is flattened with ravel(). The earlier loop over
# the CSV Ids raised KeyError when an Id had no image and left arrays
# unflattened for image files not listed in the CSV.
test_ids = list(l_test.keys())
df_test_ = pd.DataFrame([np.ravel(l_test[test_id]) for test_id in test_ids])
df_test_['Id'] = test_ids

In [None]:
# Show the test feature table as this cell's rich output.
df_test_

In [None]:
# Split the test table into its identifier column and the feature matrix
# (every column except the trailing 'Id').
testId = df_test_['Id']
test_feature_block = df_test_.iloc[:, :-1]
test_pwr = test_feature_block.to_numpy()

In [None]:
# Predict on the test features with every fitted model, in the same order
# as before: LGBM, GBR, ElasticNet, Ridge, Lasso, stack.
(
    test_pred_lgb,
    test_pred_gb,
    test_pred_elastic,
    test_pred_ridge,
    test_pred_lasso,
    test_pred_stack,
) = [
    fitted.predict(test_pwr)
    for fitted in (
        lgb_regressor,
        gb_reg,
        elasticv_reg,
        ridgecv_reg,
        lassocv_reg,
        stack_reg,
    )
]

In [None]:
# Wrap each model's predictions in a one-column 'Pawpularity' DataFrame.
(
    test_pred_lgb,
    test_pred_gb,
    test_pred_elastic,
    test_pred_ridge,
    test_pred_lasso,
    test_pred_stack,
) = [
    pd.DataFrame(raw_pred, columns=['Pawpularity'])
    for raw_pred in (
        test_pred_lgb,
        test_pred_gb,
        test_pred_elastic,
        test_pred_ridge,
        test_pred_lasso,
        test_pred_stack,
    )
]

In [None]:
# Post-process each model's predictions.
# The models were fitted on the raw Pawpularity target (no log1p transform),
# so applying expm1 here was an inverse for a transform that never happened
# and inflated predictions to astronomically large values. Predictions are
# instead clipped to the valid Pawpularity range of 1..100. They are no
# longer floored, because rounding down would bias every prediction low.
for model_preds in (
    test_pred_lgb,
    test_pred_gb,
    test_pred_elastic,
    test_pred_ridge,
    test_pred_lasso,
    test_pred_stack,
):
    model_preds['Pawpularity'] = model_preds['Pawpularity'].clip(lower=1, upper=100)

In [None]:
# Weighted blend of the stacked, LightGBM and Ridge predictions.
# The weights sum to 1.011, which scaled every blended prediction up by
# about 1%. Dividing by the total makes the blend a true weighted average.
blend_weights = {'stack': 0.1665, 'lgb': 0.678, 'ridge': 0.1665}
weight_total = sum(blend_weights.values())
final_pred = (
    test_pred_stack * blend_weights['stack']
    + test_pred_lgb * blend_weights['lgb']
    + test_pred_ridge * blend_weights['ridge']
) / weight_total
final_pred.head()

In [None]:
# Read the submission template from the competition data.
sample_sub_path = '../input/petfinder-pawpularity-score/sample_submission.csv'
sample_sub = pd.read_csv(sample_sub_path)

In [None]:
# Put the blended predictions in the score column (aligned on row index).
sample_sub['Pawpularity'] = final_pred['Pawpularity']

In [None]:
# Replace the template Ids with the Ids in prediction order, so each score
# stays paired with the image it was computed from.
sample_sub = sample_sub.assign(Id=testId)

In [None]:
# Show the filled-in submission table as this cell's rich output.
sample_sub

In [None]:
# Write the submission file to the working directory without the row index.
submission_path = 'submission.csv'
sample_sub.to_csv(path_or_buf=submission_path, index=False)

In [None]:
# Final cell: print a completion marker.
print('done')