### In this notebook we will apply all our deep learning concepts to predict whether a person is an introvert or an extrovert.<br>
### Let's start with the notebook.


# Importing Libraries

In [None]:
!pip install scikeras

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scikeras.wrappers import KerasClassifier
import warnings
warnings.filterwarnings('ignore')
import keras
from keras import optimizers,regularizers
from keras.regularizers import l1_l2
from keras.models import Sequential
from keras.layers import Dense,Dropout
from scikeras.wrappers import KerasClassifier
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from tensorflow import random
from keras.callbacks import EarlyStopping



# Reading Files

In [None]:
# Load the competition files. train/test use their "id" column as the index;
# the sample submission keeps its default integer index.
train = pd.read_csv(
    '/kaggle/input/playground-series-s5e7/train.csv',
    index_col="id",
)
test = pd.read_csv(
    '/kaggle/input/playground-series-s5e7/test.csv',
    index_col="id",
)
submission = pd.read_csv(
    '/kaggle/input/playground-series-s5e7/sample_submission.csv'
)

In [None]:
# Preview the first rows of the sample submission to see the expected output format.
submission.head()

In [None]:
# Number of rows and columns in the sample submission.
print(submission.shape)

# Meet and Greet Data

In [None]:
# Check the train and test contents.
# Train: first three rows.
train.head(3)

In [None]:
# Test: last three rows.
test.tail(3)

In [None]:
# Compare the (rows, columns) shapes of the train and test frames.
print(train.shape)
print(test.shape)

In [None]:
# Pie chart of the class balance of the target column.
personality_counts = train['Personality'].value_counts()
plt.pie(
    personality_counts,
    labels=personality_counts.index,
    autopct='%1.1f%%',
    textprops={'fontsize':20,'fontweight':'bold'},
)
plt.show()

In [None]:
# Summarise missing values in train: count and percentage per column,
# restricted to columns that actually contain nulls, largest share first.
train_null_counts = train.isnull().sum()
train_null_counts = train_null_counts[train_null_counts > 0]
missing_train = train_null_counts.to_frame().rename(columns={0:'No of Missing Values'})
missing_train['% of Missing Values'] = (100 * train_null_counts / len(train)).round(2)
missing_train = missing_train.sort_values(by=['% of Missing Values'], ascending=False)
missing_train

In [None]:
# Same missing-value summary for the test frame: count and percentage per
# column with at least one null, largest share first.
test_null_counts = test.isnull().sum()
test_null_counts = test_null_counts[test_null_counts > 0]
missing_test = test_null_counts.to_frame().rename(columns={0:'No of Missing Values'})
missing_test['% of Missing Values'] = (100 * test_null_counts / len(test)).round(2)
missing_test = missing_test.sort_values(by=['% of Missing Values'], ascending=False)
missing_test

In [None]:
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
train.info()
print("\n")
print("*"*40)
print("\n")
test.info()

In [None]:
# Summary statistics of the train frame.
train.describe()

In [None]:
# Column labels of the train frame.
train.columns

# Data Preprocessing

<p> I am not comfortable combining the datasets for data preprocessing, so I will do the preprocessing and cleaning for train and test simultaneously.</p>

In [None]:
# Group the feature columns by dtype. The target column is excluded by name
# instead of popping the last object column, so the result does not depend on
# the column order of the file.
cat_cols=[col for col in train.select_dtypes(include=['object']).columns.tolist() if col!='Personality']
num_cols=train.select_dtypes(include=['number']).columns.tolist()
print(cat_cols)
print(num_cols)

In [None]:
# Features are every column except the last; the target is the last column.
X=train.iloc[:,:-1]
y=train.iloc[:,-1]



In [None]:
# Plain-text preview of the first five feature rows.
print(X.head(5))

In [None]:
# Plain-text preview of the first five target values.
print(y.head(5))

In [None]:
# Hold out 20% of the labelled data for validation. The hold-out split is named
# X_test/y_test here and is distinct from the unlabelled competition `test` frame.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=1,shuffle=True)
# Print each feature matrix next to its own label vector so the row counts can be compared.
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

### Label Encoding the Categorical Variable

In [None]:
# Fit the encoder on the training labels only, then apply the same mapping to
# the hold-out labels.
# Note: y_train/y_test are overwritten with their encoded versions, so re-running
# this cell would refit the encoder on already-encoded integers.
le=LabelEncoder()
y_train=le.fit_transform(y_train)
y_test=le.transform(y_test)


In [None]:
# Show which integer code the fitted encoder assigned to each class label.
le_name_mapping = {
    label: code
    for label, code in zip(le.classes_, le.transform(le.classes_))
}
le_name_mapping

In [None]:
# First 30 encoded training labels.
print(y_train[:30])

In [None]:
# First 30 encoded hold-out labels.
print(y_test[:30])

### Creating Pipeline

In [None]:
# Numeric features: fill gaps with the median, then standardise.
num_pipeline = make_pipeline(SimpleImputer(strategy="median"),StandardScaler())
# Categorical features: fill gaps with the most frequent value, then ordinal-encode.
# Categories not seen during fit are mapped to -1 rather than NaN: a NaN would
# break the later astype(int) casts and would propagate into the network inputs.
cat_pipeline=make_pipeline(SimpleImputer(strategy="most_frequent"),OrdinalEncoder(handle_unknown='use_encoded_value',unknown_value=-1))


### Making the Column Transformer

In [None]:
# Route numeric and categorical columns through their own pipelines; any other
# column is passed through unchanged, and the output is a pandas DataFrame
# with the original (unprefixed) feature names.
ct = ColumnTransformer(
    [
        ("num", num_pipeline, num_cols),
        ("cat", cat_pipeline, cat_cols),
    ],
    verbose_feature_names_out=False,
    remainder='passthrough',
).set_output(transform='pandas')

In [None]:
# Fit the preprocessing on the training split only, then apply the fitted
# transformer to the hold-out split.
X_train=pd.DataFrame(ct.fit_transform(X_train),columns=ct.get_feature_names_out())
X_test=pd.DataFrame(ct.transform(X_test),columns=ct.get_feature_names_out())


In [None]:
# Apply the same fitted transformer to the competition test set.
# Note: this overwrites `test` with its transformed version.
test=pd.DataFrame(ct.transform(test),columns=ct.get_feature_names_out())

In [None]:
# Shape of the transformed competition test set.
print(test.shape)

In [None]:
# Check the shapes of the transformed splits next to their label arrays.
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

In [None]:
# Shape of the transformed competition test set (same check as above).
print(test.shape)

In [None]:
# The ordinal-encoded categorical columns come back as floats; cast them to
# integers in both the training and hold-out splits.
for encoded_col in ('Stage_fear', 'Drained_after_socializing'):
    X_train[encoded_col] = X_train[encoded_col].astype(int)
    X_test[encoded_col] = X_test[encoded_col].astype(int)


In [None]:
# Same integer cast for the encoded categorical columns of the competition test set.
for encoded_col in ('Stage_fear', 'Drained_after_socializing'):
    test[encoded_col] = test[encoded_col].astype(int)

In [None]:
# First rows of the transformed competition test set.
test.head()

In [None]:
# First rows of the transformed training split.
X_train.head()

In [None]:
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
X_train.info()
print("\n")
print("*"*40)
print("\n")
X_test.info()

In [None]:
# Count the null values per column in the transformed training split.
X_train.isnull().sum()

In [None]:
# Count the null values per column in the transformed hold-out split.
X_test.isnull().sum()

# Preparing Data for Modelling

### Setting the Initial Neural Network

In [None]:
# Fix the NumPy and TensorFlow random seeds with one shared value
# (`random` here is the module imported from tensorflow, not the stdlib one).
seed=12
np.random.seed(seed)
random.set_seed(seed)

### Setting the Parameters Grid

In [None]:
# Disabled: the hyper-parameter search space is kept as a string literal, so
# nothing is assigned here and `params` is not defined by this cell.
'''params = {
'model__activation':['relu','tanh'],
'model__optimizer': ['adam','rmsprop','sgd'],
'batch_size':[50,100],
'model__dropout': [0.1,0.2],
'epochs':[100,200]
}
'''

In [None]:
# Disabled: the model factory used for the hyper-parameter search is kept as a
# string literal, so `create_model` is not defined by this cell.
'''
def create_model(activation,optimizer,dropout):
    model = Sequential()
    model.add(Dense(input_dim=X_train.shape[1], units=128, activation=activation,kernel_regularizer=l1_l2(l1=0.001,l2=0.001)))
    model.add(Dropout(dropout))
    model.add(Dense(units=64, activation=activation,kernel_regularizer=l1_l2(l1=0.001,l2=0.001)))
    model.add(Dropout(dropout))
    model.add(Dense(units=8, activation=activation,kernel_regularizer=l1_l2(l1=0.001,l2=0.001)))
    model.add(Dense(units=1,activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
'''

In [None]:
# Disabled: wrap the model factory for use with scikit-learn.
#model=KerasClassifier(model=create_model)

In [None]:
# Disabled: randomized search over the parameter grid (5-fold CV, 3 candidates).
#random_search=RandomizedSearchCV(estimator=model,param_distributions=params,cv=5,verbose=1,n_iter=3)

In [None]:
# Disabled: run the search and report the best configuration and score.
#random_search_result=random_search.fit(X_train,y_train)

#print("Best Paramaters",random_search.best_params_)
#print("Best Score",random_search.best_score_)

<p>We have got the best parameters, and now we will build the final model.</p>

# Building the Final Model

In [None]:
def final_model():
    """Build and compile the final feed-forward binary classifier.

    The network stacks three ReLU hidden layers (256, 64 and 8 units), each
    with an L1+L2 kernel penalty and followed by dropout, and ends in a single
    sigmoid unit. The input width is read from the module-level ``X_train``.

    Returns:
        A Keras ``Sequential`` model compiled with binary cross-entropy loss,
        the ``'sgd'`` optimizer and the accuracy metric.
    """
    l1_penalty = l2_penalty = 0.001
    dropout_rate = 0.1

    network = Sequential()
    for position, width in enumerate((256, 64, 8)):
        # Only the first layer declares the number of input features.
        first_layer_kwargs = {'input_dim': X_train.shape[1]} if position == 0 else {}
        network.add(
            Dense(
                units=width,
                activation='relu',
                kernel_regularizer=l1_l2(l1=l1_penalty, l2=l2_penalty),
                **first_layer_kwargs,
            )
        )
        network.add(Dropout(dropout_rate))

    # Single sigmoid unit for the binary target.
    network.add(Dense(units=1, activation='sigmoid'))
    network.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return network

In [None]:
# Instantiate the compiled network; X_train must already be transformed, since
# final_model() reads the input width from it.
fn_model=final_model()

In [None]:
# Stop training once the validation loss has not improved for 20 epochs.
# Keras logs validation metrics with a "val_" prefix, so the monitored key must
# be 'val_loss'; an unavailable key such as 'test_loss' never triggers a stop.
es_callback = EarlyStopping(monitor='val_loss', \
mode='min', patience=20)

# Fitting the Model

In [None]:
# Train for up to 100 epochs in batches of 100, validating on the hold-out
# split after every epoch and applying the early-stopping callback.
history = fn_model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=100,
    validation_data=(X_test, y_test),
    callbacks=[es_callback],
)

# Validating Holdout Data

In [None]:
# Loss and accuracy of the trained model on the hold-out split.
fn_model.evaluate(X_test,y_test)

# Predicting Test File and Final submission

In [None]:
# Sigmoid outputs for the transformed competition test set.
predictions=fn_model.predict(test)

In [None]:
# Threshold the sigmoid outputs at 0.5 to get class codes. The model's single
# output unit yields an (n, 1) array, so it is flattened to 1-D before being
# decoded back to the original class labels.
final_predictions=(predictions > 0.5).astype(int).ravel()
# Rows are matched to the sample submission by position.
submission["Personality"]=le.inverse_transform(final_predictions)
submission.to_csv("submission.csv", index=False)
submission.head()