In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Input files, resolved relative to the notebook's working directory.
TRAIN_CSV = 'train1.csv'
TEST_CSV = 'test1.csv'

# Read the labelled training table and the unlabelled test table.
train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

In [3]:
# Peek at the first five raw rows to see column names and value formats.
train.head(n=5)

Unnamed: 0,id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,0,0.0,No,6.0,4.0,No,15.0,5.0,Extrovert
1,1,1.0,No,7.0,3.0,No,10.0,8.0,Extrovert
2,2,6.0,Yes,1.0,0.0,,3.0,0.0,Introvert
3,3,3.0,No,7.0,3.0,No,11.0,5.0,Extrovert
4,4,1.0,No,4.0,4.0,No,13.0,,Extrovert


In [4]:
# Count missing entries per column (isna is the canonical spelling of isnull).
train.isna().sum()


id                              0
Time_spent_Alone             1190
Stage_fear                   1893
Social_event_attendance      1180
Going_outside                1466
Drained_after_socializing    1149
Friends_circle_size          1054
Post_frequency               1264
Personality                     0
dtype: int64

In [5]:
import numpy as np

# Columns that hold 'Yes'/'No' answers and need a numeric encoding.
binary_features = [
    'Stage_fear',
    'Drained_after_socializing',
]

def convert_binary_columns(df, columns):
    """Encode 'Yes'/'No' columns as 1/0, keeping missing values as NaN.

    The frame is modified in place and also returned, so both
    ``df = convert_binary_columns(df, cols)`` and a bare call work.

    Columns that are already numeric are left untouched. This makes the
    function safe to call more than once on the same frame (for example when
    the notebook cell is re-run): a second pass over 0/1 values would
    otherwise find no 'Yes'/'No' keys and turn the whole column into NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to encode.
    columns : iterable of str
        Names of the 'Yes'/'No' columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the listed columns encoded.
    """
    yes_no_codes = {'No': 0, 'Yes': 1}
    for col in columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            # Already encoded on a previous call; mapping again would wipe it.
            continue
        # Series.map yields NaN for anything not in the dict, so missing
        # entries (and unexpected labels) stay NaN without a string round-trip.
        df[col] = df[col].map(yes_no_codes)
    return df

# Encode the 'Yes'/'No' feature columns in both tables.
train = convert_binary_columns(train, binary_features)
test = convert_binary_columns(test, binary_features)

# Encode the target: 1 = Introvert, 0 = Extrovert.
# The dtype guard keeps this cell re-runnable: mapping an already-encoded 0/1
# column would find no 'Extrovert'/'Introvert' keys and turn every label into NaN.
if not pd.api.types.is_numeric_dtype(train['Personality']):
    # Series.map already produces NaN for unmapped values, so no string
    # round-trip or 'nan' replacement is needed.
    train['Personality'] = train['Personality'].map({'Extrovert': 0, 'Introvert': 1})


In [6]:
# Re-inspect the first five rows to confirm the Yes/No and label columns are now numeric.
train.head(n=5)

Unnamed: 0,id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,0,0.0,0.0,6.0,4.0,0.0,15.0,5.0,0
1,1,1.0,0.0,7.0,3.0,0.0,10.0,8.0,0
2,2,6.0,1.0,1.0,0.0,,3.0,0.0,1
3,3,3.0,0.0,7.0,3.0,0.0,11.0,5.0,0
4,4,1.0,0.0,4.0,4.0,0.0,13.0,,0


In [7]:
from sklearn.model_selection import train_test_split
# Separate the target from the predictors.
# NOTE(review): only 'Personality' is dropped, so the `id` column remains in X
# and is used as a feature — confirm this is intended.
y = train['Personality']
X = train.drop(columns=['Personality'])

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [28]:
# Hyper-parameter candidates for the grid search.
# 6 * 3 * 3 * 3 * 3 * 3 = 1458 combinations in total.
param_grid = dict(
    max_depth=[2, 3, 4, 5, 6, 7],
    learning_rate=[0.1, 0.2, 0.3],
    n_estimators=[25, 50, 100],
    subsample=[0.6, 0.8, 1],
    colsample_bytree=[0.6, 0.8, 1],
    min_child_weight=[1, 3, 5],
)


In [29]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
# Search every combination in `param_grid`, scoring candidates by average
# precision under 5-fold cross-validation.
xgmodel = XGBClassifier()
grid_search = GridSearchCV(
    xgmodel,
    param_grid,
    scoring='average_precision',
    cv=5,
)

grid_search.fit(X_train, y_train)

# Predict the held-out validation rows with the fitted search object.
y_pred = grid_search.predict(X_val)


In [30]:
# Show the parameter combination the grid search selected.
grid_search.best_params_

{'colsample_bytree': 0.6,
 'learning_rate': 0.1,
 'max_depth': 3,
 'min_child_weight': 5,
 'n_estimators': 50,
 'subsample': 1}

In [31]:
from sklearn.metrics import accuracy_score,classification_report
# Score the validation predictions: overall accuracy first, then per-class
# precision / recall / F1.
val_accuracy = accuracy_score(y_val, y_pred)
val_report = classification_report(y_val, y_pred)

print("Accuracy:", val_accuracy)
print(val_report)

Accuracy: 0.9686909581646423
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2753
           1       0.95      0.93      0.94       952

    accuracy                           0.97      3705
   macro avg       0.96      0.96      0.96      3705
weighted avg       0.97      0.97      0.97      3705



In [32]:
# Predict labels for the test table with the fitted search object.
# `test` is passed as loaded (including `id`), mirroring the columns of X used for fitting.
y_pred_test = grid_search.predict(test)

In [33]:
# NOTE(review): `df` is not used anywhere else in this cell — confirm whether this re-read is still needed.
df = pd.read_csv('test1.csv')

# Translate numeric predictions back to labels: 1 -> 'Introvert', anything else -> 'Extrovert'.
is_introvert = np.asarray(y_pred_test) == 1
y_pred_labels = np.where(is_introvert, 'Introvert', 'Extrovert').tolist()

# One row per test id with its predicted label.
submission = pd.DataFrame({'id': test['id'], 'Personality': y_pred_labels})

# Write the submission file without the DataFrame index column.
submission.to_csv('introvert.csv', index=False)

