# People Personality Predictor

In [None]:
#import dependencies
import numpy as np
import pandas as pd

In [None]:
#load the dataset and discard every row that contains a missing value
df = pd.read_csv('personality_dataset.csv').dropna()

In [248]:
#preview the first rows of the loaded data
df.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,No,4.0,6.0,No,13.0,5.0,Extrovert
1,9.0,Yes,0.0,0.0,Yes,0.0,3.0,Introvert
2,9.0,Yes,1.0,2.0,Yes,5.0,2.0,Introvert
3,0.0,No,6.0,7.0,No,14.0,8.0,Extrovert
4,3.0,No,9.0,4.0,No,8.0,5.0,Extrovert


In [None]:
#encode the two Yes/No features and the personality label as 0/1 integers
yes_no = {'Yes': 1, 'No': 0}
for col in ('Stage_fear', 'Drained_after_socializing'):
    df[col] = df[col].map(yes_no)

#Extrovert is the positive class (1), Introvert the negative class (0)
df['Personality'] = df['Personality'].map({'Extrovert': 1, 'Introvert': 0})

In [250]:
#preview the first rows again to check the 0/1 encoding of the text columns
df.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,4.0,0,4.0,6.0,0,13.0,5.0,1
1,9.0,1,0.0,0.0,1,0.0,3.0,0
2,9.0,1,1.0,2.0,1,5.0,2.0,0
3,0.0,0,6.0,7.0,0,14.0,8.0,1
4,3.0,0,9.0,4.0,0,8.0,5.0,1


In [None]:
#convert the dataframe columns to numpy arrays

#feature columns, in the order they are stacked into x
feature_columns = [
    'Time_spent_Alone',
    'Stage_fear',
    'Social_event_attendance',
    'Going_outside',
    'Drained_after_socializing',
    'Friends_circle_size',
    'Post_frequency',
]
x1, x2, x3, x4, x5, x6, x7 = (np.array(df[col]) for col in feature_columns)

#x has one row per feature and one column per sample; y holds the 0/1 labels
x = np.stack([x1, x2, x3, x4, x5, x6, x7])
y = np.array(df['Personality'])

In [None]:
#train-test split: first half of the samples for training, the rest for testing
n = len(y)
half = n // 2
samples = x.T  #one row per sample, one column per feature
x_train, x_test = samples[:half], samples[half:]
y_train, y_test = y[:half], y[half:]

In [261]:
#shape of the training feature matrix
x_train.shape

(1239, 7)

In [262]:
#inspect the training feature matrix
x_train

array([[ 4.,  0.,  4., ...,  0., 13.,  5.],
       [ 9.,  1.,  0., ...,  1.,  0.,  3.],
       [ 9.,  1.,  1., ...,  1.,  5.,  2.],
       ...,
       [ 4.,  1.,  2., ...,  1.,  4.,  1.],
       [ 7.,  1.,  0., ...,  1.,  1.,  2.],
       [11.,  1.,  2., ...,  1.,  5.,  0.]])

In [None]:
class LogisticRegression:
    """Binary logistic regression implemented with NumPy.

    Used here to predict a person's personality (Introvert = 0,
    Extrovert = 1). Three optimizers are available: full-batch gradient
    descent (``"gd"``), mini-batch stochastic gradient descent (``"sgd"``)
    and Adam (``"adam"``).

    The parameters are stored in ``self.params`` as a flat vector of length
    ``n_features + 1``: the feature weights followed by the bias.

    Note: ``compute_gradients`` returns the gradient of the *summed* loss,
    while ``loss`` reports the *mean* loss, so the effective step size of
    ``gd``/``sgd`` grows with the number of samples in a batch.
    """

    def __init__(self, x: np.ndarray, y: np.ndarray):
        '''
        params:
            x(np.ndarray): inputs, one row per sample and one column per feature
            y(np.ndarray): 0/1 labels, one per sample
        '''
        self.x = x
        self.y = y
        self.n_features = self.x.shape[1]
        self.params = np.zeros(self.n_features+1)

    def sigmoid(self, z):
        '''
        Numerically stable logistic function 1/(1+exp(-z)).
        params:
            z: scalar or array of raw scores
        returns:
            array of probabilities with the same shape as z
        '''
        z = np.asarray(z, dtype=float)
        # exp is only ever evaluated on non-positive values, so it cannot
        # overflow for large |z|
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1/(1+e), e/(1+e))

    def loss(self, pred, label):
        '''
        Binary cross-entropy between predicted probabilities and labels.
        params:
            pred(np.ndarray): predicted probabilities
            label(np.ndarray): 0/1 labels
        returns:
            (error, mean_loss): error is pred - label as a column vector,
            mean_loss is the cross-entropy averaged over the samples
        '''
        c = 1e-10 #small constant to avoid log(0)
        pred = pred.reshape(-1, 1)
        label = label.reshape(-1, 1)
        error = pred - label
        loss = -label * np.log(pred+c) - (1 - label) * np.log(1 - pred+c)
        return error, np.mean(loss)

    def forward(self, x, params):
        '''
        Compute probabilities and hard 0/1 predictions for x.
        params:
            x(np.ndarray): inputs, (n_samples, n_features) or a single (n_features,) sample
            params(np.ndarray): weights followed by the bias
        returns:
            (sig_pred, labeled): probabilities and labels thresholded at 0.5
        '''
        w = params[:self.n_features]
        b = params[self.n_features]
        pred = x@w + b
        sig_pred = self.sigmoid(pred)
        labeled = np.where(sig_pred>=0.5,1,0)
        return sig_pred, labeled

    def compute_gradients(self, x, error):
        '''
        Gradient of the summed cross-entropy w.r.t. the weights and the bias.
        params:
            x(np.ndarray): inputs of shape (n_samples, n_features)
            error(np.ndarray): pred - label for each sample
        returns:
            column vector of shape (n_features + 1, 1): weight gradients
            followed by the bias gradient
        '''
        error = error.reshape(-1, 1)
        gm = x.T @ error
        gc = np.sum(error).reshape(1, 1)
        return np.vstack([gm, gc])

    def create_batches(self, x, y, batch_size=100, shuffle=True):
        '''
        Split x and y into consecutive mini-batches.
        params:
            x(np.ndarray): inputs, one row per sample
            y(np.ndarray): labels, one per sample
            batch_size(int): maximum number of samples per batch
            shuffle(bool): randomly permute the samples before splitting
        returns:
            list of (x_batch, y_batch) tuples; y_batch is a column vector
            when y is 1-D. The last batch may be smaller than batch_size.
        '''
        n = x.shape[0]

        if shuffle:
            indices = np.random.permutation(n)
            x = x[indices]
            y = y[indices]

        # Ensure y is a column vector for logistic regression broadcasting
        if y.ndim == 1:
            y = y[:, np.newaxis]  # shape becomes (N, 1)

        return [
            (x[i:i + batch_size], y[i:i + batch_size])
            for i in range(0, n, batch_size)
        ]

    def gradient_descent(self, x, y, lr=0.01, epochs=100):
        '''
        Full-batch gradient descent; prints the loss every 20 epochs.
        params:
            x(np.ndarray): inputs
            y(np.ndarray): labels
            lr(float): learning rate
            epochs(int): number of passes over the data
        returns:
            the updated parameter vector (self.params, modified in place)
        '''
        for i in range(epochs):
            sig_pred, _ = self.forward(x,self.params)
            error, loss = self.loss(sig_pred, y)
            grads = self.compute_gradients(x, error)
            # grads is a column vector; flatten it to match self.params
            self.params -= lr*grads.flatten()

            if i%20==0:
                print("Loss: ", loss)

        return self.params

    def stochastic_gradient_descent(self, x, y, lr, epochs=100, batch_size=100):
        '''
        Mini-batch stochastic gradient descent with reshuffling every epoch.
        Every 20 epochs the loss of the last mini-batch is printed.
        params:
            x(np.ndarray): inputs
            y(np.ndarray): labels
            lr(float): learning rate
            epochs(int): number of passes over the data
            batch_size(int): samples per mini-batch
        returns:
            the updated parameter vector (self.params, modified in place)
        '''
        for i in range(epochs):
            loss = np.nan  # stays NaN only when there is no data to iterate over
            for bx, by in self.create_batches(x,y,batch_size=batch_size):
                sig_pred, _ = self.forward(bx,self.params)
                error, loss = self.loss(sig_pred, by)
                grads = self.compute_gradients(bx, error)
                self.params -= lr*grads.flatten()

            if i%20==0:
                print("Loss: ", loss)

        return self.params

    def adam(self, x, y, lr, epochs=100, batch_size=100, beta=0.9, gamma=0.99, epsilon=1e-18):
        '''
        Adam optimizer on mini-batches with reshuffling every epoch.
        Every 20th epoch the loss of the last mini-batch is printed.
        params:
            x(np.ndarray): inputs
            y(np.ndarray): labels
            lr(float): learning rate
            epochs(int): number of passes over the data
            batch_size(int): samples per mini-batch
            beta(float): decay rate of the first-moment estimate
            gamma(float): decay rate of the second-moment estimate
            epsilon(float): constant added to the denominator for stability
        returns:
            the updated parameter vector (self.params, modified in place)
        '''
        m = np.zeros_like(self.params)
        v = np.zeros_like(self.params)
        step = 0  # number of parameter updates performed so far
        for i in range(1,epochs+1):
            loss = np.nan  # stays NaN only when there is no data to iterate over
            for bx, by in self.create_batches(x,y,batch_size=batch_size):
                step += 1
                sig_pred, _ = self.forward(bx,self.params)
                error, loss = self.loss(sig_pred, by)
                grads = self.compute_gradients(bx, error).flatten()
                m = beta*m + (1-beta)*grads
                v = gamma*v + (1-gamma)*(grads**2)
                # bias correction depends on how many updates fed the moving
                # averages, i.e. the step count, not the epoch number
                m_hat = m/(1-beta**step)
                v_hat = v/(1-gamma**step)
                self.params -= lr*m_hat/(np.sqrt(v_hat)+epsilon)

            if i%20==0:
                print("Loss: ", loss)
        return self.params

    def fit(self, optimizer, lr=0.01, epochs=100, batch_size=100):
        '''
        Train on the data given to the constructor.
        params:
            optimizer(str): "gd", "sgd" or "adam" (case-insensitive)
            lr(float): learning rate
            epochs(int): number of passes over the data
            batch_size(int): mini-batch size; ignored by "gd"
        raises:
            ValueError: if optimizer is not one of the supported names
        '''
        name = optimizer.lower()
        if name == "gd":
            self.params = self.gradient_descent(self.x, self.y, lr=lr, epochs=epochs)
        elif name == "sgd":
            self.params = self.stochastic_gradient_descent(
                self.x, self.y, lr=lr, epochs=epochs, batch_size=batch_size)
        elif name == "adam":
            self.params = self.adam(
                self.x, self.y, lr=lr, epochs=epochs, batch_size=batch_size)
        else:
            raise ValueError(
                f"Unknown optimizer {optimizer!r}; expected 'gd', 'sgd' or 'adam'")

    def predict(self, x):
        '''
        Predict 0/1 labels for x using the current parameters.
        params:
            x(np.ndarray): inputs, (n_samples, n_features) or a single (n_features,) sample
        returns:
            array of 0/1 predictions
        '''
        _, labeled = self.forward(x,self.params)
        return labeled




In [311]:
#train on the training half with the Adam optimizer
model = LogisticRegression(x_train, y_train)
model.fit("adam")

#fraction of held-out samples whose predicted label matches the true label
y_pred = model.predict(x_test)
matches = (y_pred == y_test)
accuracy = np.mean(matches)
print(f"Test accuracy: {accuracy:.4f}")

Loss:  0.055497144341421334
Loss:  0.2140607062215079
Loss:  0.06193382801466514
Loss:  0.2661749992027805
Loss:  0.3116684356870154
Test accuracy: 0.8999


In [314]:
#predict the label of one hand-written sample (seven feature values)
sample = np.array([6, 1, 3, 1, 1, 7, 6])
model.predict(sample)

array(0)