In [83]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [84]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [85]:
import pandas as pd
train_data = pd.read_csv('/content/drive/MyDrive/Google collab/Project/Dataset/train dataset.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Google collab/Project/Dataset/test dataset.csv')
test_data.head()

Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality (class label)
0,Female,20,7,9,9,5,5,dependable
1,Male,17,5,4,5,2,4,serious
2,Female,25,5,5,7,2,4,serious
3,Female,18,6,2,7,4,7,serious
4,Female,19,2,4,7,1,3,responsible


#Data Preparation and Preprocessing




In [86]:
# Handle missing values (e.g., fill with mean for numerical columns)
#calculates the mean (average) value for each column
train_data.fillna(train_data.mean(), inplace=True)
test_data.fillna(test_data.mean(), inplace=True)


  train_data.fillna(train_data.mean(), inplace=True)
  test_data.fillna(test_data.mean(), inplace=True)


In [87]:
# Encode categorical features (e.g., 'Gender')
label_encoder = LabelEncoder()
train_data['Gender'] = label_encoder.fit_transform(train_data['Gender'])
test_data['Gender'] = label_encoder.transform(test_data['Gender'])


In [88]:
test_data.head() #female 1, male 2

Unnamed: 0,Gender,Age,openness,neuroticism,conscientiousness,agreeableness,extraversion,Personality (class label)
0,1,20,7,9,9,5,5,dependable
1,2,17,5,4,5,2,4,serious
2,1,25,5,5,7,2,4,serious
3,1,18,6,2,7,4,7,serious
4,1,19,2,4,7,1,3,responsible


In [89]:
# Standardize numerical features
# The StandardScaler is used for standardizing numerical features, which means it transforms them so that they have a mean of 0 and a standard deviation of 1.
# This normalization ensures that features are on a similar scale,
scaler = StandardScaler()
numerical_columns = ['Age', 'openness', 'neuroticism', 'conscientiousness', 'agreeableness', 'extraversion']
train_data[numerical_columns] = scaler.fit_transform(train_data[numerical_columns])
test_data[numerical_columns] = scaler.transform(test_data[numerical_columns])


In [90]:
# Split the data into features (X) and target labels (y)
X_train = train_data.drop('Personality (Class label)', axis=1)
y_train = train_data['Personality (Class label)']
X_test = test_data.drop('Personality (class label)', axis=1)
y_test = test_data['Personality (class label)']


# Model Selection and Training

In [100]:
# Train a multinomial Logistic Regression model
# Multinomial logistic regression used for multi-class classification problems where the target variable has more than two classes.
# solver="newton-cg": This specifies the optimization algorithm to use when fitting the logistic regression model.
model = LogisticRegression(multi_class="multinomial", solver="newton-cg")
model.fit(X_train, y_train)

#The choice of optimization algorithm, depends on several factors, including the dataset's size, complexity, and the desired trade-off between accuracy and computational efficiency.
# Newton-CG Solver: "Newton-CG" stands for Newton Conjugate Gradient.
#It's a numerical optimization algorithm that combines elements of Newton's method with the conjugate gradient method

In [101]:
# standardizing the numerical features for both the training and test datasets using the scaler
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [102]:
# Make predictions on the test data
y_pred = model.predict(X_test) #inverse this

#model: This variable represents the logistic regression model. It has learned the relationships between the input features and the target labels during the training process.
# y_pred will contain the predicted class labels for the test data.

In [103]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_rep)

#Precision is a measure of how many of the predicted positive instances were actually correct. (True Positives) / (True Positives + False Positives)
#Recall measures how many of the actual positive instances were correctly predicted as positive by the model. (True Positives) / (True Positives + False Negatives)
#The F1-score is the harmonic mean of precision and recall. The F1-score ranges from 0 to 1, where higher values indicate better model performance.
#Support represents the number of occurrences of each class in the true dataset. Support is not a metric but rather a count of instances in each class.


Accuracy: 0.75
Classification Report:
               precision    recall  f1-score   support

  dependable       0.43      0.14      0.21        21
 extraverted       0.68      0.91      0.78        77
      lively       0.64      0.75      0.69        24
 responsible       0.72      0.65      0.68        40
     serious       0.85      0.78      0.82       153

    accuracy                           0.75       315
   macro avg       0.66      0.65      0.64       315
weighted avg       0.75      0.75      0.74       315



In [104]:
4
6
# User Input and Prediction
print("\nPredict Personality based on user input:")
gender = input("Gender (Male/Female): ")
age = float(input("Age: "))
openness = float(input("Openness (1-10): "))
neuroticism = float(input("Neuroticism (1-10): "))
conscientiousness = float(input("Conscientiousness (1-10): "))
agreeableness = float(input("Agreeableness (1-10): "))
extraversion = float(input("Extraversion (1-10): "))



Predict Personality based on user input:
Gender (Male/Female): Female
Age: 21
Openness (1-10): 3
Neuroticism (1-10): 4
Conscientiousness (1-10): 5
Agreeableness (1-10): 6
Extraversion (1-10): 7


In [105]:
# Create a DataFrame with user input
user_data = pd.DataFrame({
    "Gender": [gender],
    "Age": [age],
    "openness": [openness],
    "neuroticism": [neuroticism],
    "conscientiousness": [conscientiousness],
    "agreeableness": [agreeableness],
    "extraversion": [extraversion]
})

# Preprocess user input (encode categorical features and standardize numerical features)
user_data['Gender'] = label_encoder.transform(user_data['Gender'])
user_data = scaler.transform(user_data)

# Make a prediction
predicted_personality = model.predict(user_data)

print("Predicted Personality:", predicted_personality[0])

Predicted Personality: dependable


