# <black>Importing all the necessary libraries required.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [3]:
trainDataframe = pd.read_csv('train.csv', names=["type", "tweets"], dtype={
    "type": str, "tweets": str})
testDataframe = pd.read_csv('test.csv', names=["type", "tweets"], dtype={
    "type": str, "tweets": str})
testSingleDataframe = pd.read_csv('testSingle.csv', names=["type", "tweets"], dtype={
    "type": str, "tweets": str})
TWEETS_PER_PERSON = 50

personalities = {
    "extrovert": {"I": 0, "E": 1},    # key-pair values
    "sensible": {"N": 0, "S": 1},
    "emotional": {"T": 0, "F": 1},
    "perceiving": {"J": 0, "P": 1}
}
attribute_counters = {
    "Longs_PT": "...",
    "Surprise_PT": "!",
    "Images_PT": "jpg",
    "Questions_PT": "?",
    "Music_PT": "music",
    "URLS_PT": "http",
    "Words_PT": " "
}
def processXY(df, isCheck=False):
    index = 0
    if not isCheck:
        for personality, symbol_map in personalities.items():
            # generate numeric column for each personality type
            # since model can only be trained on numeric column
            df[personality] = df["type"].str[index].map(symbol_map)
            index += 1

    for newcol, newcolcounter in attribute_counters.items():
        df[newcol] = df["tweets"].apply(
            lambda x: x.count(newcolcounter) / TWEETS_PER_PERSON)

    df = df.fillna(0)
    # example
    print(df.Surprise_PT)
       
    X = None

    # drop non-numeric columns
    
    if isCheck:
        X = df.drop(["type", "tweets"], axis=1).values     # axis=1 means column drop; axis=0 means row drop
    else:
        X = df.drop(["type", "emotional", "sensible",
                     "extrovert", "perceiving", "tweets"], axis=1).values

    # Change this string to check different person type
    Y = [] if isCheck else df[["extrovert"]].values.ravel()

    return (X, Y)

In [4]:
X_train, y_train = processXY(trainDataframe)
X_test, y_test = processXY(testDataframe)
  

0       0.06
1       0.00
2       0.08
3       0.06
4       0.02
5       0.00
6       0.06
7       0.00
8       0.02
9       0.06
10      0.20
11      0.56
12      0.16
13      0.12
14      0.04
15      0.08
16      0.40
17      0.20
18      0.12
19      0.10
20      0.04
21      0.02
22      0.40
23      0.08
24      0.10
25      0.22
26      0.24
27      0.24
28      0.12
29      0.14
        ... 
6970    0.12
6971    0.00
6972    0.08
6973    0.52
6974    0.02
6975    0.34
6976    0.24
6977    0.02
6978    0.02
6979    0.08
6980    0.10
6981    0.08
6982    1.28
6983    0.42
6984    0.22
6985    0.18
6986    0.10
6987    0.06
6988    0.02
6989    0.02
6990    0.00
6991    0.14
6992    0.00
6993    0.12
6994    0.00
6995    0.02
6996    0.06
6997    0.86
6998    0.42
6999    0.40
Name: Surprise_PT, Length: 7000, dtype: float64
0      0.42
1      0.26
2      0.10
3      0.20
4      0.20
5      0.70
6      0.28
7      0.08
8      0.10
9      0.10
10     0.10
11     0.12
12     0.02
13 

## Applying Logistic Regression

In [5]:
model = LogisticRegression(solver="newton-cg", max_iter=100)
model.fit(X_train, y_train)

accuracy = round(model.score(X_test, y_test) * 100, 2)
print(round(accuracy, 2, ), "% (Accuracy)")            # 75.6% accuracy




75.6 % (Accuracy)


## Testing the Trained Model

In [6]:
# to display predictions for test single 
X_singletest, y_temp = processXY(testSingleDataframe, isCheck=True)
y_answer = model.predict(X_singletest)
s = "Extrovert" if y_answer[0] else "Introvert"
print(f"The single test case is {s}")

exit(0)
print("Other tests are as follows:")
# display predictions for all people
for i in range(X_test.shape[0]):
    s = "Extrovert" if y_test[i] else "Introvert"
    print(f"Person {i + 1} = {s}")

0    0.22
Name: Surprise_PT, dtype: float64
The single test case is Introvert
Other tests are as follows:
Person 1 = Extrovert
Person 2 = Introvert
Person 3 = Extrovert
Person 4 = Introvert
Person 5 = Introvert
Person 6 = Extrovert
Person 7 = Extrovert
Person 8 = Introvert
Person 9 = Introvert
Person 10 = Introvert
Person 11 = Introvert
Person 12 = Introvert
Person 13 = Introvert
Person 14 = Extrovert
Person 15 = Introvert
Person 16 = Introvert
Person 17 = Introvert
Person 18 = Extrovert
Person 19 = Extrovert
Person 20 = Introvert
Person 21 = Introvert
Person 22 = Introvert
Person 23 = Introvert
Person 24 = Introvert
Person 25 = Introvert
Person 26 = Introvert
Person 27 = Introvert
Person 28 = Extrovert
Person 29 = Introvert
Person 30 = Introvert
Person 31 = Introvert
Person 32 = Introvert
Person 33 = Introvert
Person 34 = Extrovert
Person 35 = Extrovert
Person 36 = Introvert
Person 37 = Introvert
Person 38 = Extrovert
Person 39 = Introvert
Person 40 = Introvert
Person 41 = Introvert
P