In [114]:
# Suppressing Warnings
import warnings
warnings.filterwarnings('ignore')

In [115]:
# Importing Pandas and NumPy
import pandas as pd, numpy as np

In [116]:
# Importing all datasets
data_1 = pd.read_csv("Compatibility Questions.csv")
data_1.head()

Unnamed: 0,Compatibility Question,Sheldon,Joey,Rachael,Monica,Matching Probability
0,Do you have a huge tolerance capability?,Yes,No,No,No,1
1,Would you break off a relationship for practic...,Yes,No,Yes,No,0
2,Do you believe in faith?,No,No,No,No,2
3,Are you fine with me hanging out with friends...,No,Yes,No,No,1
4,Do you have a healthy relationship with your e...,No,No,No,No,2


In [117]:
# Let's check the dimensions of the dataframe
data_1.shape

(10, 6)

In [118]:
# Let's see the type of each column
data_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 6 columns):
Compatibility Question    10 non-null object
Sheldon                   10 non-null object
Joey                      10 non-null object
Rachael                   10 non-null object
Monica                    10 non-null object
Matching Probability      10 non-null int64
dtypes: int64(1), object(5)
memory usage: 608.0+ bytes


In [119]:
# List of variables to map
varlist =  ['Sheldon', 'Joey', 'Rachael', 'Monica']

# Defining the map function
def binary_map(x):
    return x.map({'Yes': 1, "No": 0})

# Applying the function to the housing list
data_1[varlist] = data_1[varlist].apply(binary_map)
data_1.head(10)

Unnamed: 0,Compatibility Question,Sheldon,Joey,Rachael,Monica,Matching Probability
0,Do you have a huge tolerance capability?,1.0,0,0.0,0,1
1,Would you break off a relationship for practic...,1.0,0,1.0,0,0
2,Do you believe in faith?,0.0,0,0.0,0,2
3,Are you fine with me hanging out with friends...,0.0,1,0.0,0,1
4,Do you have a healthy relationship with your e...,,0,,0,2
5,Where would you like to settle down and have a...,1.0,1,1.0,1,2
6,Would you be able to support your partner in f...,1.0,0,,0,2
7,Do you believe in exchanging gifts?,0.0,1,,1,2
8,Do you follow your head or your heart,1.0,1,1.0,1,2
9,Do you believe in living in before getting mar...,1.0,1,1.0,1,2


In [120]:
# Adding up the missing values (column-wise)
data_1.isnull().sum()

Compatibility Question    0
Sheldon                   1
Joey                      0
Rachael                   3
Monica                    0
Matching Probability      0
dtype: int64

In [121]:
#NaN replacing with 1 
data_1 = data_1.replace(np.nan, 1)
data_1.head(10)

Unnamed: 0,Compatibility Question,Sheldon,Joey,Rachael,Monica,Matching Probability
0,Do you have a huge tolerance capability?,1.0,0,0.0,0,1
1,Would you break off a relationship for practic...,1.0,0,1.0,0,0
2,Do you believe in faith?,0.0,0,0.0,0,2
3,Are you fine with me hanging out with friends...,0.0,1,0.0,0,1
4,Do you have a healthy relationship with your e...,1.0,0,1.0,0,2
5,Where would you like to settle down and have a...,1.0,1,1.0,1,2
6,Would you be able to support your partner in f...,1.0,0,1.0,0,2
7,Do you believe in exchanging gifts?,0.0,1,1.0,1,2
8,Do you follow your head or your heart,1.0,1,1.0,1,2
9,Do you believe in living in before getting mar...,1.0,1,1.0,1,2


# Model Building

In [122]:
data_1 = data_1.drop('Compatibility Question', 1)
data_1.head(10)

Unnamed: 0,Sheldon,Joey,Rachael,Monica,Matching Probability
0,1.0,0,0.0,0,1
1,1.0,0,1.0,0,0
2,0.0,0,0.0,0,2
3,0.0,1,0.0,0,1
4,1.0,0,1.0,0,2
5,1.0,1,1.0,1,2
6,1.0,0,1.0,0,2
7,0.0,1,1.0,1,2
8,1.0,1,1.0,1,2
9,1.0,1,1.0,1,2


In [109]:
#Including SelectKBest
#With Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
pipe = Pipeline((
("it", IterativeImputer()),
("pt",PowerTransformer()),
("sc", StandardScaler()),
("skb",SelectKBest(f_classif,k=3)),
("lr", LogisticRegression()),
))
pipe.fit(X_train,y_train)
print("Testing Accuracy")
print(pipe.score(X_test,y_test))
print("Training Accuracy")
print(pipe.score(X_train,y_train))

Testing Accuracy
0.5
Training Accuracy
0.875


In [110]:
predicted = pipe.predict(X_test)
from sklearn.metrics import confusion_matrix,classification_report,recall_score,precision_score,f1_score
print(confusion_matrix(y_test,predicted))
print(classification_report(y_test,predicted))

[[0 1]
 [0 1]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           2       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



# CONCLUSION

1. As there is less data, therefore our test result is less as compare to our training score.
2. We can see that both male candidates have 50% matching probability for each female candidates.
3. Accuracy is coming out to be 50% 