In [67]:
# import
import pandas as pd 
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [68]:
# Read the marriage records, using the column labelled "Unnamed: 0" as the row index
marriage_csv = "../data/zodiac_marriage_success.csv"
df = pd.read_csv(marriage_csv, index_col="Unnamed: 0")
df.head()

Unnamed: 0,zodiac1,zodiac2,divorce
22554,Scorpio,Aries,0
12203,Aquarius,Gemini,0
17722,Taurus,Virgo,0
7319,Scorpio,Gemini,0
21310,Taurus,Gemini,0


In [69]:
# One-hot encode each partner's sign (zodiac1, zodiac2) and add the two encodings, so each
# row of X counts how many of the two partners carry each sign (2 for a same-sign couple).

# Every sign seen in either partner column, sorted; shared column order for both encodings.
idx = pd.Index(df["zodiac1"].dropna().unique()).union(df["zodiac2"].dropna().unique())

# dtype=int: pandas >= 2.0 returns bool dummies by default, and adding bool arrays is a
# logical OR, which would turn the per-sign count into a plain flag.
# reindex: pins both encodings to the same columns so the element-wise sum stays aligned
# even if some sign is absent from one of the two columns.
X1 = pd.get_dummies(df["zodiac1"], dtype=int).reindex(columns=idx, fill_value=0).values
X2 = pd.get_dummies(df["zodiac2"], dtype=int).reindex(columns=idx, fill_value=0).values
X = pd.DataFrame(X1 + X2, columns=idx)

# define y variable
y = df["divorce"]

# Rearranging index to start from 1
X.index = np.arange(1, len(X) + 1)

# avoid the multi-line formatting
pd.set_option('expand_frame_repr', False)

print(X)


      Aquarius  Aries  Cancer  Capricorn  Gemini  Leo  Libra  Pisces  Sagittarius  Scorpio  Taurus  Virgo
1            0      1       0          0       0    0      0       0            0        1       0      0
2            1      0       0          0       1    0      0       0            0        0       0      0
3            0      0       0          0       0    0      0       0            0        0       1      1
4            0      0       0          0       1    0      0       0            0        1       0      0
5            0      0       0          0       1    0      0       0            0        0       1      0
...        ...    ...     ...        ...     ...  ...    ...     ...          ...      ...     ...    ...
2564         1      0       0          1       0    0      0       0            0        0       0      0
2565         0      1       0          0       0    1      0       0            0        0       0      0
2566         0      0       0          0      

In [70]:
# Fixed seed so the train/test split, and therefore the accuracy shown below, is the same
# on every Restart & Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SPLIT_SEED)

# Fit a logistic regression on the training rows
classifier = LogisticRegression(solver='lbfgs', max_iter=200)
classifier.fit(X_train, y_train)

# Accuracy on the held-out rows
predictions_test = classifier.predict(X_test)
accuracy_score(y_test, predictions_test)

0.5233644859813084

In [71]:
# Enumerate every ordered (zodiac1, zodiac2) pairing of the twelve signs
from itertools import product

zodiacs = [
    'Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo',
    'Libra', 'Scorpio', 'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces',
]
zodiacs_df = pd.DataFrame({"zodiac": zodiacs})

# repeat=2 pairs the sign column with itself: 12 x 12 rows, first sign varying slowest
sign_pairs = list(product(zodiacs_df["zodiac"], repeat=2))
all_zodiacs = pd.DataFrame(sign_pairs, columns=["zodiac1", "zodiac2"])
all_zodiacs

Unnamed: 0,zodiac1,zodiac2
0,Aries,Aries
1,Aries,Taurus
2,Aries,Gemini
3,Aries,Cancer
4,Aries,Leo
...,...,...
139,Pisces,Scorpio
140,Pisces,Sagittarius
141,Pisces,Capricorn
142,Pisces,Aquarius


In [72]:
# Encode every sign pair in all_zodiacs the same way as the training features:
# per-sign counts obtained by adding the one-hot encodings of the two partners.

# Every sign present in either column, sorted; shared column order for both encodings.
idx_new = pd.Index(all_zodiacs["zodiac1"].unique()).union(all_zodiacs["zodiac2"].unique())

# dtype=int: pandas >= 2.0 returns bool dummies by default, and adding bool arrays is a
# logical OR, so a same-sign pair would be encoded as a single flag instead of a count of 2.
# reindex: keeps both encodings on identical columns before the element-wise sum.
X1_pred = pd.get_dummies(all_zodiacs["zodiac1"], dtype=int).reindex(columns=idx_new, fill_value=0).values
X2_pred = pd.get_dummies(all_zodiacs["zodiac2"], dtype=int).reindex(columns=idx_new, fill_value=0).values
X_pred = pd.DataFrame(X1_pred + X2_pred, columns=idx_new)

# The classifier was fitted on X, so the prediction matrix must expose the same columns
# in the same order.
assert list(X_pred.columns) == list(X.columns), "prediction features do not match training features"

# Rearranging index to start from 1
X_pred.index = np.arange(1, len(X_pred) + 1)

# avoid the multi-line formatting
pd.set_option('expand_frame_repr', False)

In [73]:
# Predict the divorce label for every sign pair encoded in X_pred
predictions = classifier.predict(X_pred)

In [74]:
# NOTE(review): `re` is not referenced in this cell; the import is kept in case other cells use it.
import regex as re

# Attach the model's prediction to each sign pair. Only the two key columns are taken from
# all_zodiacs, so re-running this cell after all_zodiacs has been rebound to the merged frame
# does not merge twice (which would produce Compatibility_rate_x / Compatibility_rate_y).
pair_predictions = all_zodiacs[["zodiac1", "zodiac2"]].assign(bad_match=predictions)

compat = pd.read_csv("../data/Comp_matrix.csv")

# Zero-width split point: after at least one character and right before an uppercase letter,
# used to cut the "Zodiac_combination" text into its two sign names.
p = '(?<=.)(?=[A-Z])'
compat[["zodiac1","zodiac2"]] = compat["Zodiac_combination"].str.split(p,expand=True)
compat = compat[["zodiac1","zodiac2","Compatibility_rate"]]

# Default (inner) join: pairs that have no row in compat are dropped from the result.
all_zodiacs = pd.merge(pair_predictions, compat, on=["zodiac1","zodiac2"])

In [75]:
# Write the merged outcome table twice: JSON keyed by row index, and a CSV copy
outcome_json_path = "../UI/Static//zodiac_outcome.json"
outcome_csv_path = "../data/zodiac_outcome.csv"

all_zodiacs.to_json(outcome_json_path, orient='index')
all_zodiacs.to_csv(outcome_csv_path)