In [1]:
import math
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
def read_edit(csv_dir):
    """Load one verification-results log and normalise its identifier columns.

    The log is parsed as a header-less, whitespace-separated table with four
    fields per row: model path, pair-file name, pair count and accuracy.

    Parameters
    ----------
    csv_dir : str or path-like
        Location of the results log that is handed to ``pandas.read_csv``
        (callers in this notebook pass the path of a ``.log`` file).

    Returns
    -------
    pandas.DataFrame
        Columns ``model_name``, ``Acc`` and ``attr_name`` (in that order).
        ``model_name`` is the second-to-last ``/``-separated component of the
        logged model path; ``attr_name`` is the pair-file name up to the first
        occurrence of ``"from"`` (the whole name when ``"from"`` is absent).
    """
    attribute_res = pd.read_csv(
        csv_dir,
        header=None,
        # Raw string: "\s" inside a plain literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        sep=r"\s+",
        names=["model_name", "pair_file", "pair_size", "Acc"],
    )

    attribute_res["attr_name"] = attribute_res.pair_file.apply(
        lambda x: x.split("from")[0]
    )  # subgroup based
    # attribute_res["attr_name"] = attribute_res.pair_file.apply(
    #     lambda x: x.split('_')[0]) #attribute based

    # Keep only the directory that directly contains the logged model file.
    attribute_res["model_name"] = attribute_res.model_name.apply(
        lambda x: x.split("/")[-2]
    )

    # The raw pair-file name and pair count are not needed downstream.
    attribute_res = attribute_res.drop(columns=["pair_file", "pair_size"])
    return attribute_res

In [3]:
# Per-attribute results of the two training setups.
a = read_edit(
    "../../test_assets/AttributePairs/setup2/attribute_rfw_test_results.log"
)
b = read_edit(
    "../../test_assets/AttributePairs/setup1/attribute_rfw_test_results.log"
)

# Average the accuracy per (model, attribute) and attach it as "Mean_Acc".
attribute_res = pd.concat([a, b])
mean = attribute_res.groupby(["model_name", "attr_name"], observed=True).mean()
mean = mean.rename(columns={"Acc": "Mean_Acc"})

attribute_res = attribute_res.merge(
    mean, on=["model_name", "attr_name"], how="left", validate="m:1"
)
# One row per (model, attribute); the first occurrence is the one kept.
attribute_res = attribute_res.drop_duplicates(subset=["model_name", "attr_name"])

# The four race-level pair files are left out of the attribute statistics.
attribute_res = attribute_res[
    ~attribute_res.attr_name.isin(
        [
            'race_African_6000.csv',
            'race_Asian_6000.csv',
            'race_Indian_6000.csv',
            'race_Caucasian_6000.csv',
        ]
    )
]

for i, gg in attribute_res.groupby('model_name'):
    # numeric_only=True: the group also holds the string columns model_name and
    # attr_name, and DataFrame.std() raises TypeError on those in pandas >= 2.0.
    print(gg.model_name.unique(), gg.std(numeric_only=True) * 100)
    print("std:", np.std(gg.Acc.values, ddof=1) * 100)
    # Ratio of the largest error rate (1 - min accuracy) to the smallest one.
    print("bias:", (1 - gg.Acc.values.min()) / (1 - gg.Acc.values.max()))
    # Same two statistics with the type-1 skin and red-hair categories removed.
    attribute_res_excluded = gg[
        ~gg.attr_name.isin(['skintype_type1_6000.csv', 'haircolor_red_6000.csv'])
    ]
    print("excluded std:", np.std(attribute_res_excluded.Acc.values, ddof=1) * 100)
    print(
        "excluded bias:",
        (1 - attribute_res_excluded.Acc.values.min())
        / (1 - attribute_res_excluded.Acc.values.max()),
    )

['setup1_model'] Acc         2.44847
Mean_Acc    2.44847
dtype: float64
std: 2.4484696707477154
bias: 3.9332886356017416
excluded std: 2.399852979171312
excluded bias: 3.9332886356017416
['setup2_model'] Acc         2.060341
Mean_Acc    2.060341
dtype: float64
std: 2.060340787807872
bias: 3.3129144300599966
excluded std: 1.7725220443370566
excluded bias: 3.0493020493020526


In [4]:
# Show which columns the aggregated results frame carries at this point.
attribute_res.columns

Index(['model_name', 'Acc', 'attr_name', 'Mean_Acc'], dtype='object')

## Table 3. Attribute-based face verification performance on RFW. σ is the standard deviation of the accuracies over all attribute categories (including red hair and type 1 skin); σ∗ is the same standard deviation computed with those two categories excluded.
### Each entry is the result for a single attribute, evaluated on 3000 positive and 3000 negative pairs.
### Layout: 21 attribute scores + 4 race scores (African, Asian, Indian, Caucasian) + 2 metrics (bias and std) = 27 rows, 2 columns.

In [5]:
# Build readable attribute labels from the pair-file names: keep the first two
# "_"-separated tokens, expand the group prefixes, title-case, then put the
# value before the group (e.g. "haircolor_blonde_..." -> "Blonde Hair").
attribute_res["attr_name"] = attribute_res["attr_name"].apply(
    lambda x: " ".join(x.split("_")[0:2])
)
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace("skintype", "")
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace(
    "haircolor", "Hair "
)
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace(
    "hairtype", "Hair "
)
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace("lips", "Lips ")
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace("eye", "eye ")
attribute_res["attr_name"] = attribute_res["attr_name"].str.replace("nose", "nose ")
attribute_res["attr_name"] = attribute_res["attr_name"].str.title()
# split() without an argument collapses the doubled / leading blanks that the
# replacements above introduce (previously "Blonde  Hair", "Type2 ").
attribute_res["attr_name"] = attribute_res["attr_name"].apply(
    lambda x: " ".join(x.split()[::-1])
)
attribute_res = attribute_res[["model_name", "attr_name", "Mean_Acc"]]


attribute_res = attribute_res.sort_values(
    by=["model_name", "attr_name"], ascending=False
)

# After the descending sort the rows of one model come first and the rows of
# the other model second. Split at the midpoint instead of a hard-coded row
# count and make sure both halves list the same attributes in the same order
# before placing them side by side.
rows_per_model = len(attribute_res) // 2
first_half = attribute_res.iloc[:rows_per_model].reset_index(drop=True)
second_half = attribute_res.iloc[rows_per_model:].reset_index(drop=True)
if first_half["attr_name"].tolist() != second_half["attr_name"].tolist():
    raise ValueError(
        "The two models do not cover the same attributes; "
        "the side-by-side table would be misaligned."
    )

attribute_res = pd.concat(
    [second_half, first_half],
    axis=1,
    ignore_index=True,
)
print(attribute_res.columns)
# Columns: 0/3 = model names, 1/4 = attribute labels, 2/5 = mean accuracies.
attribute_res = attribute_res.drop(columns=[0, 3, 4])
attribute_res.columns = ["Attribute", "Setup 1 Accuracy (%)", "Setup 2 Accuracy (%)"]
# Sort while the accuracies are still numeric; sorting the formatted strings
# would be lexicographic.
attribute_res = attribute_res.sort_values(by=["Setup 1 Accuracy (%)"], ascending=False)
for acc_col in ["Setup 1 Accuracy (%)", "Setup 2 Accuracy (%)"]:
    attribute_res[acc_col] = attribute_res[acc_col].apply(
        lambda x: "{:.2f}".format(x * 100)
    )

# r100-arcface-emore	Wide Nose	83.43	Balanced_Softmax	Wide Nose	86.09
# r100-arcface-emore	Wavy Hair	89.73	Balanced_Softmax	Wavy Hair	91.22
attribute_res.head()

RangeIndex(start=0, stop=6, step=1)


Unnamed: 0,Attribute,Setup 1 Accuracy (%),Setup 2 Accuracy (%)
17,Blonde Hair,97.02,96.63
10,Red Hair,96.33,96.83
6,Type2,96.22,95.83
14,Gray Hair,94.85,95.83
20,Bald Hair,94.75,95.7


In [8]:
# Emit the side-by-side accuracy table as a LaTeX `table` environment.
table_tex = attribute_res.to_latex(
    index=False,
    caption="Table Caption",
    label="sampletableref",
    na_rep="",
)
print(table_tex)

\begin{table}
\centering
\caption{Table Caption}
\label{sampletableref}
\begin{tabular}{lll}
\toprule
     Attribute & Setup 1 Accuracy (\%) & Setup 2 Accuracy (\%) \\
\midrule
  Blonde  Hair &                97.02 &                96.63 \\
     Red  Hair &                96.33 &                96.83 \\
        Type2  &                96.22 &                95.83 \\
    Gray  Hair &                94.85 &                95.83 \\
    Bald  Hair &                94.75 &                95.70 \\
    Wavy  Hair &                94.32 &                95.50 \\
   Brown  Hair &                94.25 &                94.83 \\
        Type6  &                93.77 &                94.77 \\
  Narrow  Nose &                92.92 &                94.77 \\
        Type5  &                92.15 &                94.38 \\
   Curly  Hair &                92.02 &                93.63 \\
   Small  Lips &                91.92 &                94.98 \\
        Type3  &                91.72 &                

## Table 4. Subgroup-based face verification performance of RFW using training setup 1, sorted by descending order of accuracy.


In [7]:
# Load the subgroup ratios (parquet) and the subgroup feature codes (CSV).
# Reading parquet needs pyarrow or fastparquet; the recorded output of this
# cell is the ImportError raised when neither is installed.
ratios = pd.read_parquet("fair_face_challenge_evaluation/pair_selection/m43_ratios.pq")
subgroup_codes= pd.read_csv('fair_face_challenge_evaluation/subgroup_feature_codes.csv')

# Subgroup-based result logs of the two evaluated models.
a = read_edit(
    "validation/RFW_Subgroup_Based/40K_new/bupt_cos_dist_6k/attribute_rfw_test_results.log"
)

b = read_edit(
    "validation/RFW_Subgroup_Based/40K_new/vgg_cos_dist_6k/attribute_rfw_test_results.log"
)
df = pd.concat([a, b])

# Join key: the second-to-last "_"-separated token of the attribute name.
df['features'] = df.attr_name.apply(lambda x:x.split('_')[-2]).astype('str')

# Keys are compared as strings on both sides of each merge.
subgroup_codes['features'] = subgroup_codes['features'].astype('str')
# Attach the per-subgroup attribute columns (one row per feature code).
df = df.merge(subgroup_codes[['skintype',	'meta_skin','lips',	'eye',	'nose',	'hairtype','features']].drop_duplicates('features'),on='features')
# The ratios table is keyed by its 'index' column.
ratios['features'] = ratios['index'].astype('str')
df = df.merge(ratios,on='features')
df = df.rename(columns={"hairtype": "Hair Type", "meta_skin": "Skin Type",'lips':'Lips','nose':'Nose','eye':'Eye'})

#df[['Acc'	,'attr_code']].sort_values('Acc',ascending=False)

# df["Acc"] = (df["Acc"] * 100).round(2).astype(str) 

#df = df.pivot(columns=["model_name"], index=["features"])
# NOTE(review): with the pivot above commented out, no column is expected to be
# named after a model, so this rename presumably changes nothing - confirm.
df = df.rename(
    columns={"Balanced_Softmax": "Setup 2  Accuracy (%)", "r100-arcface-emore": "Setup 1 Accuracy (%)"}
)
# Moves the current index into a column; that column is not part of the
# selection below, so it is discarded again.
df = df.reset_index()



df = df[["Skin Type", "Lips", "Eye", "Nose", "Hair Type", "model_name", "Acc","Ratio"]]
# reset_index() without drop=True keeps the pre-sort row positions as an
# extra "index" column in the displayed frame.
# NOTE(review): confirm that column is wanted in the final table.
df = df.sort_values(by="Acc", ascending=False).reset_index()

# df["Skin Type"] = (
#     df["Skin Type"]
#     .str.replace("2", "{5,6}")
#     .str.replace("1", "{3,4}")
#     .str.replace("0", "{1,2}")
# )
df


# attrs.columns = attrs.iloc[0].apply(lambda x: (x.split("-")[0]).title())
# attrs = attrs.applymap(lambda x: x.split("-")[1].title())
# attrs = attrs.rename(columns={"Hairtype": "Hair Type", "Skin": "Skin Type"})
# df = pd.concat([df, attrs], axis=1)
# df = (
#     df.drop(columns=["attr_name"])
#     .set_index(attrs.columns.to_list())
#     .sort_index(level=attrs.columns)
# )
# # m43 = m43[["Skin Type", "Lips", "Eye", "Nose", "Hair Type", "Setup 1", "Setup 2"]]
# df = df.sort_values(by="Setup 2", ascending=False).reset_index()

# df["Skin Type"] = (
#     df["Skin Type"]
#     .str.replace("2", "{5,6}")
#     .str.replace("1", "{3,4}")
#     .str.replace("0", "{1,2}")
# )
# df

# m43 = m43.drop(columns=["Std", "len"])

# m43 = m43.sort_values("Acc",ascending=False)
# print("std:", np.std(m43.Acc.values,ddof=1) * 100)
# print("bias:", (1-m43.Acc.values.min()) / (1-m43.Acc.values.max()))



# # m43['attr_code'] = m43.attr_name.apply(lambda x:x.split('_')[-2])
# # m43[['Mean_Acc'	,'attr_code']].sort_values('Mean_Acc',ascending=False)
# #m43 = m43.drop(columns=["Acc"])
# m43["Mean_Acc"] = (m43["Mean_Acc"] * 100).round(2).astype(str) + "%"
# m43 = m43.pivot(columns=["model_name"], index=["attr_name"])
# m43.columns = m43.columns.droplevel(0)
# # TODO: verify w/ Seyma
# m43 = m43.rename(
#     columns={"Balanced_Softmax": "Setup 2", "r100-arcface-emore": "Setup 1"}
# )
# m43 = m43.reset_index()
# attrs = m43["attr_name"].str.split("_", expand=True)
# attrs = attrs.iloc[:, :6]


# attrs.columns = attrs.iloc[0].apply(lambda x: (x.split("-")[0]).title())
# attrs = attrs.drop(columns=["Meta"])
# attrs = attrs.applymap(lambda x: x.split("-")[1].title())
# attrs = attrs.rename(columns={"Hairtype": "Hair Type", "Skin": "Skin Type"})
# m43 = pd.concat([m43, attrs], axis=1)
# m43 = (
#     m43.drop(columns=["attr_name"])
#     .set_index(attrs.columns.to_list())
#     .sort_index(level=attrs.columns)
# )
# # m43 = m43[["Skin Type", "Lips", "Eye", "Nose", "Hair Type", "Setup 1", "Setup 2"]]
# m43 = m43.sort_values(by="Setup 2", ascending=False).reset_index()

# m43["Skin Type"] = (
#     m43["Skin Type"]
#     .str.replace("2", "{5,6}")
#     .str.replace("1", "{3,4}")
#     .str.replace("0", "{1,2}")
# )
# m43

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [None]:
# Fold the table in two: the first ceil(n / 2) rows stay on the left and the
# remaining rows are re-indexed from 0 and placed to their right.
half_ix = (m43.shape[0] + 1) // 2
upper_rows = m43.iloc[:half_ix]
lower_rows = m43.iloc[half_ix:].reset_index(drop=True)
m43 = pd.concat([upper_rows, lower_rows], axis=1, ignore_index=False)

print(m43.to_latex(index=False, na_rep=""))

In [None]:
# Standard deviation (numpy default, ddof=0), scaled by 100, of the mean
# "Setup 2" value of each skin-type group. The groups are coded "0", "1" and
# "2"; the previous version averaged group "1" twice and never used group "2".
np.asarray(
    [
        m43[m43["Skin Type"] == skin_code]["Setup 2"].mean()
        for skin_code in ("0", "1", "2")
    ]
).std() * 100

In [None]:
# Mean accuracy over the rows whose skin group is "[1,2]".
m43.loc[m43["Skin"] == "[1,2]", "Acc"].values.astype(float).mean()

In [None]:
# NOTE(review): `m43` is not assigned in any live cell visible in this notebook
# (only in commented-out code) - confirm where it is expected to come from.

# Reduce each group path to its file stem (no directory, no extension).
m43.group = m43.group.apply(lambda x: Path(x).stem)

# Split the stem on "_" into one column per token. "meta_skin" is shortened to
# "skin" first so that its own underscore does not produce an extra token.
m43_features = m43.group.str.replace("meta_skin", "skin").str.split("_", expand=True)
# Drop the last three tokens.
# NOTE(review): presumably a non-feature suffix of the file name - confirm.
m43_features = m43_features.iloc[:, :-3]
# Every remaining token is "<name>-<value>"; keep the value part.
m43_features = m43_features.applymap(lambda x: x.split("-")[1])
# Store the feature values as categoricals.
m43_features = m43_features.select_dtypes(["object"]).apply(
    pd.Series.astype, dtype="category"
)
# Feature columns first, followed by every original column except the first.
m43 = pd.concat([m43_features, m43.iloc[:, 1:]], axis=1)

# Accuracy as a percentage string with two decimals.
m43["Acc"] = m43["Acc"].apply(lambda x: f"{100*x:.2f}")
feature_cols = ["Skin", "Lips", "Eye", "Nose", "Hair Type"]
# Positional renaming: this requires exactly five feature columns plus "Acc".
m43.columns = feature_cols + ["Acc"]
m43 = m43.drop_duplicates(subset=feature_cols)

In [None]:
# Load the subgroup ratio table (needs a parquet engine: pyarrow or fastparquet).
ratios = pd.read_parquet("temp/m43_ratios.pq")

In [None]:
# Align the ratio table with the feature table: matching column names,
# string-typed skin codes, title-cased headers and the ratio rendered as a
# percentage string with two decimals.
ratios = ratios.rename(columns={"meta_skin": "skin", "hairtype": "Hair Type"}).assign(
    skin=lambda frame: frame["skin"].astype(str)
)
ratios.columns = ratios.columns.str.title()
ratios["Ratio"] = (100 * ratios["Ratio"]).map("{:.2f}".format)

In [None]:
# Attach the ratio of every feature combination. validate="1:1" makes pandas
# raise if a combination occurs more than once on either side.
m43 = m43.merge(ratios, on=feature_cols, how="left", validate="1:1")

In [None]:
# Compare the ratio totals before and after the merge. The builtin `float`
# replaces the `np.float` alias, which was removed in NumPy 1.24.
sum(ratios.Ratio.astype(float)), sum(m43.Ratio.astype(float))

In [None]:
# Title-case every cell; this requires every value in the frame to be a string.
m43 = m43.applymap(lambda x: x.title())
m43["Eye"] = m43["Eye"].replace({"Narrow": "Monolid", "Normal": "Other"})
# m43[feature_cols] = m43[feature_cols].columns.map(lambda x: x[0]) + m43[feature_cols].applymap(lambda x: x[0])
# Abbreviate every feature value to its first character.
m43[feature_cols] = m43[feature_cols].applymap(lambda x: x[0])
# Map the one-character skin codes to their ranges. Only the first call is a
# substring replacement (.str.replace); the next two are whole-value
# replacements (Series.replace), which is why the "1" and "2" inside the freshly
# inserted "[1,2]" are not rewritten again.
m43["Skin"] = (
    m43["Skin"].str.replace("0", "[1,2]").replace("1", "[3,4]").replace("2", "[5,6]")
)

In [None]:
# Presentation frame: percentage-style headers and a fixed column order.
# rename() and the column selection both return new objects, so `m43` itself
# is left untouched.
df = m43.rename(columns={"Acc": "Accuracy (%)", "Ratio": "Ratio (%)"})[
    ["Skin", "Lips", "Eye", "Nose", "Hair Type", "Ratio (%)", "Accuracy (%)"]
]

In [None]:
# NOTE(review): "\r" inside a non-raw literal is the carriage-return escape, so
# the labels built below start with CR + "ot{" rather than with a backslash.
# The literal is kept as is because the resulting labels are used as DataFrame
# column keys elsewhere in the notebook.
def rot_cols(col):
    """Return `col` wrapped as the argument of the header macro."""
    return "\rot{" + col + "}"


def get_col_index(col):
    """Return a two-level header tuple for `col`.

    Columns listed in `feature_cols` go under the "Feature" group; all others
    go under a blank group. The second level is the wrapped label.
    """
    group = "Feature" if col in feature_cols else " "
    return (group, rot_cols(col))


# multi_index = pd.MultiIndex.from_tuples(map(get_col_index, df.columns))
# df.columns = multi_index
cols = [rot_cols(label) for label in df.columns]
df.columns = cols
# df = df.sort_values(by=df.columns.tolist())

In [None]:
# Mean accuracy of the "[1,2]" skin group. The column keys come from
# rot_cols() instead of repeated literals, so the lookup always matches the
# labels that were actually assigned to the frame.
df.loc[df[rot_cols("Skin")] == "[1,2]", rot_cols("Accuracy (%)")].values.astype(
    "float"
).mean()

In [None]:
# Fold the table: rows [0, half_ix) on the left, the rest on the right, both
# re-indexed from 0 so that they align row by row.
half_ix = math.ceil(df.shape[0] / 2)

left_block = df.iloc[:half_ix].reset_index(drop=True)
right_block = df.iloc[half_ix:].reset_index(drop=True)
df = pd.concat([left_block, right_block], axis=1, ignore_index=True)

# df.columns = pd.MultiIndex.from_tuples(multi_index.to_list()*2)
# The same header labels are repeated for the right-hand block.
df.columns = cols + cols

In [None]:
# single_col_widths = [5] + [1] * 4 + [6, 6]
# single_col_widths = list(map(lambda x: "p{" + str(x) + "mm}", single_col_widths))
# single_col_widths = "".join(single_col_widths)
# multi_col_format = "|".join([single_col_widths] * 2)
multi_col_format = "c" * len(df.columns)
with pd.option_context("max_colwidth", 1000):
    latex = (
        df.to_latex(
            index=False,
            header=True,
            caption="Table Caption",
            label="sampletableref",
            #             multicolumn=True,
            na_rep="",
            column_format=multi_col_format,
            #         column_format='p{3.5cm}|p{5cm}|p{3.5cm}|p{5cm}',
            escape=False,
        )
        #     .replace("\$|\$", "$|$")
        #     .replace("\\textbackslash ", "\\")
        #     .replace("\{", "{")
        #     .replace("\}", "}")
        #         .replace('[(\d),(\d)]','\\left[\1,\2\\right]',regex=True)
        #         .replace('\textbackslash left','\left[')
    )
import re

latex = re.sub("\[(\d,\d)\]", r"$\{\1\}$", latex)
# latex = re.sub("\|", r"\\big|", latex)
latex = re.sub("%", r"\%", latex)
latex = latex.replace(
    r"\label{sampletableref}",
    r"\label{sampletableref} "
    + "\n"
    + r"\setlength\tabcolsep{2pt} "
    + "\n"
    + r"\resizebox{\columnwidth}{!}{",
)
latex = latex.replace(r"\end{tabular}", r"\end{tabular}}")

%clip latex

In [None]:
\begin{table}
\centering
\caption{Table Caption}
\label{sampletableref} 
\setlength\tabcolsep{2pt} 
\resizebox{\columnwidth}{!}{
\begin{tabular}{cccccccccccccc}
\toprule
\rot{Skin} & \rot{Lips} & \rot{Eye} & \rot{Nose} & \rot{Hair Type} & \rot{Ratio (\%)} & \rot{Accuracy (\%)} & \rot{Skin} & \rot{Lips} & \rot{Eye} & \rot{Nose} & \rot{Hair Type} & \rot{Ratio (\%)} & \rot{Accuracy (\%)} \\
\midrule
     $[1,2]$ &          S &         O &          N &               S &            3.82 &              96.53 &      $[3,4]$ &          B &         M &          W &               S &            1.55 &              91.63 \\
     $[3,4]$ &          S &         O &          N &               S &            7.43 &              96.45 &      $[1,2]$ &          S &         O &          N &               B &            0.28 &              91.29 \\
     $[3,4]$ &          S &         O &          N &               W &            3.67 &              96.11 &      $[5,6]$ &          B &         O &          N &               C &            1.97 &              91.23 \\
     $[1,2]$ &          S &         O &          W &               S &            3.03 &              95.63 &      $[3,4]$ &          S &         O &          W &               B &            1.68 &              91.01 \\
     $[1,2]$ &          S &         O &          N &               W &            1.64 &              95.62 &      $[1,2]$ &          B &         O &          N &               W &            0.27 &              90.74 \\
     $[1,2]$ &          B &         O &          N &               S &            0.70 &              95.59 &      $[3,4]$ &          S &         M &          W &               W &            0.96 &              90.17 \\
     $[3,4]$ &          B &         O &          N &               S &            3.59 &              95.28 &      $[1,2]$ &          S &         O &          W &               B &            0.46 &              89.78 \\
     $[3,4]$ &          B &         O &          W &               S &            4.47 &              94.98 &      $[5,6]$ &          S &         O &          N &               C &            0.81 &              89.50 \\
     $[3,4]$ &          S &         O &          W &               W &            2.95 &              94.92 &      $[3,4]$ &          S &         M &          N &               W &            1.20 &              89.35 \\
     $[3,4]$ &          S &         O &          W &               S &            8.83 &              94.92 &      $[5,6]$ &          B &         O &          W &               C &           13.09 &              89.18 \\
     $[1,2]$ &          B &         O &          W &               S &            0.33 &              94.87 &      $[3,4]$ &          B &         O &          W &               B &            0.80 &              86.02 \\
     $[1,2]$ &          S &         O &          W &               W &            0.72 &              94.56 &      $[5,6]$ &          S &         O &          W &               B &            0.99 &              85.90 \\
     $[3,4]$ &          S &         O &          W &               C &            0.51 &              93.89 &      $[3,4]$ &          B &         O &          W &               C &            0.46 &              85.38 \\
     $[3,4]$ &          B &         O &          W &               W &            1.90 &              93.41 &      $[3,4]$ &          S &         M &          N &               B &            0.32 &              84.10 \\
     $[3,4]$ &          B &         O &          N &               W &            1.94 &              93.10 &      $[5,6]$ &          S &         O &          N &               B &            0.30 &              82.81 \\
     $[3,4]$ &          S &         O &          N &               B &            0.68 &              92.50 &      $[3,4]$ &          S &         M &          W &               B &            0.52 &              82.67 \\
     $[3,4]$ &          S &         O &          N &               C &            0.31 &              92.45 &      $[3,4]$ &          B &         M &          N &               W &            0.43 &              82.04 \\
     $[5,6]$ &          S &         O &          W &               C &            2.81 &              92.23 &      $[5,6]$ &          B &         O &          N &               B &            0.53 &              81.24 \\
     $[3,4]$ &          S &         M &          W &               S &            6.59 &              91.93 &      $[1,2]$ &          S &         M &          N &               S &            0.47 &              81.04 \\
     $[3,4]$ &          B &         M &          N &               S &            1.81 &              91.78 &      $[3,4]$ &          B &         M &          W &               W &            0.27 &              79.47 \\
     $[5,6]$ &          B &         O &          W &               B &            3.62 &              91.74 &      $[5,6]$ &          B &         O &          W &               W &            0.32 &              78.94 \\
     $[3,4]$ &          S &         M &          N &               S &            7.95 &              91.70 &            &            &           &            &                 &                 &                    \\
\bottomrule
\end{tabular}}
\end{table}

# RESIZE

\resizebox{\columnwidth}{!}{
\begin{tabular}{p{5mm}p{1mm}p{1mm}p{1mm}p{1mm}p{7mm}p{5mm}p{1mm}p{1mm}p{1mm}p{1mm}p{7mm}}


--

\hline
\multicolumn{6}{l}{Bias}     &   8.683  &   \multicolumn{6}{l}{STD}    &   2.223 \\

In [None]:
# Inspect the header labels of the folded table.
df.columns

In [None]:
# Long format (one row per feature combination and metric), then pivot so the
# metrics become rows and the feature combinations become column levels.
t = m43.melt(id_vars=feature_cols, var_name="Metric").pivot_table(
    index=["Metric"], columns=feature_cols, aggfunc="first"
)
# Drop the outermost column level that pivot_table adds for the value column.
t.columns = t.columns.droplevel(0)
# Print one LaTeX table per top-level key.
# NOTE(review): the keys "1", "2", "3" and "Small"/"Big" must match the values
# present in `m43` at this point - confirm against the current encoding.
print(t["1"].to_latex())
print(t["3"].to_latex())
print(t["2"]["Small"].to_latex())
print(t["2"]["Big"].to_latex())

In [None]:
# Placeholder layout: one row per skin / nose / lips / eye code combination,
# all with accuracy 1.0, folded into two side-by-side blocks of 12 rows.
types = [f"S{low}{low + 1}" for low in (1, 3, 5)]
noses = ["NN", "NW"]
lips = ["BL", "SL"]
eyes = ["NN", "WE"]
hairtypes = ["HB", "HW", "HC", "HS"]

feature_grid = pd.MultiIndex.from_product([types, noses, lips, eyes])
df = pd.DataFrame(
    {
        # Level values joined into a single "a/b/c/d" label.
        "Feature": ["/".join(levels) for levels in feature_grid],
        "Acc": np.ones(24).astype(float).round(2),
    }
)

left_block = df.iloc[:12].reset_index(drop=True)
right_block = df.iloc[12:].reset_index(drop=True)
df = pd.concat([left_block, right_block], axis=1, ignore_index=True)
df.columns = ["Feature", "Acc", "Feature", "Acc"]

In [None]:
# Display the current table.
df

In [None]:
# Cross-attribute results: one whitespace-separated row per attribute pair.
# NOTE(review): absolute, machine-specific path - consider a configurable data
# directory so the notebook runs on other machines.
data = pd.read_csv(
    '/mnt/SSD/FacialPhenotypes/test_assets/pairs/AttributeCrossPairs/vgg_covariance_matrix_results.log',
    header=None,
    # Raw string: "\s" is an invalid escape sequence in a plain literal.
    sep=r'\s+',
    names=['p1', 'p2', 'acc', 'tpr', 'fpr', 'fnr', 'tnr'],
)
print(data.shape)
# log10 of the false-positive rate. log10(0) is undefined, so a rate of exactly
# 0 is stored as 0.
# NOTE(review): 0 is also log10(1), so a zero rate ends up at the top of the
# scale - confirm that this convention is intended.
data['fpr_log'] = data['fpr'].apply(lambda x: 0 if x == 0 else math.log10(x))
data

In [None]:
# Make the pair table symmetric: every (p1, p2) row is repeated with the two
# attribute columns swapped.
forward_pairs = data[['p1', 'p2', 'fpr_log']]
mirrored_pairs = data[['p2', 'p1', 'fpr_log']].rename(columns={'p2': 'p1', 'p1': 'p2'})
data_sym = pd.concat([forward_pairs, mirrored_pairs], axis=0, ignore_index=True)
data_sym.head()

In [None]:
# Raw attribute token -> descriptive label; each value names the attribute
# group (eye, lips, hair, skin, nose) the token belongs to.
renaming = {
    "Monolid": "monolid eye",
    "bald": "bald",
    "big": "full lips",
    "black": "black hair",
    "blonde": "blonde hair",
    "brown": "brown hair",
    "curly": "curly hair",
    "gray": "gray hair",
    "red": "red hair",
    "small": "small lips",
    "straight": "straight hair",
    "type1": "type 1 skin",
    "type2": "type 2 skin",
    "type3": "type 3 skin",
    "type4": "type 4 skin",
    "type5": "type 5 skin",
    "type6": "type 6 skin",
    "wavy": "wavy hair",
    "Other": "other eye",
    "narrow": "narrow nose",
    "wide": "wide nose",
}

In [None]:
# Replace the raw tokens in both pair columns with their descriptive labels
# and title-case the result.
for pair_col in ('p1', 'p2'):
    data_sym[pair_col] = data_sym[pair_col].replace(renaming).str.title()

In [None]:
def triu_anti(m, k=0):
    """Zero out one anti-diagonal corner of `m`.

    A boolean lower-triangular mask with diagonal offset ``k - 1`` is built for
    the last two dimensions of `m` and mirrored left-to-right. Entries under
    the mirrored mask are replaced by zeros of `m`'s dtype; all other entries
    are returned unchanged.
    """
    arr = np.asanyarray(m)
    lower = np.tri(*arr.shape[-2:], k=k - 1, dtype=bool)
    # Reversing the column order is what np.fliplr does.
    mirrored = lower[:, ::-1]
    zero_fill = np.zeros(1, arr.dtype)
    return np.where(mirrored, zero_fill, arr)


In [None]:
# Axis order of the heatmap, grouped by attribute family.
order = [
    # skin
    "Type 6 Skin", "Type 5 Skin", "Type 4 Skin", "Type 3 Skin", "Type 2 Skin",
    # eye
    "Monolid Eye", "Other Eye",
    # nose
    "Narrow Nose", "Wide Nose",
    # lips
    "Small Lips", "Full Lips",
    # hair
    "Wavy Hair", "Straight Hair", "Gray Hair", "Curly Hair",
    "Brown Hair", "Blonde Hair", "Black Hair", "Bald",
]

In [None]:
# Attribute-by-attribute matrix of log10 FPR: rows follow `order` reversed,
# columns follow `order`, and the two excluded categories are dropped.
excluded_labels = ['Type 1 Skin', 'Red Hair']
pxd = (
    data_sym.pivot_table(index='p2', columns='p1', values='fpr_log')
    .drop(index=excluded_labels, columns=excluded_labels)
    .reindex(order[::-1])
    .loc[:, order]
    .rename_axis(index='Phenotype Attributes', columns='Phenotype Attributes')
)
# Blank every cell that triu_anti leaves at 1 in an all-ones matrix.
trilmask = triu_anti(np.ones(pxd.shape, dtype=int), k=1) == 1
pxd[trilmask] = np.nan
# pxd = pxd.apply(np.log10)

In [None]:
# NOTE(review): `plt` is not imported in any cell visible in this notebook; it
# must be bound to matplotlib.pyplot before this cell runs. The "science",
# "ieee" and "vibrant" styles are not built into matplotlib (presumably they
# come from the SciencePlots package - confirm).
plt.style.use(["science", "ieee", "vibrant", "grid"])
plt.rcParams["grid.alpha"] = 0.35
plt.rcParams["grid.color"] = "#000000"
plt.rcParams["xtick.color"] = "#000000"


fig, ax = plt.subplots(figsize=(7, 5.25))
ax.autoscale(tight=True)

im = ax.imshow(pxd, cmap='RdBu_r')

# We want to show all ticks...
ax.set_xticks(np.arange(len(pxd.columns)))
ax.set_yticks(np.arange(len(pxd.index)))
# ... and label them with the respective list entries
ax.set_xticklabels(pxd.columns)
ax.set_yticklabels(pxd.index)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

vals = pxd.values
# Loop over data dimensions and create text annotations.
for i in range(vals.shape[0]):
    for j in range(vals.shape[1]):
        # Masked cells hold NaN; annotating them would place a "nan" label on
        # an empty cell of the saved figure.
        if pd.isna(vals[i, j]):
            continue
        # White text by default, gray inside the band -2.34 < value < -1.93.
        text_color = 'w'
        if -1.93 > vals[i, j] > -2.34:
            text_color = 'gray'
        ax.text(j, i, "{:.2f}".format(vals[i, j]),
                ha="center", va="center", color=text_color, fontsize='x-small')

# ax.set_title("asdasdas")
fig.tight_layout()

ax.grid(False)
ax.set_axisbelow(True)
ax.tick_params(which='minor', width=0, direction = 'out')
ax.tick_params(which='major', direction = 'out')

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
fig.savefig('matched-covariance.pdf',dpi=300)
fig.savefig('matched-covariance.svg',dpi=300)
plt.show()

In [None]:
import plotly.express as px

# Interactive version of the heatmap: 800 x 800, transparent backgrounds,
# "simple_white" template.
fig = px.imshow(pxd, color_continuous_scale='RdBu_r')
fig.update_layout(
    width=800,
    height=800,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    template="simple_white",
)
fig.show()

In [None]:
from collections import Counter

arr = data[['p1', 'p2', 'fpr_log']].values

# Walk the pair list in windows. In each window the most frequent label is the
# "main" attribute; every row that contains it is rewritten as
# [main, other attribute, value].
# NOTE(review): the windows start every 21 rows but span 22 rows, so each one
# also reads the first row of the next window - confirm this is intended.
all_frames = []
for start in range(0, data.shape[0], 21):
    window = arr[start:start + 22]
    labels = list(window[:, 0]) + list(window[:, 1])
    main = Counter(labels).most_common(1)[0][0]

    rows_for_main = [
        [main, row[1], row[2]] if row[0] == main else [main, row[0], row[2]]
        for row in window
        if row[0] == main or row[1] == main
    ]
    all_frames.append(pd.DataFrame(rows_for_main))
    


In [None]:
# Stack the per-attribute frames and name the three positional columns.
all_frames = pd.concat(all_frames).rename(columns={0: 'p1', 1: 'p2', 2: 'fmr'})
all_frames

In [None]:
import plotly.graph_objects as go

# Heatmap of the rearranged pair values: main attribute on x, paired attribute
# on y, value as colour.
fig = go.Figure(
    data=[
        go.Heatmap(
            x=all_frames.p1,
            y=all_frames.p2,
            z=all_frames.fmr,
            type='heatmap',
            colorscale='Viridis',
        )
    ]
)

fig.show()

In [None]:
# Scratch arithmetic.
# NOTE(review): presumably the number of cells in a 21 x 21 attribute grid -
# confirm or delete this cell.
21*21


In [None]:
# Spot check: rows for the pair ('brown', 'Other').
data[data.p1.eq('brown') & data.p2.eq('Other')]

In [None]:
# Spot check: rows for the pair ('type3', 'type6').
data[data.p1.eq('type3') & data.p2.eq('type6')]

In [None]:
# Spot check: rows for the pair ('type3', 'type3').
data[data.p1.eq('type3') & data.p2.eq('type3')]

In [None]:
# Spot check: rows for the pair ('type4', 'type4').
data[data.p1.eq('type4') & data.p2.eq('type4')]

In [None]:
# Spot check: rows for the pair ('type6', 'type6').
data[data.p1.eq('type6') & data.p2.eq('type6')]