In [4]:
import pandas as pd

graphafold_file = "graphafold_results/results-casp.csv"

df = pd.read_csv(graphafold_file, sep=";")
df.tail(25)

Unnamed: 0,Nazwa pliku,TP,FN,FP,PPV,TPR,INF
0,7ZJ4.amt,39,21,7,8478260869565220,65,742352312936209
1,8TVZ.amt,39,11,2,9512195121951220,78,861365903383803
2,8UYE.amt,12,6,1,9230769230769230,6666666666666670,7844645405527360
3,7PTK.amt,77,11,7,9166666666666670,875,8955910525085280
4,8FZA.amt,26,18,13,6666666666666670,5909090909090910,6276459144608480
5,8UYS.amt,26,4,0,10,8666666666666670,9309493362512630
6,7YR7.amt,14,8,9,6086956521739130,6363636363636360,622375914264142
7,7YR6.amt,9,1,3,75,9,8215838362577490
8,8UYG.amt,4,10,2,6666666666666670,2857142857142860,4364357804719850
9,8BTZ.amt,12,0,0,10,10,10


### Calculate F1-score

In [5]:
tp = df['TP'].values
fn = df['FN'].values
fp = df['FP'].values
f1_scores = 2 * tp / (2 * tp + fp + fn)
df['F1'] = f1_scores
df['F1'] = df['F1'].round(2)
df.head()

Unnamed: 0,Nazwa pliku,TP,FN,FP,PPV,TPR,INF,F1
0,7ZJ4.amt,39,21,7,8478260869565220,65,742352312936209,0.74
1,8TVZ.amt,39,11,2,9512195121951220,78,861365903383803,0.86
2,8UYE.amt,12,6,1,9230769230769230,6666666666666670,7844645405527360,0.77
3,7PTK.amt,77,11,7,9166666666666670,875,8955910525085280,0.9
4,8FZA.amt,26,18,13,6666666666666670,5909090909090910,6276459144608480,0.63


### Rename PDB Column

In [6]:
# rename columns
df.rename(columns={
    "Nazwa pliku": "PDB"
}, inplace=True)
pdbs = df['PDB'].values
pdbs = [pdb.split(".")[0] for pdb in pdbs]
df['PDB'] = pdbs
df.head()

Unnamed: 0,PDB,TP,FN,FP,PPV,TPR,INF,F1
0,7ZJ4,39,21,7,8478260869565220,65,742352312936209,0.74
1,8TVZ,39,11,2,9512195121951220,78,861365903383803,0.86
2,8UYE,12,6,1,9230769230769230,6666666666666670,7844645405527360,0.77
3,7PTK,77,11,7,9166666666666670,875,8955910525085280,0.9
4,8FZA,26,18,13,6666666666666670,5909090909090910,6276459144608480,0.63


### Calculate Precision and Recall


In [7]:
precision = tp / (tp + fp)
recall = tp / (tp + fn)
df['Precision'] = precision
df['Recall'] = recall
df.head()

Unnamed: 0,PDB,TP,FN,FP,PPV,TPR,INF,F1,Precision,Recall
0,7ZJ4,39,21,7,8478260869565220,65,742352312936209,0.74,0.847826,0.65
1,8TVZ,39,11,2,9512195121951220,78,861365903383803,0.86,0.95122,0.78
2,8UYE,12,6,1,9230769230769230,6666666666666670,7844645405527360,0.77,0.923077,0.666667
3,7PTK,77,11,7,9166666666666670,875,8955910525085280,0.9,0.916667,0.875
4,8FZA,26,18,13,6666666666666670,5909090909090910,6276459144608480,0.63,0.666667,0.590909


### Save the DataFrame to a CSV file


In [17]:
df_save = df[['PDB', 'Precision', 'Recall', 'F1', 'INF']]
df_save['INF'] = df_save['INF'].apply(lambda x: float(x.replace(",", ".")))
# round values in df_save
df_save = df_save.round(2)
# order by PDB column
df_save = df_save.sort_values(by=['PDB'])
df_save.head(20)
df_save.to_csv("graphafold_results.csv", index=False, sep=",")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_save['INF'] = df_save['INF'].apply(lambda x: float(x.replace(",", ".")))
