# fastMRI dataset statistics

The generated `csv` files contain metadata for every scan. This notebook extracts that metadata to compute statistics about the datasets for further analysis, e.g. acquisition type (`type`), MR scanner (`systemModel`), and acceleration factor (`acc`).

In [12]:
import pandas as pd
import matplotlib

matplotlib.rcParams['font.family'] = "monospace"

# Which metadata table to analyse: ./resources/<anatomy>/<dataset>.csv
anatomy = 'brain'
dataset = 'multicoil_val'

df_raw = pd.read_csv(f'./resources/{anatomy}/{dataset}.csv')

# Map the listed raw values to 1.5 and 3 (presumably the nominal field strengths
# in tesla). The mapping is applied to the field-strength column only, so an
# identical number in any other column is left untouched.
nominal_field_strength = {1.4940: 1.5, 2.8936: 3, 2.89362: 3}
df_nominal = df_raw.assign(
    systemFieldStrength_T=df_raw['systemFieldStrength_T'].replace(nominal_field_strength)
)

# Columns to group by. Other groupings of interest:
#   ['acquisition', 'systemVendor', 'systemModel', 'systemFieldStrength_T']
#   ['systemModel', 'systemFieldStrength_T']
#   ['acquisition']
group_keys = ['systemModel']  # investigate scanner model distribution in the fastMRI dataset

# For every group, count the non-null entries of each remaining column.
df = df_nominal.groupby(group_keys).count()
df

1378


Unnamed: 0_level_0,filename,acquisition,systemVendor,systemFieldStrength_T,receiverChannels,institutionName,enc_x,enc_y,enc_z,rec_x,...,rec_z_mm,nPE,acc,num_low_freq,TR,TE,TI,flipAngle_deg,sequence_type,echo_spacing
systemModel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aera,291,291,291,291,291,291,291,291,291,291,...,291,291,291,291,291,291,291,291,291,291
Avanto,312,312,312,312,312,312,312,312,312,312,...,312,312,312,312,312,312,312,312,312,312
Biograph_mMR,149,149,149,149,149,149,149,149,149,149,...,149,149,149,149,149,149,149,149,149,116
Prisma_fit,123,123,123,123,123,123,123,123,123,123,...,123,123,123,123,123,123,123,123,123,97
Skyra,364,364,364,364,364,364,364,364,364,364,...,364,364,364,364,364,364,364,364,364,274
TrioTim,139,139,139,139,139,139,139,139,139,139,...,139,139,139,139,139,139,139,139,139,65


## Plot Dataset Distribution for the Knee and Brain (Neuro) Datasets (separately)

In [55]:
import plotly.express as px

# Count the samples per (scanner model, field strength) directly from the raw
# metadata, so this cell does not depend on which grouping `df` currently holds.
scanner_counts = (
    df_raw
    .assign(
        # Same value mapping as in the loading cell, restricted to this column.
        systemFieldStrength_T=df_raw['systemFieldStrength_T'].replace(
            {1.4940: 1.5, 2.8936: 3, 2.89362: 3}
        )
    )
    .groupby(['systemModel', 'systemFieldStrength_T'])['filename']
    .count()
    .reset_index(name='num_samples')
)
# Legend label per row, e.g. "1.5 T"; one row per (model, field strength) pair,
# so a model that occurs with several field strengths gets one bar segment each.
scanner_counts['field_strength'] = [
    f'{t} T' for t in scanner_counts['systemFieldStrength_T']
]

fig = px.bar(
    scanner_counts,
    x='systemModel',
    y='num_samples',
    color='field_strength',
    title=f'Anatomy: {anatomy}',  # follows the anatomy selected in the loading cell
)
fig.update_layout(
    xaxis_title="Scanner Model",
    yaxis_title="Num. of Data Samples",
    legend_title="Field Strength",
    font=dict(
        family="Times New Roman",
        size=28,
        color="black"
    )
)
fig.show()

In [64]:
import plotly.express as px

# Count the samples per acquisition (sequence) directly from the raw metadata,
# so the bars match the "Sequence" axis regardless of how `df` was grouped.
sequence_counts = (
    df_raw
    .groupby('acquisition')['filename']
    .count()
    .reset_index(name='num_samples')
)

fig = px.bar(
    sequence_counts,
    x='acquisition',
    y='num_samples',
    title=f'Anatomy: {anatomy}',  # follows the anatomy selected in the loading cell
)
fig.update_layout(
    xaxis_title="Sequence",
    yaxis_title="Num. of Data Samples",
    font=dict(
        family="Times New Roman",
        size=28,
        color="black"
    )
)
fig.show()

['CORPDFS_FBK', 'CORPD_FBK']


In [3]:
# Export the figure currently bound to `fig` as a static PNG.
# NOTE(review): the directory is spelled ".figures" (a hidden folder), unlike the
# "./resources" path used for loading the CSV; confirm it is not a typo for "./figures".
# NOTE(review): in top-to-bottom order `fig` is the plot from the preceding cell,
# whose x-axis is titled "Sequence"; confirm that is the figure this file name refers to.
figure_path = ".figures/scanner_distribution.png"
fig.write_image(figure_path)