# Analiza konačnog skupa podataka

In [26]:
#import potrebnih knjižnica
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import nibabel as nib
import glob
import re
import os
import numpy as np
from matplotlib.colors import Normalize
import subprocess

In [15]:
# Load the IXI subject metadata table from CSV into a DataFrame.
metadata_df = pd.read_csv(
    filepath_or_buffer='../dataset/data/IXI_metadata_for_process.csv',
)
# Bare trailing expression: the notebook renders the frame as the cell output.
metadata_df

Unnamed: 0,IXI_ID,SEX_ID,HEIGHT,WEIGHT,ETHNIC_ID,MARITAL_ID,OCCUPATION_ID,QUALIFICATION_ID,DOB,DATE_AVAILABLE,STUDY_DATE,AGE
0,2,2,164,58,1,4,1,5,1970-01-30,1,2005-11-18,35.800137
1,12,1,175,70,1,2,1,5,1966-08-20,1,2005-06-01,38.781656
2,13,1,182,70,1,2,1,5,1958-09-15,1,2005-06-01,46.710472
3,15,1,181,90,2,1,6,5,1981-03-11,1,2005-06-23,24.284736
4,16,1,172,63,1,2,1,5,1950-04-24,1,2005-06-24,55.167693
...,...,...,...,...,...,...,...,...,...,...,...,...
556,648,1,193,120,1,1,6,4,1959-03-08,1,2006-11-27,47.723477
557,651,1,175,61,3,2,8,2,1956-07-09,1,2006-12-01,50.395619
558,652,1,163,80,1,1,1,5,1963-12-05,1,2006-12-01,42.989733
559,653,1,172,100,1,3,1,5,1960-09-14,1,2006-12-04,46.220397


In [16]:
# Remove IXI_ID 430, 433, 462 and 463 from the metadata.
# A single membership mask replaces four copy-pasted `!=` filters, so the
# excluded IDs are listed in exactly one place and the frame is filtered once.
excluded_ixi_ids = [430, 433, 462, 463]
metadata_df = metadata_df[~metadata_df['IXI_ID'].isin(excluded_ixi_ids)]

# Display the filtered frame as the cell output.
metadata_df

Unnamed: 0,IXI_ID,SEX_ID,HEIGHT,WEIGHT,ETHNIC_ID,MARITAL_ID,OCCUPATION_ID,QUALIFICATION_ID,DOB,DATE_AVAILABLE,STUDY_DATE,AGE
0,2,2,164,58,1,4,1,5,1970-01-30,1,2005-11-18,35.800137
1,12,1,175,70,1,2,1,5,1966-08-20,1,2005-06-01,38.781656
2,13,1,182,70,1,2,1,5,1958-09-15,1,2005-06-01,46.710472
3,15,1,181,90,2,1,6,5,1981-03-11,1,2005-06-23,24.284736
4,16,1,172,63,1,2,1,5,1950-04-24,1,2005-06-24,55.167693
...,...,...,...,...,...,...,...,...,...,...,...,...
556,648,1,193,120,1,1,6,4,1959-03-08,1,2006-11-27,47.723477
557,651,1,175,61,3,2,8,2,1956-07-09,1,2006-12-01,50.395619
558,652,1,163,80,1,1,1,5,1963-12-05,1,2006-12-01,42.989733
559,653,1,172,100,1,3,1,5,1960-09-14,1,2006-12-04,46.220397


## 1. Analiza metapodataka (ispitanici za treniranje/testiranje budućih dubokih modela, bez 15 ispitanika o kojima nemamo podataka)

In [17]:
# Write the current metadata table to the final CSV; index=False leaves the
# DataFrame row index out of the file.
metadata_df.to_csv(
    path_or_buf='../dataset/data/IXI_metadata_final.csv',
    index=False,
)

Kao što smo već vidjeli u prethodnoj analizi, dostupno nam je 576 T1 i T2 skenova (577 bez ispitanika IXI014, čiji sken ima premalu rezoluciju), a u metapodacima imamo samo 561 zapis. To znači da o 15 ispitanika nemamo nikakve informacije.

Također, iz analize ćemo izbaciti ispitanike IXI430, IXI433, IXI462 i IXI463 jer za njih nisu uspješno provedeni ekstrakcija, registracija i uzimanje aksijalnog 2D presjeka. Time u tablici ostaje 557 zapisa.

U nastavku je provedena eksplorativna analiza dostupnih informacija.

Prisjetimo se:

Među ispitanicima postoje sljedeće etničke skupine:
- 1 - White
- 4 - Black or Black British
- 3 - Asian or Asian British
- 5 - Chinese
- 6 - Other

Među ispitanicima postoje sljedeći bračni statusi:
- 1 - Single
- 2 - Married
- 4 - Divorced or Separated
- 3 - Cohabiting
- 5 - Widowed

Među ispitanicima postoje sljedeća zanimanja:
- 1 - Go out to full time employment
- 2 - Go out to part time employment (<25hrs)
- 3 - Study at college or university
- 4 - Full-time housework
- 5 - Retired
- 6 - Unemployed
- 7 - Work for pay at home
- 8 - Other

Među ispitanicima postoje sljedeće kvalifikacije:
- 1 - No qualifications
- 2 - O-levels, GCSEs, or CSEs
- 3 - A-levels
- 4 - Further education e.g. City & Guilds / NVQs
- 5 - University or Polytechnic degree

In [18]:
# Read the final metadata CSV back in; no index column is specified, so the
# frame gets a fresh default row index.
metadata_df = pd.read_csv(
    filepath_or_buffer='../dataset/data/IXI_metadata_final.csv',
)
# Bare trailing expression: the notebook renders the frame as the cell output.
metadata_df

Unnamed: 0,IXI_ID,SEX_ID,HEIGHT,WEIGHT,ETHNIC_ID,MARITAL_ID,OCCUPATION_ID,QUALIFICATION_ID,DOB,DATE_AVAILABLE,STUDY_DATE,AGE
0,2,2,164,58,1,4,1,5,1970-01-30,1,2005-11-18,35.800137
1,12,1,175,70,1,2,1,5,1966-08-20,1,2005-06-01,38.781656
2,13,1,182,70,1,2,1,5,1958-09-15,1,2005-06-01,46.710472
3,15,1,181,90,2,1,6,5,1981-03-11,1,2005-06-23,24.284736
4,16,1,172,63,1,2,1,5,1950-04-24,1,2005-06-24,55.167693
...,...,...,...,...,...,...,...,...,...,...,...,...
552,648,1,193,120,1,1,6,4,1959-03-08,1,2006-11-27,47.723477
553,651,1,175,61,3,2,8,2,1956-07-09,1,2006-12-01,50.395619
554,652,1,163,80,1,1,1,5,1963-12-05,1,2006-12-01,42.989733
555,653,1,172,100,1,3,1,5,1960-09-14,1,2006-12-04,46.220397


In [19]:
# Replace the integer codes of each categorical column with readable labels.
# Every column has its own code -> label table; codes that are missing from a
# table are left untouched by `replace`.
# NOTE(review): ETHNIC_ID code 2 is mapped to "Unknown" and does not appear in
# the legend given in the markdown above - confirm this is intended.
label_tables = {
    "SEX_ID": {1: "Muško", 2: "Žensko", 0: "Nepoznato"},
    "ETHNIC_ID": {
        1: "White",
        4: "Black/Black British",
        3: "Asian/Asian British",
        5: "Chinese",
        6: "Other",
        2: "Unknown",
        0: "Unknown",
    },
    "MARITAL_ID": {
        1: "Single",
        2: "Married",
        4: "Divorced/Separated",
        3: "Cohabiting",
        5: "Widowed",
        0: "Unknown",
    },
    "OCCUPATION_ID": {
        1: "Full time employment",
        2: "Part time employment",
        3: "College/university",
        4: "Housework",
        5: "Retired",
        6: "Unemployed",
        7: "Work from home",
        8: "Other",
        0: "Unknown",
    },
    "QUALIFICATION_ID": {
        1: "None",
        2: "O-level/GCSE/CSE",
        3: "A-level",
        4: "Further education/NVQ",
        5: "University or Polytechnic degree",
        0: "Unknown",
    },
}

# Apply the tables column by column, in the order they are declared.
for column_name, code_to_label in label_tables.items():
    metadata_df[column_name] = metadata_df[column_name].replace(code_to_label)

In [20]:
# The distributions of the subjects' ethnicity, marital status, sex, occupation
# and qualification are analysed with donut charts; this cell plots sex.
fig1 = px.pie(
    data_frame=metadata_df,
    names="SEX_ID",
    title="Distribucija spola ispitanika",
    hole=0.3,
).update_layout(width=800, height=600)  # reduce the figure width and height
fig1.show()


In [21]:
# Donut chart of the subjects' ethnicity (labels come from the ETHNIC_ID column).
# Title fixed: it previously read "etičke" (ethical) instead of "etničke"
# (ethnic), which mislabelled the chart.
fig2 = px.pie(
    metadata_df,
    names="ETHNIC_ID",
    title="Distribucija etničke pripadnosti ispitanika",
    hole=0.3,
)
fig2.update_layout(width=800, height=600)  # fixed 800x600 figure size
fig2.show()

In [22]:
# Donut chart of the subjects' marital status (labels from MARITAL_ID).
fig3 = px.pie(
    data_frame=metadata_df,
    names="MARITAL_ID",
    title="Distribucija bračnog statusa ispitanika",
    hole=0.3,
).update_layout(width=800, height=600)  # fixed 800x600 figure size
fig3.show()

In [23]:
# Donut chart of the subjects' occupation (labels from OCCUPATION_ID).
fig4 = px.pie(
    data_frame=metadata_df,
    names="OCCUPATION_ID",
    title="Distribucija zanimanja ispitanika",
    hole=0.3,
).update_layout(width=800, height=600)  # fixed 800x600 figure size
fig4.show()

In [24]:
# Donut chart of the subjects' qualification (labels from QUALIFICATION_ID).
fig5 = px.pie(
    data_frame=metadata_df,
    names="QUALIFICATION_ID",
    title="Distribucija kvalifikacije ispitanika",
    hole=0.3,
).update_layout(width=800, height=600)  # fixed 800x600 figure size
fig5.show()

Iz ovoga možemo zaključiti da većinu ispitanika čine žene, da je preko 80% ispitanika bijele rase te da su ispitanici većinom samci ili u braku. Više od 50% ispitanika ima sveučilišnu diplomu, a gotovo 50% njih radi puno radno vrijeme.

In [25]:
# Age distribution of the subjects, drawn as a 20-bin histogram of AGE.
fig6 = px.histogram(
    data_frame=metadata_df,
    x="AGE",
    nbins=20,
    opacity=0.7,
    title="Distribucija godina ispitanika",
)

# Thin black outline around each bar so neighbouring bins stay distinguishable.
bar_marker = {"line": {"width": 1, "color": "black"}}
fig6.update_traces(marker=bar_marker)

# Figure size, axis captions and the white plotly theme.
layout_options = {
    "width": 800,
    "height": 600,
    "xaxis_title": "Dob ispitanika",
    "yaxis_title": "Broj ispitanika",
    "template": "plotly_white",
}
fig6.update_layout(**layout_options)

fig6.show()

Vidimo da postoje ispitanici raznih dobnih skupina, a najviše ih ima šezdesetak godina.