In [18]:
import pandas as pd
import plotly.express as px
import os
from plotly.offline import plot, iplot, init_notebook_mode
import plotly.io as pio
# Show the Plotly renderer configuration (default renderer and the available ones).
pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'plotly_mimetype+notebook_connected'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']

In [19]:
# Load the thyroid data set from the CSV file (path is relative to this notebook).
thyroid_csv_path = '../DataBases/thyroid/all_conditions.csv'
df = pd.read_csv(thyroid_csv_path)

In [20]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
summary_stats = df.describe()
print(summary_stats)


              age          TSH           T3          TT4          T4U  \
count  2799.00000  2516.000000  2215.000000  2616.000000  2503.000000   
mean     51.84423     4.672150     2.024966   109.072401     0.997912   
std      20.46116    21.449453     0.824600    35.392443     0.194390   
min       1.00000     0.005000     0.050000     2.000000     0.310000   
25%      36.00000     0.440000     1.600000    88.000000     0.880000   
50%      54.00000     1.400000     2.000000   104.000000     0.980000   
75%      67.00000     2.600000     2.400000   125.000000     1.080000   
max     455.00000   478.000000    10.600000   430.000000     2.120000   

               FTI  TBG   patient_id  
count  2505.000000  0.0  2800.000000  
mean    110.787984  NaN  1895.603214  
std      32.883986  NaN  1091.130220  
min       2.000000  NaN     1.000000  
25%      93.000000  NaN   955.750000  
50%     107.000000  NaN  1913.000000  
75%     124.000000  NaN  2837.500000  
max     395.000000  NaN  3772.

In [21]:
# In this example we only look at how the TSH values relate to the other measurements;
# the pairwise Pearson correlation matrix is used to check this.
# The frame also holds text columns (e.g. 'sex', 'TSH_measured'). pandas >= 2.0 raises
# on such columns instead of silently skipping them, so the numeric columns are
# selected explicitly before computing the correlation.
numeric_columns = df.select_dtypes(include='number')
print(numeric_columns.corr(method='pearson'))

                 age       TSH        T3       TT4       T4U       FTI  TBG  \
age         1.000000 -0.059776 -0.244702 -0.052111 -0.163357  0.037623  NaN   
TSH        -0.059776  1.000000 -0.160890 -0.254791  0.067705 -0.291188  NaN   
T3         -0.244702 -0.160890  1.000000  0.563901  0.463434  0.351013  NaN   
TT4        -0.052111 -0.254791  0.563901  1.000000  0.434248  0.795206  NaN   
T4U        -0.163357  0.067705  0.463434  0.434248  1.000000 -0.173056  NaN   
FTI         0.037623 -0.291188  0.351013  0.795206 -0.173056  1.000000  NaN   
TBG              NaN       NaN       NaN       NaN       NaN       NaN  NaN   
patient_id -0.044519  0.059232 -0.014278  0.033770 -0.025212  0.058891  NaN   

            patient_id  
age          -0.044519  
TSH           0.059232  
T3           -0.014278  
TT4           0.033770  
T4U          -0.025212  
FTI           0.058891  
TBG                NaN  
patient_id    1.000000  


In [22]:
# Keep only the columns used in the rest of the analysis.
columns_of_interest = ['age', 'sex', 'TSH_measured', 'TSH']
df = df.loc[:, columns_of_interest]

In [23]:
# Keep only the rows whose TSH_measured flag is true ('t').
tsh_was_measured = df['TSH_measured'].eq('t')
df = df[tsh_was_measured]

In [24]:
# Keep only rows with a usable sex label and a plausible age.
# - `isin` keeps rows whose sex is exactly 'F' or 'M' (drops null, blank and any
#   other unwanted values).
# - The age comparison also drops rows with a missing age, because NaN < limit is False.
# `.copy()` makes the result an independent frame, so the columns assigned to `df`
# in later cells are written to this frame itself rather than to a slice of the
# previous one (avoids pandas' SettingWithCopyWarning).
MAX_PLAUSIBLE_AGE = 110  # exclusive upper bound; the describe() output shows a max age of 455
sex_is_known = df['sex'].isin(['F', 'M'])
age_is_plausible = df['age'] < MAX_PLAUSIBLE_AGE
df = df[sex_is_known & age_is_plausible].copy()

In [25]:
# Scatter plot of TSH against age, with one panel and one colour per sex.
age_values, tsh_values = df['age'], df['TSH']
fig = px.scatter(df, x=age_values, y=tsh_values, color='sex', facet_col='sex')
# Enlarge the markers of every trace.
fig.update_traces(marker_size=12)
# Last expression of the cell: sets the title and displays the figure.
fig.update_layout(title_text="Thyroid-stimulating hormone measured in Females and Males with Different Ages")


In [26]:
# Females appear more prone to thyroid disease than males: there are TSH readings
# in roughly the 80-200 range that are relatively high compared to males.
#
# To see how the standard deviation relates to these two panels, the mean and
# standard deviation are computed with pandas.

# Mean, standard deviation and "mean + 1 std" upper limit of TSH for females.
# Each statistic is a single number, stored as a constant column on df.
is_female = df['sex'] == 'F'
female_tsh = df.loc[is_female, 'TSH']
df['tsh_mean_females'] = female_tsh.mean()
df['tsh_std_females'] = female_tsh.std()
df['upper_limit_std_females'] = df['tsh_mean_females'].add(df['tsh_std_females'])


In [27]:
# Mean, standard deviation and "mean + 1 std" upper limit of TSH for males.
# Each statistic is a single number, stored as a constant column on df.
is_male = df['sex'] == 'M'
male_tsh = df.loc[is_male, 'TSH']
df['tsh_mean_males'] = male_tsh.mean()
df['tsh_std_males'] = male_tsh.std()
df['upper_limit_std_males'] = df['tsh_mean_males'].add(df['tsh_std_males'])

In [28]:
# TSH against age per sex, overlaid with each sex's mean and "mean + 1 std" level.
#
# The facet/colour order is pinned explicitly: by default plotly express orders
# facets by first appearance in the data, so "column 1 = females, column 2 = males"
# would otherwise depend on which sex happens to come first in df.
fig = px.scatter(df, x=df['age'], y=df['TSH'], facet_col='sex', color='sex',
                 category_orders={'sex': ['F', 'M']})

# The statistic columns hold one constant value each, so two end points are enough
# to draw a flat line. Passing the full, unsorted age column would make the line
# retrace itself many times.
age_span = [df['age'].min(), df['age'].max()]

# (facet column, statistic column, legend label). Every line names its panel
# explicitly so the female lines cannot land on the male panel or vice versa.
reference_lines = [
    (1, 'tsh_mean_females', 'Mean Tsh Values'),
    (1, 'upper_limit_std_females', 'Upper Limit of Tsh Values'),
    (2, 'tsh_mean_males', 'Mean Tsh Values'),
    (2, 'upper_limit_std_males', 'Upper Limit of Tsh Values'),
]
for panel_col, stat_column, legend_label in reference_lines:
    level = df[stat_column].iloc[0]  # constant column: any row carries the value
    fig.add_scatter(x=age_span, y=[level, level], mode='lines',
                    name=legend_label, row=1, col=panel_col)

# Last expression of the cell: sets the title and displays the figure.
fig.update_layout(title_text="Thyroid-stimulating hormone, Standart deviation values according to sex")


In [None]:
# The standard deviation of TSH values is 27.28 in females and 16.11 in males.
# This doesn't mean that a value of 27 is safe for females, of course,
# but it definitely gives an idea about standard levels.