## Thyroid Disease 

The most common thyroid disorder is hypothyroidism. Hypo- means deficient or underactive, so hypothyroidism is a condition in which the thyroid gland is underperforming or producing too little thyroid hormone. Recognizing the symptoms of hypothyroidism is extremely important.





<img src="https://i.imgur.com/5cW3lGZ.gif?noredirect" width="800px">


### Data Set Information:



#### From Garavan Institute
#### Documentation: as given by Ross Quinlan
#### 6 databases from the Garavan Institute in Sydney, Australia
#### Approximately the following for each database:

* 2800 training (data) instances and 972 test instances
* Plenty of missing data
* 29 or so attributes, either Boolean or continuously-valued

#### 2 additional databases, also from Ross Quinlan, are included

* Hypothyroid.data and sick-euthyroid.data
* Quinlan believes that these databases have been corrupted
* Their format is highly similar to the other databases

#### 1 more database of 9172 instances that cover 20 classes, and a related domain theory
#### Another thyroid database from Stefan Aeberhard

* 3 classes, 215 instances, 5 attributes
* No missing values

#### Dataset link:


##### [Here](https://www.kaggle.com/yasserhessein/thyroid-disease-data-set)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seaborn defaults for the plots that follow: 20x20 figure size and a 1.4 font scale.
sns.set(rc={'figure.figsize': [20, 20]}, font_scale=1.4)

In [None]:
# Load the hypothyroid table from the Kaggle input directory and display it.
df = pd.read_csv('../input/thyroid-disease-data-set/hypothyroid.csv')
df

In [None]:
# First rows, to eyeball the column layout.
df.head()

In [None]:
# Summary statistics, transposed so that each column becomes a row.
df.describe().T

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
df

In [None]:
# Frequency of each label in the binaryClass column.
df["binaryClass"].value_counts()

In [None]:
df["binaryClass"]=df["binaryClass"].map({"P":0,"N":1})

In [None]:
df["pregnant"].value_counts()

In [None]:
df=df.replace({"t":1,"f":0})

In [None]:
df

In [None]:
#df['target'].isnull().sum()

In [None]:
# NOTE(review): "?" placeholders are only converted to NaN in a later cell, so any "?" entries are not counted as null here.
df['sex'].isnull().sum()

In [None]:
# Inspect the values of TBG before the column is removed in the next cell.
df["TBG"].value_counts()

In [None]:
# Remove the TBG column from the frame.
del df["TBG"]

In [None]:
# Convert the "?" placeholders into real missing values so that isnull()/fillna() can see them.
# `np.nan` is the canonical NumPy constant; the upper-case `np.NAN` alias is not available in NumPy 2.x.
df = df.replace({"?": np.nan})

In [None]:
df.isnull().sum()

In [None]:
df["sex"].value_counts()

In [None]:
df=df.replace({"F":1,"M":0})

In [None]:
df["referral source"].value_counts()

In [None]:
del df["referral source"]

In [None]:
# Re-check dtypes and non-null counts after the replacements and column removals above.
df.info()

In [None]:
# Value frequencies of the "... measured" indicator columns.
df["T3 measured"].value_counts()

In [None]:
df["TT4 measured"].value_counts()

In [None]:
df["FTI measured"].value_counts()

In [None]:
df["TBG measured"].value_counts()

In [None]:
# Label frequencies after the integer encoding of binaryClass.
df["binaryClass"].value_counts()

In [None]:
# Per-column dtypes; object columns are converted to numeric in the next cell.
df.dtypes

In [None]:
cols = df.columns[df.dtypes.eq('object')]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
df.dtypes

In [None]:
df.isnull().sum()

In [None]:
df['T4U measured'].mean()

In [None]:
df['T4U measured'].fillna(df['T4U measured'].mean(), inplace=True)

In [None]:
df['sex'].fillna(df['sex'].mean(), inplace=True)

In [None]:
df['age'].fillna(df['age'].mean(), inplace=True)

In [None]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy='mean')

In [None]:
df['TSH'] = imputer.fit_transform(df[['TSH']])

In [None]:
df['T3'] = imputer.fit_transform(df[['T3']])

In [None]:
df['TT4'] = imputer.fit_transform(df[['TT4']])

In [None]:
df['T4U'] = imputer.fit_transform(df[['T4U']])

In [None]:
df['FTI'] = imputer.fit_transform(df[['FTI']])

In [None]:
# Missing-value count per column after the fills above.
df.isnull().sum()

In [None]:
df

In [None]:
# Remaining column names.
df.columns

In [None]:
# seaborn is already imported at the top of the notebook; re-importing it is harmless.
import seaborn as sns

%matplotlib inline
# Switch the seaborn defaults to a smaller 8x8 figure and a 1.2 font scale for the plots below.
sns.set(rc={'figure.figsize': [8, 8]}, font_scale=1.2)

In [None]:
sns.distplot(df['age'])

In [None]:
sns.distplot(df['sex'])

In [None]:
sns.distplot(df['T3'])

In [None]:
sns.distplot(df['TT4'])

In [None]:
sns.distplot(df['T4U'])

In [None]:
sns.distplot(df['FTI'])

In [None]:
sns.distplot(df['TBG measured'])

In [None]:
sns.jointplot(x='age', y='TT4', data=df, kind='scatter', height=8, color='m')

In [None]:
sns.jointplot(x='age', y='TT4', data=df, kind='reg', height=8, color='m')

In [None]:
sns.distplot(df['age'], kde=False, bins=30, color='m')

In [None]:
sns.countplot(x='binaryClass', data=df, palette='rocket')

In [None]:
sns.countplot(x='binaryClass', data=df, hue='sex', palette='BuPu')

In [None]:
sns.stripplot(x="binaryClass", y="age", data=df, palette="viridis")

In [None]:
sns.boxplot(x='binaryClass', y='age', data=df)

In [None]:
sns.jointplot(x='FTI', y='binaryClass', data=df, kind='scatter', height=8, color='m')

In [None]:
df_corr = df.corr()
df_corr

In [None]:
sns.heatmap(df_corr, cmap='viridis', linecolor='k', linewidths=2, annot=True)

In [None]:
#df.dropna(axis = 0, inplace=True)

In [None]:
x = df.drop('binaryClass', axis=1)
y = df['binaryClass']

In [None]:
x

In [None]:
y

In [None]:
# Fit an ordinary-least-squares regression of the class label on the features and show its summary table.
# NOTE(review): `x` itself is rebound to the output of add_constant, so the added constant
# column stays in `x` for the train/test split and the network built later — confirm this is intended.
x = sm.add_constant(x)
results = sm.OLS(y,x).fit()
results.summary()

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [None]:
x.shape

In [None]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

<img src="https://lh3.googleusercontent.com/proxy/dmZ-7K8n0tfjlAlmjL316IQymLkEUejTyRJe_PvnpoIvDzoDixx_4L1JmNBYg0xhdyywrCkkjD9F5Snp7PJU8N6RFZ0ke4IO9wMe5c2xbJvC" width="800px">

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
x.shape

In [None]:
x.shape[1]

In [None]:
model = Sequential()
model.add(Dense(256, input_shape=[x.shape[1]], activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(63, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the network.
model.summary()

In [None]:
from tensorflow.keras.utils import plot_model
from IPython.display import Image
# Write a diagram of the network to convnet.png (with layer shapes and names) and display the image.
plot_model(model, to_file='convnet.png', show_shapes=True,show_layer_names=True)
Image(filename='convnet.png') 

In [None]:
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
lrd = ReduceLROnPlateau(monitor = 'val_loss',
                         patience = 20,
                         verbose = 1,
                         factor = 0.75,
                         min_lr = 1e-10)

mcp = ModelCheckpoint('model.h5')

es = EarlyStopping(verbose=1, patience=20)

In [None]:
%%time
# Train for up to 100 epochs in batches of 64; 10% of the training rows are held out
# as the validation split that the callbacks monitor.
history = model.fit(x=x_train, y=y_train, epochs=100, callbacks=[lrd, mcp, es], batch_size=64, validation_split=0.1)


In [None]:
# Loss and accuracy on the held-out test split.
model.evaluate(x_test, y_test)

In [None]:
# Raw sigmoid outputs for every test row.
y_pred = model.predict(x_test)
y_pred

In [None]:
y_pred[1]

In [None]:
y_test

In [None]:
%%time
# Same evaluation as above, timed.
model.evaluate(x_test, y_test)

In [None]:
model.predict(x_test)[50]

In [None]:
model.predict_classes(x_test)[50]

In [None]:
y_test.iloc[50]

In [None]:
model.predict(x_test)[70]

In [None]:
model.predict_classes(x_test)[70]

In [None]:
y_test.iloc[70]

In [None]:
# Score one hand-entered feature row, scaled with the fitted scaler before prediction.
# NOTE(review): the values must follow the column order of `x`, which was rebound by
# sm.add_constant earlier — verify that the positions line up. TODO confirm.
model.predict(sc.transform([[42.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.00,132,1.00,1.00,109.0,1.0,0.88,.100,110.00,0.00,0,1]]))

In [None]:
# Loss per epoch on the training data and on the validation split held out by fit().
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

# The second curve comes from validation_split, not from the test set, so it is
# labelled 'validation' rather than 'test'.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Accuracy per epoch for the same two splits.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# Save the trained network to model.h5.
model.save('model.h5')

In [None]:
import joblib

In [None]:
# Save the fitted scaler next to the model so that new inputs can be scaled the same way.
joblib.dump(sc, 'scaler.pkl')

In [None]:
# Reload the scaler from disk.
sc = joblib.load('scaler.pkl')

In [None]:
%%time
from tensorflow.keras.models import load_model
# Reload the saved network and score the same hand-entered row with the reloaded scaler.
model = load_model('model.h5')
model.predict(sc.transform([[42.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.00,132,1.00,1.00,109.0,1.0,0.88,.100,110.00,0.00,0,1]]))