<a href="https://colab.research.google.com/github/Ahsanbeg/Multiple-Disease-Prediction/blob/main/Thyroid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn==1.0.2

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Global seaborn settings: 20x20 default figure size and a 1.4 font scale.
sns.set(rc={'figure.figsize': [20, 20]}, font_scale=1.4)

In [None]:
# Location of the raw dataset. Set the HYPOTHYROID_CSV environment variable to
# point at your own copy of the file (e.g. when running on Colab). When it is
# unset, the original local path is used, so existing setups keep working.
DATA_PATH = os.environ.get(
    "HYPOTHYROID_CSV",
    "C:/Users/suram_ac9ahha/Downloads/medical_diagnosis/Datasets/hypothyroid.csv",
)
df = pd.read_csv(DATA_PATH)
df

In [None]:
# Preview the first rows of the raw frame.
df.head()


In [None]:
# Summary statistics, transposed so each column is shown as a row.
df.describe().T


In [None]:
# Column names, non-null counts and dtypes.
df.info()



In [None]:
# Display the frame as loaded.
df


In [None]:
# Class balance of the target column before it is encoded.
df["binaryClass"].value_counts()


In [None]:
# Encode the target as integers: "P" -> 0, "N" -> 1.
# The identity entries (0 -> 0, 1 -> 1) make this cell safe to re-run: with a
# plain {"P": 0, "N": 1} mapping, a second execution silently turns every
# label into NaN.
# NOTE(review): confirm which of "P"/"N" denotes the disease class; the
# prediction messages further down treat 0 as "no disease".
label_codes = {"P": 0, "N": 1, 0: 0, 1: 1}
encoded_labels = df["binaryClass"].map(label_codes)

# Fail here with a clear message rather than later inside model.fit.
if encoded_labels.isna().any():
    unexpected = df.loc[encoded_labels.isna(), "binaryClass"].unique().tolist()
    raise ValueError(f"Unexpected binaryClass values: {unexpected}")

df["binaryClass"] = encoded_labels


In [None]:
# Value counts of the "pregnant" flag before encoding.
df["pregnant"].value_counts()


In [None]:
# Frame-wide replacement of the string flags: "t" becomes 1 and "f" becomes 0.
df=df.replace({"t":1,"f":0})

In [None]:
df


In [None]:
# NaN count in "sex" (the "?" placeholders are only converted to NaN in a later cell).
df['sex'].isnull().sum()


In [None]:
# Inspect the values present in "TBG" before the column is removed below.
df["TBG"].value_counts()


In [None]:
# Remove the "TBG" column from the frame.
del df["TBG"]


In [None]:
# Turn the "?" placeholder into a real missing value across the whole frame.
# np.nan is the canonical spelling; the np.NAN alias was removed in NumPy 2.0.
df = df.replace({"?": np.nan})


In [None]:
# Missing values per column now that "?" has been converted to NaN.
df.isnull().sum()


In [None]:
# Value counts of "sex" before it is encoded.
df["sex"].value_counts()


In [None]:
# Frame-wide encoding of the sex codes: "F" becomes 1 and "M" becomes 0.
sex_codes = {"F": 1, "M": 0}
df = df.replace(to_replace=sex_codes)


In [None]:
# Value counts of "referral source" before the column is dropped.
df["referral source"].value_counts()


In [None]:
# Discard the "referral source" column.
df = df.drop(columns="referral source")

In [None]:
# Column dtypes and non-null counts after the encoding and drop steps above.
df.info()


In [None]:
# Value counts of the "T3 measured" flag.
df["T3 measured"].value_counts()


In [None]:
# Value counts of the "TT4 measured" flag.
df["TT4 measured"].value_counts()


In [None]:
# Value counts of the "FTI measured" flag.
df["FTI measured"].value_counts()


In [None]:
# Value counts of the "TBG measured" flag.
df["TBG measured"].value_counts()


In [None]:
# Class balance of the encoded target.
df["binaryClass"].value_counts()


In [None]:
# Current dtype of every column.
df.dtypes

In [None]:
# Convert every object-dtype column to numeric; entries that cannot be parsed become NaN.
object_mask = df.dtypes == 'object'
cols = df.columns[object_mask]
df[cols] = df[cols].apply(lambda series: pd.to_numeric(series, errors='coerce'))
df.dtypes

In [None]:
# Missing values per column after the numeric coercion.
df.isnull().sum()

In [None]:
# Mean of "T4U measured", used as the fill value in the next cell.
df['T4U measured'].mean()

In [None]:
df['T4U measured'].fillna(df['T4U measured'].mean(), inplace=True)

In [None]:
df['sex'].fillna(df['sex'].mean(), inplace=True)

In [None]:
df['age'].fillna(df['age'].mean(), inplace=True)

In [None]:
from sklearn.impute import SimpleImputer

# Mean-strategy imputer reused by the imputation cells that follow.
imputer = SimpleImputer(strategy='mean')

In [None]:
# Mean-impute each of the columns listed below.
# One loop replaces five copy-pasted cells. fit_transform returns an
# (n_rows, 1) array, so ravel() flattens it to the 1-D shape a single column
# expects. The imputer is refit for every column, as the separate cells did.
# NOTE(review): the means are computed on the full dataset, before the
# train/test split further down, so test rows influence the imputed values.
for col_name in ['TSH', 'T3', 'TT4', 'T4U', 'FTI']:
    df[col_name] = imputer.fit_transform(df[[col_name]]).ravel()

In [None]:
# Missing values per column after the fill and imputation steps.
df.isnull().sum()

In [None]:
df

In [None]:
# Column labels of the cleaned frame.
df.columns

In [None]:
# Write the cleaned frame to a CSV file in the current working directory.
# NOTE(review): to_csv writes the index as an extra unnamed first column by default - confirm consumers of this file expect that.
df.to_csv('prepocessed_hyperthyroid.csv')

In [None]:
# Separate the target column from the feature matrix.
target_col = 'binaryClass'
y = df[target_col]
x = df.drop(columns=[target_col])

In [None]:
# Preview the full frame, then the feature matrix and the target derived from it.
df.head()

In [None]:
x.head()

In [None]:
y.head()

In [None]:

# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
split_parts = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Shapes of the full, training and test feature matrices.
print(x.shape, x_train.shape, x_test.shape)

## **Feature Selection - Dropping constant features**


In [None]:
# Columns excluded from the model inputs. The list is defined once so the
# training and test sets are always reduced to the same feature set; the
# original repeated the literal list for each frame, which can drift apart.
DROPPED_FEATURES = [
    'FTI', 'FTI measured', 'T4U measured', 'TT4 measured',
    'query on thyroxine', 'on antithyroid medication', 'sick', 'pregnant',
    'thyroid surgery', 'I131 treatment', 'query hypothyroid',
    'query hyperthyroid', 'lithium', 'goitre', 'tumor', 'hypopituitary',
    'psych', 'TSH measured', 'T4U', 'TBG measured',
]
x_train = x_train.drop(columns=DROPPED_FEATURES)
x_test = x_test.drop(columns=DROPPED_FEATURES)

In [None]:
# Features that remain after the drop above.
x_train.columns

## **Model Training**

In [None]:
# Logistic regression classifier created with scikit-learn's default arguments.
model = LogisticRegression()

In [None]:
# Fit the classifier on the reduced training features.
model.fit(x_train, y_train)

In [None]:
# accuracy on training data
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)

In [None]:
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# accuracy on test data
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)

In [None]:
print('Accuracy on Test data : ', test_data_accuracy)

In [None]:
# One hand-written sample; the values must follow the order of x_train.columns.
input_data = (44,0,0,45,1,1.4,39)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape to a single row and attach the training column names. The model was
# fitted on a DataFrame, so predicting on a bare array makes scikit-learn warn
# about missing feature names. Building the frame also raises immediately if
# the number of values does not match the number of training features.
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1),
    columns=x_train.columns,
)

prediction = model.predict(input_data_reshaped)
print(prediction)

# Class 0 is the label that "P" was encoded to, class 1 the one "N" was encoded to.
# NOTE(review): the dataset file is hypothyroid.csv while these messages say
# "HyperThyroid" - confirm the intended wording and which class means disease.
if prediction[0] == 0:
    print('The Person does not have a HyperThyroid Disease')
else:
    print('The Person has HyperThyroid Disease')

## **Saving the trained model**





In [None]:
import pickle

In [None]:
# Serialize the fitted model to disk. The with-block closes the file handle
# even if pickling fails; the original never closed it.
filename = 'Thyroid_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# loading the saved model
# The with-block closes the file handle once the model has been read.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
with open('Thyroid_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# List the model's input features, one name per line.
for column in x_train.columns.tolist():
    print(column)

In [None]:
# Dtypes and non-null counts of the final training features.
x_train.info()