In [1]:
## Importing Libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score,recall_score,f1_score,classification_report, confusion_matrix
from scipy.linalg import pinv2
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

In [2]:
# Load the tab-separated input table into a DataFrame.
df = pd.read_csv('data.tsv', delimiter='\t')

In [3]:
df.head()

Unnamed: 0,geneId,geneSymbol,DSI,DPI,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,score,EI,YearInitial,YearFinal,NofPmids,NofSnps,source
0,1,A1BG,0.7,0.538,C0001418,Adenocarcinoma,group,C04,Neoplastic Process,0.01,1.0,2008.0,2008.0,1,0,LHGDN
1,1,A1BG,0.7,0.538,C0002736,Amyotrophic Lateral Sclerosis,disease,C18;C10,Disease or Syndrome,0.01,1.0,2008.0,2008.0,1,0,BEFREE
2,1,A1BG,0.7,0.538,C0003578,Apnea,phenotype,C23;C08,Sign or Symptom,0.01,1.0,2017.0,2017.0,1,0,BEFREE
3,1,A1BG,0.7,0.538,C0003864,Arthritis,disease,C05,Disease or Syndrome,0.01,1.0,2019.0,2019.0,1,0,BEFREE
4,1,A1BG,0.7,0.538,C0008373,Cholesteatoma,disease,C17,Disease or Syndrome,0.01,1.0,2020.0,2020.0,1,0,BEFREE


In [4]:
df['diseaseType'].unique()

array(['group', 'disease', 'phenotype'], dtype=object)

In [5]:
df['diseaseType'].value_counts()

disease      781622
phenotype    211293
group        142027
Name: diseaseType, dtype: int64

In [6]:
df.shape

(1134942, 16)

In [7]:
len(df.diseaseName.value_counts())

30170

In [8]:
df.isnull().sum()

geneId                      0
geneSymbol                  0
DSI                      2584
DPI                      2882
diseaseId                   0
diseaseName                 0
diseaseType                 0
diseaseClass           156403
diseaseSemanticType         0
score                       0
EI                     166974
YearInitial            166974
YearFinal              166974
NofPmids                    0
NofSnps                     0
source                      0
dtype: int64

In [9]:
## Separating the numerical variables from the dataset.
# Every column whose dtype is not `object` is treated as numerical.
num_var = df.select_dtypes(exclude=['object'])

In [10]:
num_var

Unnamed: 0,geneId,DSI,DPI,score,EI,YearInitial,YearFinal,NofPmids,NofSnps
0,1,0.700,0.538,0.01,1.0,2008.0,2008.0,1,0
1,1,0.700,0.538,0.01,1.0,2008.0,2008.0,1,0
2,1,0.700,0.538,0.01,1.0,2017.0,2017.0,1,0
3,1,0.700,0.538,0.01,1.0,2019.0,2019.0,1,0
4,1,0.700,0.538,0.01,1.0,2020.0,2020.0,1,0
...,...,...,...,...,...,...,...,...,...
1134937,115804232,,,0.10,1.0,2019.0,2019.0,1,0
1134938,115891964,0.861,0.077,0.01,1.0,2016.0,2016.0,1,0
1134939,115891964,0.861,0.077,0.01,1.0,2016.0,2016.0,1,0
1134940,115891964,0.861,0.077,0.01,1.0,2016.0,2016.0,1,0


In [11]:
df.DSI.median()

0.534

In [12]:
df.DSI.mean()

0.5416682056381463

In [13]:
df.describe()

Unnamed: 0,geneId,DSI,DPI,score,EI,YearInitial,YearFinal,NofPmids,NofSnps
count,1134942.0,1132358.0,1132060.0,1134942.0,967968.0,967968.0,967968.0,1134942.0,1134942.0
mean,2437042.0,0.5416682,0.6979658,0.06318843,0.975242,2011.138019,2013.94111,2.632643,0.2407973
std,15533310.0,0.1300706,0.2029981,0.09900556,0.1365,7.649745,6.370832,19.49824,5.058252
min,1.0,0.231,0.038,0.01,0.0,1924.0,1959.0,0.0,0.0
25%,3169.0,0.452,0.615,0.01,1.0,2007.0,2011.0,1.0,0.0
50%,6580.0,0.534,0.769,0.02,1.0,2013.0,2017.0,1.0,0.0
75%,27342.0,0.621,0.846,0.1,1.0,2017.0,2019.0,2.0,0.0
max,115892000.0,1.0,0.962,1.0,1.0,2020.0,2020.0,6577.0,2632.0


In [14]:
df.DPI.median()

0.769

In [15]:
df.DPI.mean()

0.6979657933325094

In [16]:
# Impute missing numerical values with the median of their own column.
# The result is assigned back instead of using inplace=True: `num_var` was
# derived from `df`, and an in-place fill on it makes pandas emit a
# SettingWithCopyWarning.
num_var = num_var.fillna(num_var.median())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [17]:
num_var.isnull().sum()

geneId         0
DSI            0
DPI            0
score          0
EI             0
YearInitial    0
YearFinal      0
NofPmids       0
NofSnps        0
dtype: int64

In [18]:
## Separating the categorical variables from the dataset.
# Columns stored with dtype `object` are treated as categorical.
cat_var = df.select_dtypes(include=['object'])

In [19]:
cat_var.head()

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,A1BG,C0001418,Adenocarcinoma,group,C04,Neoplastic Process,LHGDN
1,A1BG,C0002736,Amyotrophic Lateral Sclerosis,disease,C18;C10,Disease or Syndrome,BEFREE
2,A1BG,C0003578,Apnea,phenotype,C23;C08,Sign or Symptom,BEFREE
3,A1BG,C0003864,Arthritis,disease,C05,Disease or Syndrome,BEFREE
4,A1BG,C0008373,Cholesteatoma,disease,C17,Disease or Syndrome,BEFREE


In [20]:
cat_var.isnull().sum()

geneSymbol                  0
diseaseId                   0
diseaseName                 0
diseaseType                 0
diseaseClass           156403
diseaseSemanticType         0
source                      0
dtype: int64

In [21]:
df.diseaseClass.mode()

0    C04
dtype: object

In [22]:
## Handling missing categorical variables
def _fill_with_most_frequent(column):
    """Return `column` with its missing entries replaced by its most frequent value."""
    # value_counts() orders values by descending count, so position 0 is the mode.
    most_frequent = column.value_counts().index[0]
    return column.fillna(most_frequent)


cat_var = cat_var.apply(_fill_with_most_frequent)

In [23]:
cat_var.isnull().sum()

geneSymbol             0
diseaseId              0
diseaseName            0
diseaseType            0
diseaseClass           0
diseaseSemanticType    0
source                 0
dtype: int64

In [24]:
cat_var.head()

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,A1BG,C0001418,Adenocarcinoma,group,C04,Neoplastic Process,LHGDN
1,A1BG,C0002736,Amyotrophic Lateral Sclerosis,disease,C18;C10,Disease or Syndrome,BEFREE
2,A1BG,C0003578,Apnea,phenotype,C23;C08,Sign or Symptom,BEFREE
3,A1BG,C0003864,Arthritis,disease,C05,Disease or Syndrome,BEFREE
4,A1BG,C0008373,Cholesteatoma,disease,C17,Disease or Syndrome,BEFREE


In [25]:
cat_var.geneSymbol.value_counts()

TNF            2724
TP53           2494
IL6            2367
VEGFA          1899
IL1B           1801
               ... 
FMC1-LUC7L2       1
GUSBP15           1
MORN3             1
TULP3P1           1
EIF4EP2           1
Name: geneSymbol, Length: 21666, dtype: int64

In [26]:
cat_var.head(2)

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,A1BG,C0001418,Adenocarcinoma,group,C04,Neoplastic Process,LHGDN
1,A1BG,C0002736,Amyotrophic Lateral Sclerosis,disease,C18;C10,Disease or Syndrome,BEFREE


In [27]:
len(cat_var.diseaseName.value_counts())

30170

In [28]:
cat_var.diseaseType.value_counts()

disease      781622
phenotype    211293
group        142027
Name: diseaseType, dtype: int64

In [29]:
len(cat_var.diseaseSemanticType.value_counts())

33

In [30]:
cat_var.source.value_counts()

BEFREE                                                             789377
HPO                                                                148643
CTD_human                                                           44786
GWASCAT                                                             34270
CLINVAR                                                             14326
                                                                    ...  
BEFREE;CTD_human;GWASCAT;GWASDB;HPO;ORPHANET                            1
BEFREE;CLINVAR;CTD_human;GENOMICS_ENGLAND;HPO;LHGDN;MGD;RGD             1
BEFREE;CLINGEN;CLINVAR;CTD_human;GENOMICS_ENGLAND;LHGDN;UNIPROT         1
BEFREE;CGI;CLINVAR;CTD_human;GENOMICS_ENGLAND;LHGDN;MGD                 1
BEFREE;CGI;CLINVAR;CTD_human;MGD                                        1
Name: source, Length: 878, dtype: int64

In [31]:
cat_var.diseaseClass.value_counts()

C04                    289130
C06;C04                 50706
C14                     33452
C23                     32747
C23;C10                 28039
                        ...  
C23;C21                     1
C11;C05;C10;C14             1
C16;C06;C04;C07             1
C16;C04;C05;C14             1
C23;C16;C13;C05;C10         1
Name: diseaseClass, Length: 1106, dtype: int64

In [32]:
# Integer-encode every categorical column with its OWN LabelEncoder.
# A single shared encoder is refit on each column by `apply`, so afterwards it
# only remembers the classes of the last column; the codes of every other
# column (including the target `diseaseType`) could then no longer be mapped
# back with inverse_transform, nor could new rows be encoded consistently.
label_encoders = {col: LabelEncoder() for col in cat_var.columns}
cat_var1 = cat_var.apply(lambda column: label_encoders[column.name].fit_transform(column))
# `le` is kept for backward compatibility: as before, it ends up being the
# encoder fitted on the last column of `cat_var`.
le = label_encoders[cat_var.columns[-1]]

In [33]:
cat_var.head(3)

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,A1BG,C0001418,Adenocarcinoma,group,C04,Neoplastic Process,LHGDN
1,A1BG,C0002736,Amyotrophic Lateral Sclerosis,disease,C18;C10,Disease or Syndrome,BEFREE
2,A1BG,C0003578,Apnea,phenotype,C23;C08,Sign or Symptom,BEFREE


In [34]:
cat_var1.head(3)

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,0,52,1855,1,49,23,859
1,0,134,2533,0,613,11,0
2,0,222,3074,2,740,30,0


In [35]:
## 0 == disease
## 1 == group
## 2 == Phenotype
# cat_var.diseaseType.value_counts()
# cat_var1['diseaseType'] = df.diseaseType
cat_var1.head()

Unnamed: 0,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,0,52,1855,1,49,23,859
1,0,134,2533,0,613,11,0
2,0,222,3074,2,740,30,0
3,0,244,3209,0,127,11,0
4,0,565,6371,0,572,11,0


In [36]:
## Merging the numerical and the encoded categorical variables after preprocessing.
# Both frames are placed side by side (column-wise) in a single frame.
preprocessed_parts = [num_var, cat_var1]
data = pd.concat(preprocessed_parts, axis='columns')


In [37]:
data.head()

Unnamed: 0,geneId,DSI,DPI,score,EI,YearInitial,YearFinal,NofPmids,NofSnps,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
0,1,0.7,0.538,0.01,1.0,2008.0,2008.0,1,0,0,52,1855,1,49,23,859
1,1,0.7,0.538,0.01,1.0,2008.0,2008.0,1,0,0,134,2533,0,613,11,0
2,1,0.7,0.538,0.01,1.0,2017.0,2017.0,1,0,0,222,3074,2,740,30,0
3,1,0.7,0.538,0.01,1.0,2019.0,2019.0,1,0,0,244,3209,0,127,11,0
4,1,0.7,0.538,0.01,1.0,2020.0,2020.0,1,0,0,565,6371,0,572,11,0


In [38]:
data[data['diseaseType']==2]

Unnamed: 0,geneId,DSI,DPI,score,EI,YearInitial,YearFinal,NofPmids,NofSnps,geneSymbol,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,source
2,1,0.700,0.538,0.01,1.0,2017.0,2017.0,1,0,0,222,3074,2,740,30,0
8,1,0.700,0.538,0.01,1.0,2017.0,2017.0,1,0,0,1016,10752,2,572,30,0
9,1,0.700,0.538,0.01,1.0,2018.0,2018.0,1,0,0,1099,29976,2,654,2,0
11,1,0.700,0.538,0.30,1.0,2017.0,2017.0,1,0,0,1368,13314,2,716,15,762
55,2,0.529,0.769,0.20,1.0,1988.0,1988.0,1,0,3,1080,11387,2,654,30,876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1134889,115482723,0.506,0.846,0.01,1.0,2017.0,2017.0,1,0,7178,12422,10329,2,49,11,0
1134895,115482723,0.506,0.846,0.07,1.0,2008.0,2018.0,7,0,7178,13468,28471,2,49,23,0
1134908,115482723,0.506,0.846,0.01,1.0,2016.0,2016.0,1,0,7178,15147,18117,2,49,23,0
1134923,115482723,0.506,0.846,0.01,1.0,2004.0,2004.0,1,0,7178,22616,23366,2,654,30,0


In [39]:
data.shape

(1134942, 16)

In [40]:
data.isnull().sum()

geneId                 0
DSI                    0
DPI                    0
score                  0
EI                     0
YearInitial            0
YearFinal              0
NofPmids               0
NofSnps                0
geneSymbol             0
diseaseId              0
diseaseName            0
diseaseType            0
diseaseClass           0
diseaseSemanticType    0
source                 0
dtype: int64

In [41]:
# Feature matrix: `data` without the `diseaseType`, `NofSnps` and `EI` columns.
X = data.drop(columns=['diseaseType','NofSnps','EI'])
X.head()

Unnamed: 0,geneId,DSI,DPI,score,YearInitial,YearFinal,NofPmids,geneSymbol,diseaseId,diseaseName,diseaseClass,diseaseSemanticType,source
0,1,0.7,0.538,0.01,2008.0,2008.0,1,0,52,1855,49,23,859
1,1,0.7,0.538,0.01,2008.0,2008.0,1,0,134,2533,613,11,0
2,1,0.7,0.538,0.01,2017.0,2017.0,1,0,222,3074,740,30,0
3,1,0.7,0.538,0.01,2019.0,2019.0,1,0,244,3209,127,11,0
4,1,0.7,0.538,0.01,2020.0,2020.0,1,0,565,6371,572,11,0


In [42]:
# Target vector: the `diseaseType` column of `data`.
y = data['diseaseType']

In [43]:
from imblearn.over_sampling import SMOTE 
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X, y)

In [44]:
X_res.shape, y_res.shape

((2344866, 13), (2344866,))

In [45]:
## Splitting the dataset into training and testing sets for further use.
# Split the ORIGINAL rows first (stratified on the label) and oversample only
# the training part. Splitting the SMOTE output (X_res, y_res) instead would
# put synthetic samples into the test set, some of them interpolated from rows
# that land in the training set, which leaks information and inflates every
# score computed on x_test / y_test.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=23, stratify=y
)
x_train, y_train = SMOTE(random_state=42).fit_resample(x_train, y_train)

### Random Forest Classifier


In [46]:

# random_state makes the forest reproducible between runs; n_jobs=-1 builds the
# trees on all available cores (the training set is large and the recorded fit
# of this model was interrupted before it finished).
cfr = RandomForestClassifier(n_jobs=-1, random_state=42)

In [47]:
model = cfr.fit(x_train,y_train)

KeyboardInterrupt: 

In [None]:
pred = model.predict(x_test)

In [None]:
model.predict(x_test)

In [None]:
X[:3]

In [None]:
model.score(x_test,y_test)

In [None]:
accuracy_score(y_test,pred)

In [None]:
cm = confusion_matrix(y_test,pred)
cm

In [None]:
clsr= classification_report(y_test,pred)
print(clsr)

In [None]:
sns.heatmap(cm, annot=True)

In [None]:
precision_score(y_test,pred,average = 'weighted')

In [None]:
recall_score(y_test,pred,average = 'micro')

In [None]:
f1_score(y_test,pred,average ='weighted')

In [None]:
# ROC of class 2 versus the rest. roc_curve sweeps a threshold over a
# continuous score; hard class labels as y_score collapse the curve to a
# handful of points, so the predicted probability of class 2 is used instead.
class2_col = list(model.classes_).index(2)
fpr,tpr,t = roc_curve(y_true = y_test,y_score = model.predict_proba(x_test)[:, class2_col],pos_label =2)

In [None]:
print(fpr,tpr,t)

In [None]:
plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'--')
plt.scatter(fpr,tpr)
plt.title('ROC CURVE')
plt.xlabel('False Positive Rate',color = 'black')
plt.ylabel('True Positive Rate',color = 'black')
plt.show()

In [None]:
model.feature_importances_

In [None]:
fi = pd.DataFrame(model.feature_importances_, index= x_train.columns,columns=['importance']).sort_values('importance',ascending=False)

In [None]:
feature_importances = pd.DataFrame(model.feature_importances_, index =x_train.columns,  columns=['importance']).sort_values('importance', ascending=False)

In [None]:
fi

In [None]:
accuracy_score(y_test,pred)

In [None]:
sns.heatmap(confusion_matrix(y_test,pred),annot= True)

In [None]:
# type(x_train),type(y_train)

In [None]:
# (x_train[0][1])

### Extreme Gradient Boosting (XGBoost) Classifier

In [None]:
import xgboost as xgb

In [None]:
xgc = xgb.XGBClassifier()

In [None]:
model1 = xgc.fit(x_train,y_train)

In [None]:
pred = model1.predict(x_test)

In [None]:
accuracy_score(y_test,pred)

In [None]:
precision_score(y_test,pred,average = 'macro')

In [None]:
recall_score(y_test,pred,average = 'weighted')

In [None]:
f1_score(y_test,pred,average='micro')

In [None]:
sns.heatmap(confusion_matrix(y_test,pred),annot = True)

In [None]:
# ROC of class 2 versus the rest. roc_curve sweeps a threshold over a
# continuous score; hard class labels as y_score collapse the curve to a
# handful of points, so the predicted probability of class 2 is used instead.
class2_col = list(model1.classes_).index(2)
fpr,tpr,t = roc_curve(y_true = y_test,y_score = model1.predict_proba(x_test)[:, class2_col],pos_label =2)

In [None]:
print(fpr,tpr,t)

In [None]:
plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'--')
plt.scatter(fpr,tpr)
plt.title('ROC CURVE')
plt.xlabel('False Positive Rate',color = 'black')
plt.ylabel('True Positive Rate',color = 'black')
plt.show()

In [None]:
# Multiclass AUC needs one probability per class for every sample, i.e. an
# array of shape (n_samples, n_classes); the 1-D vector of predicted labels is
# rejected by roc_auc_score when multi_class is set.
roc_auc_score(y_test,model1.predict_proba(x_test),multi_class = 'ovo')

### Light Gradient Boosting Machine

In [None]:
import lightgbm as lgb

In [None]:
lgb1 = lgb.LGBMClassifier()

In [None]:
model2 = lgb1.fit(x_train,y_train)

In [None]:
pred2 = model2.predict(x_test)

In [None]:
print(classification_report(y_test,pred2))

In [None]:
accuracy_score(y_test,pred2)

In [None]:
precision_score(y_test,pred2,average = 'macro')

In [None]:
recall_score(y_test,pred2,average = 'weighted')

In [None]:
confusion_matrix(y_test,pred2)

In [None]:
sns.heatmap(confusion_matrix(y_test,pred2),annot = True)

In [None]:
# ROC of class 2 versus the rest. roc_curve sweeps a threshold over a
# continuous score; hard class labels as y_score collapse the curve to a
# handful of points, so the predicted probability of class 2 is used instead.
class2_col = list(model2.classes_).index(2)
fpr,tpr,t = roc_curve(y_true = y_test,y_score = model2.predict_proba(x_test)[:, class2_col],pos_label =2)

print(fpr,tpr,t)

plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'--')  # chance-level diagonal for reference
plt.scatter(fpr,tpr)
plt.title('ROC CURVE')
plt.xlabel('False Positive Rate',color = 'black')
plt.ylabel('True Positive Rate',color = 'black')
plt.show()

### K Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
model3 = KNeighborsClassifier()

In [None]:
model3.fit(x_train,y_train)

In [None]:
pred3 = model3.predict(x_test)

In [None]:
accuracy_score(y_test,pred3)

In [None]:
model3.score(x_train,y_train)

In [None]:
precision_score(y_test,pred3,average = 'macro')

In [None]:
recall_score(y_test,pred3,average = 'micro')

In [None]:
f1_score(y_test,pred3,average = 'weighted')

In [None]:
print(classification_report(y_test,pred3))

In [None]:
confusion_matrix(y_test,pred3)

In [None]:
sns.heatmap(confusion_matrix(y_test,pred3),annot = True)

In [None]:
# ROC of class 2 versus the rest. roc_curve sweeps a threshold over a
# continuous score; hard class labels as y_score collapse the curve to a
# handful of points, so the predicted probability of class 2 is used instead.
class2_col = list(model3.classes_).index(2)
fpr,tpr,t = roc_curve(y_true = y_test,y_score = model3.predict_proba(x_test)[:, class2_col],pos_label =2)

print(fpr,tpr,t)

plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'--')  # chance-level diagonal for reference
plt.scatter(fpr,tpr)
plt.title('ROC CURVE')
plt.xlabel('False Positive Rate',color = 'black')
plt.ylabel('True Positive Rate',color = 'black')
plt.show()

### Support Vector Machines

In [None]:
model4 = SVC()

In [None]:
model4.fit(x_train[:10000],y_train[:10000])


In [None]:
pred4 = model4.predict(x_test)

In [None]:
accuracy_score(y_test,pred4)

In [None]:
precision_score(y_test,pred4,average = 'micro')

In [None]:
recall_score(y_test,pred4,average = 'micro')

In [None]:
f1_score(y_test,pred4,average = 'weighted')

In [None]:
sns.heatmap(confusion_matrix(y_test,pred4),annot = True)

In [None]:
# ROC of class 2 versus the rest. roc_curve sweeps a threshold over a
# continuous score; hard class labels as y_score collapse the curve to a
# handful of points. model4 is built as SVC() without probability=True, so
# predict_proba is unavailable and the one-vs-rest decision_function column
# of class 2 is used as the score.
class2_col = list(model4.classes_).index(2)
fpr,tpr,t = roc_curve(y_true = y_test,y_score = model4.decision_function(x_test)[:, class2_col],pos_label =2)

print(fpr,tpr,t)

plt.plot(fpr,tpr)
plt.plot([0,1],[0,1],'--')  # chance-level diagonal for reference
plt.scatter(fpr,tpr)
plt.title('ROC CURVE')
plt.xlabel('False Positive Rate',color = 'black')
plt.ylabel('True Positive Rate',color = 'black')
plt.show()

In [48]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf

In [49]:
# Convert the split to tensors: float32 features, integer class ids as labels.
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train = tf.cast(tf.convert_to_tensor(y_train), tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test = tf.cast(tf.convert_to_tensor(y_test), tf.int32)

# The target (encoded diseaseType) takes the three values 0, 1 and 2.
# A softmax over a single unit always outputs 1.0 and binary cross-entropy
# assumes two classes, so the network could not learn the labels. The output
# layer therefore has one softmax unit per class, and the loss is the sparse
# categorical cross-entropy, which takes integer class ids directly.
NUM_CLASSES = 3

# NOTE(review): the features are fed to the network unscaled although their
# ranges differ by several orders of magnitude - consider standardising them.
# NOTE(review): `model` rebinds the name used for the random forest above.
model = Sequential()
model.add(Dense(30, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [51]:
model.fit(x_train[:5000], y_train[:5000], batch_size=50, epochs=150, validation_data=(x_test, y_test))

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150


Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150


Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


<keras.callbacks.History at 0x2808d5b59e8>

In [52]:
model.save('ANN.h5')

### Predictions

In [None]:
ex = [[1,0.7,0.538,0.01,2008.0,2008.0,1,0,52,1855,49,23,85]]

In [None]:
model.predict(ex)

In [None]:
val = pd.DataFrame(ex,columns= X.columns)
val

In [None]:
model1.predict(val)

In [None]:
model2.predict(ex)

In [None]:
ex1 = [[1,0.7,0.538,0.01,2008.0,2008.0,1,0,52,1855,49,3,859]]

In [None]:
model2.predict(ex1)

In [None]:
# Frame for the second example row. It must be built from `ex1`; building it
# from `ex` would just duplicate `val`.
val1 = pd.DataFrame(ex1,columns = X.columns)

In [None]:
model1.predict(val1)

In [None]:
model.predict(ex1)

In [None]:
x_test[10:20]

In [None]:
y_test[10:20]

In [None]:
p = [[6524,0.499,0.769,0.01,2018.0,2018.0,1,17605,464,20458,73,23,0]]

In [None]:
model.predict(p)

In [None]:
p1 = [[4038,0.568,0.692,0.10,2013.0,2017.0,0,10315,5467,27685,49,15,850]]

In [None]:
model.predict(p1)

In [None]:
p3 = [[6558,0.552,0.808,0.03,2010.0,2019.0,3,17303,2898,6027,212,11,0]]

In [None]:
model.predict(p3)