<a href="https://colab.research.google.com/github/ooo-dev-code/Android-apps/blob/main/Predicting/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

!pip install --upgrade tensorflow tensorflow-hub
import tensorflow as tf
import tensorflow_hub as hub

from imblearn.over_sampling import RandomOverSampler

# Diabetes

In [79]:
# Load the diabetes table from the CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer="./diabetes.csv")

In [None]:
# Preview the first ten rows of the table.
data.iloc[:10]

In [None]:
# For every column except the last one, overlay the histogram of the rows with
# Outcome == 1 and the histogram of the rows with Outcome == 0.
for label in data.columns[:-1]:
  # density=True normalizes each histogram so the two groups can be compared
  # even though they do not contain the same number of rows.
  for outcome, color, legend_name in ((1, "blue", "Diabetes"), (0, "red", "No Diabetes")):
    plt.hist(data[data["Outcome"]==outcome][label], color=color, label=legend_name, alpha=0.7, density=True)
  plt.title(label)
  plt.xlabel(label)
  plt.ylabel("Probability")
  plt.legend()
  plt.show()

In [82]:
# Features: every column except the last one. Target: the last column.
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

In [83]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
over = RandomOverSampler()
X, y = over.fit_resample(X, y)

In [84]:
x_train, x_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=0)
x_valid, x_test, y_valid, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=0)

In [85]:
# Small fully connected binary classifier: two hidden layers of 16 units and a
# single sigmoid output unit.
model = tf.keras.Sequential()
# ReLU outputs max(0, x): negative inputs are clamped to zero.
model.add(tf.keras.layers.Dense(16, activation="relu"))
model.add(tf.keras.layers.Dense(16, activation="relu"))
# Sigmoid squashes the output into (0, 1).
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

In [86]:
# Configure training: binary cross-entropy loss, Adam optimizer with a
# learning rate of 0.001, and accuracy reported as a metric.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

In [None]:
# Train for 100 epochs in mini-batches of 32, evaluating on the validation
# subset after every epoch; verbose=2 prints one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=100,
    batch_size=32,
    verbose=2,
)

In [None]:
# Report the loss and accuracy on the held-out test subset.
model.evaluate(x=x_test, y=y_test, verbose=2)

# Wine Review / RNN

In [None]:
# Read only the listed columns of the wine reviews and drop every row that has
# no description or no points value.
df = (
    pd.read_csv(
        "wine-reviews.csv",
        usecols=['country', 'description', 'points', 'price', 'variety', 'winery'],
    )
    .dropna(subset=["description", "points"])
)
df.head()

In [None]:
# Distribution of the review scores, in 20 bins.
plt.hist(df["points"], bins=20)
plt.xlabel("Points")
plt.ylabel("N")
plt.title("Points histogram")
plt.show()

In [91]:
# Binary target: 1 when the review scored 90 points or more, otherwise 0.
# Only the text and the target are kept afterwards.
# NOTE: this cell rebinds `df` without the "points" column, so running it a
# second time without reloading `df` fails.
df = df.assign(label=(df["points"] >= 90).astype(int))[["description", "label"]]

In [None]:
# Shuffle all rows once, then cut at 80% and 90% to get an
# 80% / 10% / 10% train / validation / test split.
# random_state makes the shuffle (and therefore the split) reproducible.
# Plain .iloc slicing replaces np.split, which on a DataFrame goes through the
# deprecated DataFrame.swapaxes and emits a FutureWarning on recent pandas.
shuffled = df.sample(frac=1, random_state=0)
train_end, val_end = int(0.8*len(df)), int(0.9*len(df))
train, val, test = shuffled.iloc[:train_end], shuffled.iloc[train_end:val_end], shuffled.iloc[val_end:]

In [93]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1024):
  """Turn a dataframe into a batched tf.data.Dataset of (description, label).

  Args:
    dataframe: frame holding a "description" column and a "label" column.
      It is not modified.
    shuffle: when true, shuffle with a buffer as large as the frame.
    batch_size: number of rows per batch.

  Returns:
    The dataset after optional shuffling, batching and prefetching with
    tf.data.AUTOTUNE.
  """
  labels = dataframe["label"]
  texts = dataframe["description"]
  dataset = tf.data.Dataset.from_tensor_slices((texts, labels))
  if shuffle:
    # A buffer covering every row shuffles across the whole frame.
    dataset = dataset.shuffle(buffer_size=len(dataframe))
  return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [94]:
# Only the training data needs to be shuffled. Leaving the validation and test
# datasets in row order avoids filling a full-size shuffle buffer for them and
# keeps their batches aligned with the rows of `val` / `test`.
train_data = df_to_dataset(train)
valid_data = df_to_dataset(val, shuffle=False)
test_data = df_to_dataset(test, shuffle=False)

# Embedding + Model

In [None]:
!pip install tf_keras
import tf_keras
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

model = tf_keras.Sequential()
model.add(hub_layer)
model.add(tf_keras.layers.Dense(16, activation='relu'))
model.add(tf_keras.layers.Dropout(0.4))
model.add(tf_keras.layers.Dense(16, activation='relu'))
model.add(tf_keras.layers.Dropout(0.4))
model.add(tf_keras.layers.Dense(1, activation='sigmoid'))


In [106]:
# The model and optimizer come from tf_keras, so take the loss from tf_keras
# as well instead of tf.keras: the two packages can be different Keras
# implementations, and mixing their objects in one model is fragile.
model.compile(
    optimizer=tf_keras.optimizers.Adam(learning_rate=0.001),
    loss=tf_keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)

In [None]:
# Loss and accuracy on the training data before any fitting, as a baseline.
model.evaluate(x=train_data)

In [None]:
# Train for 10 epochs, validating after each one; the returned history is
# kept so the learning curves can be plotted afterwards.
history = model.fit(
    x=train_data,
    validation_data=valid_data,
    epochs=10,
)

In [None]:
# Training and validation accuracy per epoch, drawn in that order so the
# curves match the legend entries below.
for metric in ('accuracy', 'val_accuracy'):
  plt.plot(history.history[metric])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'val'], loc='upper left')