In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from pathlib import Path
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report

In [2]:
# Import data
file_path = "https://static.bc-edx.com/ai/ail-v-1-0/m18/lesson_2/datasets/sports-articles.csv"
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Label,totalWordsCount,semanticobjscore,semanticsubjscore,CC,CD,DT,EX,FW,INs,...,pronouns2nd,pronouns3rd,compsupadjadv,past,imperative,present3rd,present1st2nd,sentence1st,sentencelast,txtcomplexity
0,objective,109,0,1,7,9,0,5,8,6,...,0,3,0,11,0,0,0,0,1,18
1,objective,309,21,4,1,19,1,4,35,23,...,0,10,0,13,0,14,9,1,1,14
2,objective,149,6,1,8,14,0,5,15,11,...,0,2,0,8,0,3,2,1,1,18
3,objective,305,18,5,7,26,0,10,37,21,...,0,8,3,13,1,7,1,1,1,20
4,objective,491,23,8,33,47,0,12,61,36,...,0,16,2,34,1,5,6,1,1,24


In [3]:
# Drop the label to create the X data
X = df.drop('Label', axis=1)
# Create the y set from the "Label" column
y = df["Label"]

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [4]:
# Encode the y data with the label encoder
# Create an instance of the label encoder
le = LabelEncoder()

# Fit and transform the y training and testing data using the label encoder
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [5]:
# Scale the X data by using StandardScaler()
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Define the deep learning model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=5, activation="tanh", input_dim=len(X_train.columns)))

nn_model.add(tf.keras.layers.Dense(units=3, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=9, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=3, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train_encoded, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test_encoded,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 601us/step - accuracy: 0.5869 - loss: 0.6852
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 521us/step - accuracy: 0.7393 - loss: 0.6539
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 511us/step - accuracy: 0.7404 - loss: 0.6250
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482us/step - accuracy: 0.7554 - loss: 0.5850
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353us/step - accuracy: 0.8160 - loss: 0.5375
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349us/step - accuracy: 0.8219 - loss: 0.5032
Epoch 7/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338us/step - accuracy: 0.8479 - loss: 0.4542
Epoch 8/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step - accuracy: 0.8418 - loss: 0.4388
Epoch 9/100
[1m24/24[0m [32m━━━━━━━━━━━━━

In [7]:
# Make predictions with the test data
predictions = nn_model.predict(X_test_scaled,verbose=2)
predictions

8/8 - 0s - 8ms/step


array([[0.03815341],
       [0.8745835 ],
       [0.08948498],
       [0.0411777 ],
       [0.07409059],
       [0.06725916],
       [0.04045147],
       [0.78976583],
       [0.07126686],
       [0.11594432],
       [0.38921067],
       [0.02073904],
       [0.88166296],
       [0.13485546],
       [0.8810704 ],
       [0.04534651],
       [0.043904  ],
       [0.03051418],
       [0.8768837 ],
       [0.08225006],
       [0.4650895 ],
       [0.11896951],
       [0.05913428],
       [0.04051836],
       [0.0451439 ],
       [0.05997383],
       [0.02463361],
       [0.02183391],
       [0.04357947],
       [0.0456701 ],
       [0.4650895 ],
       [0.9663629 ],
       [0.03846406],
       [0.03169036],
       [0.02420527],
       [0.04806503],
       [0.02623718],
       [0.99016654],
       [0.9195305 ],
       [0.0210583 ],
       [0.99067336],
       [0.97688264],
       [0.05546276],
       [0.0472107 ],
       [0.15982132],
       [0.02286284],
       [0.05661987],
       [0.075

In [8]:
# Save the predictions to a DataFrame and round the predictions to binary results
predictions_df = pd.DataFrame(columns=["predictions"], data=predictions)
predictions_df["predictions"] = round(predictions_df["predictions"],0)
predictions_df

Unnamed: 0,predictions
0,0.0
1,1.0
2,0.0
3,0.0
4,0.0
...,...
245,0.0
246,1.0
247,0.0
248,0.0


In [9]:
# Print the classification report with the y test data and predictions
print(classification_report(y_test_encoded, predictions_df["predictions"].values))

              precision    recall  f1-score   support

           0       0.84      0.90      0.87       157
           1       0.81      0.70      0.75        93

    accuracy                           0.83       250
   macro avg       0.82      0.80      0.81       250
weighted avg       0.83      0.83      0.82       250



## Save the Model

In [10]:
# Set the model's file path
file_path = Path("sports-articles.keras")

# Export your model to a keras file
nn_model.save(file_path)

## Load the Model

In [11]:
# Set the model's file path
file_path = Path("sports-articles.keras")

# Load the model to a new object
nn_imported = tf.keras.models.load_model(file_path)

In [12]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_imported.evaluate(X_test_scaled, y_test_encoded, verbose=2)

# Display evaluation results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - 11ms/step - accuracy: 0.8280 - loss: 0.4313
Loss: 0.4313242435455322, Accuracy: 0.828000009059906
