In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load and Explore the Data

# Load the dataset
url = "https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv"
df = pd.read_csv(url)

# Explore the data
print(df.head())
print(df.info())

   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic             No  ...               No   

  TechSupport StreamingTV StreamingMovies        Contract Pape

In [3]:
# Encode categorical features
label_enc = LabelEncoder()
df['gender'] = label_enc.fit_transform(df['gender'])
df['Partner'] = label_enc.fit_transform(df['Partner'])
df['Dependents'] = label_enc.fit_transform(df['Dependents'])
df['PhoneService'] = label_enc.fit_transform(df['PhoneService'])
df['PaperlessBilling'] = label_enc.fit_transform(df['PaperlessBilling'])
df['Churn'] = label_enc.fit_transform(df['Churn'])

In [4]:
# Select relevant features
features = ['tenure', 'MonthlyCharges', 'Contract', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
X = pd.get_dummies(df[features])  # Get dummies to handle categorical variables
y = df['Churn']

In [5]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# Train a Model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Initialize the model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.7983909133932797
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.90      0.87      1539
           1       0.67      0.51      0.58       574

    accuracy                           0.80      2113
   macro avg       0.75      0.71      0.72      2113
weighted avg       0.79      0.80      0.79      2113



In [None]:
# Building a Neural Network for Chrun Prediction

In [None]:
"""
	•	Scikit-Learn: Best for quick, straightforward models like Naive Bayes, which work well with text data.
	•	Keras: Allows you to easily build and train neural networks with minimal code, suitable for more complex tasks like deep learning.
	•	TensorFlow: Provides maximum control and flexibility, allowing for custom model architectures and advanced features.
"""

In [None]:
#2. TensorFlow: Building a Neural Network for Churn Prediction

#Step 1: Load and Prepare the Dataset (Same as Scikit-Learn)

#Use the same dataset preparation steps as in the Scikit-Learn example.


In [8]:
# Step 2: Build and Train a Neural Network

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Define the model
model = Sequential([
    Dense(16, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5677 - loss: 1.1819 - val_accuracy: 0.7515 - val_loss: 0.4739
Epoch 2/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7610 - loss: 0.5016 - val_accuracy: 0.7586 - val_loss: 0.4684
Epoch 3/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7662 - loss: 0.4896 - val_accuracy: 0.7819 - val_loss: 0.4545
Epoch 4/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7758 - loss: 0.4814 - val_accuracy: 0.7890 - val_loss: 0.4427
Epoch 5/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7733 - loss: 0.4750 - val_accuracy: 0.7880 - val_loss: 0.4392
Epoch 6/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7992 - loss: 0.4542 - val_accuracy: 0.7769 - val_loss: 0.4483
Epoch 7/20
[1m124/124[0m [32m━━━━━━━

In [9]:
# Step 3: Evaluate the Model
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7984 - loss: 0.4114
Test Accuracy: 0.7941315770149231


In [None]:
# 3. Keras: Building and Visualizing a Simple Neural Network

# Note: Keras is now part of TensorFlow, so the steps are similar to those used for TensorFlow.

# Step 1: Load and Prepare the Dataset (Same as Scikit-Learn)

# Use the same dataset preparation steps as in the Scikit-Learn example.


In [10]:
# Step 2: Build, Train, and Visualize a Neural Network
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model

# Define the model
model = Sequential([
    Dense(16, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Visualize the model architecture
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7748 - loss: 0.5175 - val_accuracy: 0.7931 - val_loss: 0.4475
Epoch 2/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7629 - loss: 0.4939 - val_accuracy: 0.7505 - val_loss: 0.4727
Epoch 3/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7775 - loss: 0.4735 - val_accuracy: 0.7992 - val_loss: 0.4257
Epoch 4/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7780 - loss: 0.4767 - val_accuracy: 0.7951 - val_loss: 0.4191
Epoch 5/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7900 - loss: 0.4470 - val_accuracy: 0.7982 - val_loss: 0.4164
Epoch 6/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7864 - loss: 0.4532 - val_accuracy: 0.7961 - val_loss: 0.4181
Epoch 7/20
[1m124/124[0m 

3. Keras: Building and Visualizing a Simple Neural Network

Note: Keras is now part of TensorFlow, so the steps are similar to those used for TensorFlow.

Step 1: Load and Prepare the Dataset (Same as Scikit-Learn)

Use the same dataset preparation steps as in the Scikit-Learn example.


In [11]:
# Step 2: Build, Train, and Visualize a Neural Network
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model

# Define the model
model = Sequential([
    Dense(16, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Visualize the model architecture
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.7454 - loss: 0.8919 - val_accuracy: 0.7617 - val_loss: 0.4942
Epoch 2/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7636 - loss: 0.5060 - val_accuracy: 0.7728 - val_loss: 0.4561
Epoch 3/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7781 - loss: 0.4881 - val_accuracy: 0.7535 - val_loss: 0.4662
Epoch 4/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7650 - loss: 0.4834 - val_accuracy: 0.7759 - val_loss: 0.4325
Epoch 5/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7679 - loss: 0.4748 - val_accuracy: 0.8022 - val_loss: 0.4330
Epoch 6/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7743 - loss: 0.4699 - val_accuracy: 0.7677 - val_loss: 0.4334
Epoch 7/20
[1m124/124[0m 

Scenario: Email Spam Detection

Background:
You receive many emails every day, some of which are important, while others are just spam. You want to create a system that can automatically detect whether an email is spam or not, so you don’t waste time on irrelevant emails.


1. Scikit-Learn: A Simple Classifier

Step 1: Load and Explore the Dataset

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

# Sample dataset (you can replace this with a real dataset)
data = {
    'email': [
        "Win a $1000 gift card now!",
        "Important meeting tomorrow at 10 AM",
        "You have been selected for a prize",
        "Can we schedule a call next week?",
        "Earn money quickly with this simple trick"
    ],
    'label': [1, 0, 1, 0, 1]  # 1 = spam, 0 = not spam
}
df = pd.DataFrame(data)

# Convert text data to numerical data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['email'])

# Split the data into training and testing sets
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Step 2: Train a Naive Bayes Classifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Initialize the model
model = MultinomialNB()

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [15]:
# Step 2: Build and Train a Neural Network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build the model
model = Sequential([
    Dense(16, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=2, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 786ms/step - accuracy: 1.0000 - loss: 0.6598 - val_accuracy: 0.0000e+00 - val_loss: 0.7052
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 111ms/step - accuracy: 1.0000 - loss: 0.6407 - val_accuracy: 0.0000e+00 - val_loss: 0.7101
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step - accuracy: 1.0000 - loss: 0.6263 - val_accuracy: 0.0000e+00 - val_loss: 0.7149
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 1.0000 - loss: 0.6266 - val_accuracy: 0.0000e+00 - val_loss: 0.7192
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 1.0000 - loss: 0.6181 - val_accuracy: 0.0000e+00 - val_loss: 0.7232
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 1.0000 - loss: 0.6095 - val_accuracy: 0.0000e+00 - val_loss: 0.7272
Epoch 7/10
[1m2/2[

<keras.src.callbacks.history.History at 0x7a3df9910fa0>

In [16]:
# Step 3: Evaluate the Model
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.0000e+00 - loss: 0.7294
Test Accuracy: 0.0


Explanation:

	•	Keras is used to build a simple neural network for spam detection. It allows for easy experimentation with different architectures and optimizers.

3. TensorFlow: Advanced Customization

TensorFlow is often used for more complex tasks, but here we will use it to demonstrate how you can build a neural network from scratch, similar to what we did with Keras but with more control.


In [17]:
#Step 1: Prepare the Data (Same as Scikit-Learn)

# Use the same dataset preparation steps as in the Scikit-Learn example.

# Step 2: Build and Train a Neural Network

import tensorflow as tf

# Define the model
class SpamDetectionModel(tf.keras.Model):
    def __init__(self):
        super(SpamDetectionModel, self).__init__()
        self.dense1 = tf.keras.layers.Dense(16, activation='relu')
        self.dense2 = tf.keras.layers.Dense(8, activation='relu')
        self.dense3 = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        return self.dense3(x)

# Initialize and compile the model
model = SpamDetectionModel()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train.toarray(), y_train, epochs=10, batch_size=2, validation_split=0.2)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 422ms/step - accuracy: 0.0000e+00 - loss: 0.8245 - val_accuracy: 1.0000 - val_loss: 0.6637
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.0000e+00 - loss: 0.7834 - val_accuracy: 1.0000 - val_loss: 0.6663
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.0000e+00 - loss: 0.8110 - val_accuracy: 1.0000 - val_loss: 0.6690
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.0000e+00 - loss: 0.8010 - val_accuracy: 1.0000 - val_loss: 0.6716
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.0000e+00 - loss: 0.7597 - val_accuracy: 1.0000 - val_loss: 0.6742
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0000e+00 - loss: 0.7824 - val_accuracy: 1.0000 - val_loss: 0.6768
Epoch 7/10
[1m2/2[0

<keras.src.callbacks.history.History at 0x7a3d96e0ce50>

In [18]:
# Step 3: Evaluate the Model
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test.toarray(), y_test)
print("Test Accuracy:", accuracy)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 1.0000 - loss: 0.5661
Test Accuracy: 1.0
